forked from shedsaw/exciting-plus-rgvw-mod
-
Notifications
You must be signed in to change notification settings - Fork 0
/
make.inc.jetson.nv.acc
167 lines (128 loc) · 5.78 KB
/
make.inc.jetson.nv.acc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#==============================================================================
# Exciting-Plus make.inc file for wyp-Jetson with NVIDIA HPC SDK
# Last edited: Dec 30, 2020 (WYP)
#==============================================================================
# Do not clobber make's built-in MAKE variable: it already names the make
# program that is running, and overriding it with a plain `make` would send
# every $(MAKE) recursion to whatever `make` is first in PATH. `?=` only
# assigns when MAKE is not defined at all.
MAKE ?= make
COMPILER = nv

# Compiler switch to turn on the preprocessor
CPP_OPTS = -Mpreprocess
# Add your custom preprocessor defines here
# (output debugging info related to genmegqblh)
#CPP_OPTS += -D_DEBUG_megqblh_

# Common compile time options.
# Recursive (`=`) on purpose: CPP_OPTS is still appended to further down in
# this file, and those later defines must show up in F90_OPTS as well.
F90_OPTS = $(CPP_OPTS)
# Add your common compile time options here
# (show optimizer log)
#F90_OPTS += -Minfo=ftn,loop,opt,mp,par,vect
# Show optimizer log only for OpenACC
F90_OPTS += -Minfo=accel
# Use stack instead of heap for automatic arrays
F90_OPTS += -Mstack_arrays
#==============================================================================
# OpenACC
#==============================================================================
# -gpu= variants
#   debug build, managed memory:
#F90_OPTS += -gpu=managed:debug
#   optimized build, managed memory:
F90_OPTS += -gpu=managed
#   debug build, non-managed memory:
#F90_OPTS += -gpu=debug

# -acc= target
#   run on GPU:
F90_OPTS += -acc=gpu
#   debug build, run on CPU:
#F90_OPTS += -acc=multicore

# -acc= mode
#   debug build:
#F90_OPTS += -acc=verystrict
#   optimized build:
F90_OPTS += -acc=autopar
#==============================================================================
# MPI
#==============================================================================
# MPI compiler wrappers for Fortran, C and C++
F90 = mpifort
CC  = mpicc
CXX = mpic++

# Preprocessor define added for MPI builds. Kept in its own variable so it can
# be referred to separately from the rest of CPP_OPTS.
MPI_CPP_OPTS = -D_MPI_
CPP_OPTS    += $(MPI_CPP_OPTS)
#==============================================================================
# OpenMP
#==============================================================================
# OMP_OPTS is passed both at compile time and at link time.
# At runtime, don't forget to `export OMP_STACKSIZE=2G`.
OMP_OPTS = -mp

#==============================================================================
# Serial compiler (for utilities)
#==============================================================================
F90SERIAL = nvfortran
# F90_OPTS with the MPI define and the OpenMP switch removed
F90_OPTS_SERIAL = $(filter-out $(MPI_CPP_OPTS) $(OMP_OPTS),$(F90_OPTS))
#==============================================================================
# Compiler and linker options
#==============================================================================
# Each configuration below sets extra F90_OPTS, the link options
# (F90_LINK_OPTS) and the executable suffix (EXE_SFX). Only the optimized
# configuration is active; the others are kept commented out.
#
# NOTE(review): as written, -DEBUG=<n> passes -D followed by EBUG=<n>, i.e. it
# defines a macro named EBUG rather than DEBUG -- confirm which name the
# sources actually test before enabling a debug configuration.

# --- Debugging ---------------------------------------------------------------
#CPP_OPTS += -DEBUG=1
#F90_OPTS += -g -Minform=warn
#F90_LINK_OPTS = $(F90_OPTS)
#EXE_SFX = dbg-acc

# --- Debugging with extra checks ---------------------------------------------
# (Don't forget to enable core dump generation using `ulimit -c unlimited`)
#CPP_OPTS += -DEBUG=3
#F90_OPTS += -g -O0 -Minform=warn -Mbounds -traceback
#F90_LINK_OPTS = $(F90_OPTS)
#EXE_SFX = dbgchk-acc

# --- Optimized build with equivalent options to '-O2' (active) ---------------
F90_OPTS += -gopt -O
F90_OPTS += -Mlre -Mvect=simd -Mflushz -Mcache_align
F90_OPTS += -Mnoinline
F90_OPTS += -Minform=warn
F90_LINK_OPTS = $(F90_OPTS) -fpic
EXE_SFX = opt-acc

# --- Profiling ---------------------------------------------------------------
# Note: don't use '-Mpfi', it is incompatible with '-mp' and '-fpic'
#F90_OPTS += -Minstrument -Mprof=ccff -O -Mlre -Mflushz -Mcache_align -Mnoinline -Minform=warn
#F90_LINK_OPTS = $(F90_OPTS) -fpic
#EXE_SFX = prof-acc

# --- Fully optimized build ---------------------------------------------------
# Warning: might give incorrect results
#F90_OPTS += -fast -Munroll -Mnoinline -Minform=warn
#F90_LINK_OPTS = $(F90_OPTS) -fpic
#EXE_SFX = fast-acc

# --- Profiling, fully optimized ----------------------------------------------
#F90_OPTS += -Minstrument -Mprof=ccff -fast -Munroll -Mnoinline -Minform=warn
#F90_LINK_OPTS = $(F90_OPTS) -fPIC
#EXE_SFX = fastprof-acc
#==============================================================================
# BLAS and LAPACK
#==============================================================================
# Use bundled OpenBLAS 0.3.7 (not threadsafe!)
LAPACK_LIB = -lblas -llapack
# Use OpenBLAS
#OBLAS_PATH = /opt/openblas/openblas-0.3.10/nv-20.11
#LAPACK_LIB = -L$(OBLAS_PATH)/lib -lopenblas
# Use MAGMA
MAGMA_PATH = /opt/magma/magma-2.5.4/nv20.11+cuda11.1+builtin
#MAGMA_PATH = /opt/magma/magma-2.5.4/nv20.11+cuda11.1+openblas0.3.10
MAGMA_INC = -I$(MAGMA_PATH)/include
# Put MAGMA in front of the BLAS/LAPACK libraries chosen above. A linker
# resolves static archives left to right, so a library has to be listed before
# the libraries it calls into; appending -lmagma after -lblas/-llapack only
# links when libmagma is a shared object. `:=` is required here because the
# new value refers to the old value of LAPACK_LIB.
LAPACK_LIB := -L$(MAGMA_PATH)/lib -lmagma $(LAPACK_LIB)
CPP_OPTS += -D_MAGMA_ -DNGPUS=1
F90_OPTS += $(MAGMA_INC)
#==============================================================================
# FFTW 3
#==============================================================================
# Currently disabled: the three definitions below are commented out, so the
# two appends at the end add nothing. Uncomment all three to enable FFTW 3.
#FFT_INC = -I/opt/fftw/fftw-3.3.8/nv-20.11/include
#FFT_LIB = -L/opt/fftw/fftw-3.3.8/nv-20.11/lib -lfftw3_omp -lfftw3 -lm
#FFT_CPP_OPTS = -D_FFTW3_

# Include path for the compiler, feature define for the preprocessor
F90_OPTS += $(FFT_INC)
CPP_OPTS += $(FFT_CPP_OPTS)
#==============================================================================
# HDF5
#==============================================================================
# Currently disabled: the three definitions below are commented out, so the
# two appends at the end add nothing. Uncomment all three to enable HDF5.
#HDF5_INC = -I/opt/hdf5/hdf5-1.12.0/nv-20.11/include
#HDF5_LIB = -L/opt/hdf5/hdf5-1.12.0/nv-20.11/lib -lhdf5_fortran -lhdf5_hl -lhdf5 -ldl
#HDF5_CPP_OPTS = -D_HDF5_

# Include path for the compiler, feature define for the preprocessor
F90_OPTS += $(HDF5_INC)
CPP_OPTS += $(HDF5_CPP_OPTS)
#==============================================================================
# Other libraries (retained here for historical purposes)
#==============================================================================
# === compile with libXC support ===
#CPP_OPTS := $(CPP_OPTS) -D_LIBXC_
#XC_LIB =
# === compile with NFFT support ===
#CPP_OPTS := $(CPP_OPTS) -D_NFFT_
#NFFT_INC = -I$(HOME)/local/include
#NFFT_LIB = $(HOME)/local/lib/libnfft3.a $(HOME)/local/lib/libfftw3.a
# === compile with Madness API ===
#CPP_OPTS := $(CPP_OPTS) -D_MAD_
#MADNESS_INC = -I$(HOME)/local/include
#MADNESS_LIB = -L$(HOME)/local/lib/ -lMADmra -lMADlinalg -lMADtensor -lMADmisc -lMADmuparser -lMADtinyxml -lMADworld -lmpichcxx -lstdc++
#==============================================================================
# List all libraries to link
#==============================================================================
# Linear algebra first, then FFT, then HDF5; variables left undefined above
# simply expand to nothing.
LIBS = $(LAPACK_LIB) \
       $(FFT_LIB) \
       $(HDF5_LIB)