forked from shedsaw/exciting-plus-rgvw-mod
-
Notifications
You must be signed in to change notification settings - Fork 0
/
make.inc.cori.nv.acc
153 lines (118 loc) · 5.52 KB
/
make.inc.cori.nv.acc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#==============================================================================
# Exciting-Plus make.inc file for Cori-GPU (NERSC) with PGI compilers
# Last edited: Sep 22, 2020 (WYP)
#==============================================================================
MAKE = make
COMPILER = nv
# Compiler switch to turn on the preprocessor
CPP_OPTS = -Mpreprocess
# Add your custom preprocessor defines here
#CPP_OPTS +=
# Common compile time options
F90_OPTS = $(CPP_OPTS)
# Add your common compile time options here
#F90_OPTS += -Minfo=ftn,loop,opt,mp,par,vect # Show optimizer log
F90_OPTS += -Minfo=accel # Show optimizer log only for OpenACC
F90_OPTS += -Mstack_arrays # Use stack instead of heap for automatic arrays
#==============================================================================
# OpenACC
#==============================================================================
#F90_OPTS += -gpu=managed:debug # managed memory, debug build
F90_OPTS += -gpu=managed # managed memory, optimized build
#F90_OPTS += -gpu=debug # non-managed memory, debug build
F90_OPTS += -acc=gpu # run on GPU
#F90_OPTS += -acc=multicore # run on CPU (for debugging purposes)
#F90_OPTS += -acc=verystrict # debug build
F90_OPTS += -acc=autopar # optimized build
#==============================================================================
# MPI
#==============================================================================
F90 = mpifort
CC = mpicc
CXX = mpic++
MPI_CPP_OPTS = -D_MPI_
CPP_OPTS += $(MPI_CPP_OPTS)
#==============================================================================
# OpenMP
#==============================================================================
# These are passed at compile time and link time
# Don't forget to `export OMP_STACKSIZE=2G` at runtime
OMP_OPTS = -mp
F90_OPTS += $(OMP_OPTS)
#==============================================================================
# Serial compiler (for utilities)
#==============================================================================
F90SERIAL = nvfortran
F90_OPTS_SERIAL = $(filter-out $(OMP_OPTS),$(filter-out $(MPI_CPP_OPTS),$(F90_OPTS)))
#==============================================================================
# Compiler and linker options
#==============================================================================
# Debugging
#F90_OPTS += -g -Minform=warn
#F90_LINK_OPTS = $(F90_OPTS)
#EXE_SFX = dbg-acc
# Debugging with extra checks
# (Don't forget to enable core dump generation using `ulimit -c unlimited`)
#F90_OPTS += -g -O0 -Minform=warn -Mbounds -traceback -Ktrap=inv
#F90_LINK_OPTS = $(F90_OPTS)
#EXE_SFX = dbgchk-acc
# Optimized build with equivalent options to '-O2'
F90_OPTS += -gopt -O -Munroll -Mlre -Mvect=simd -Mflushz -Mcache_align -Mnoinline -Minform=warn
F90_LINK_OPTS = $(F90_OPTS) -fpic
EXE_SFX = opt-acc
# Profiling
# Note: don't use '-Mpfi', it is incompatible with '-mp' and '-fpic'
#F90_OPTS += -Minstrument -Mprof=ccff -O -Munroll -Mlre -Mflushz -Mcache_align -Mnoinline -Minform=warn
#F90_LINK_OPTS = $(F90_OPTS) -fpic
#EXE_SFX = prof
# Fully optimized build
# Note: with PGI 19.9 and 19.10, for some reason using '-O2' or higher
# (including '-fast' and '-fastsse') chokes on autoradmt.f90:39
# DO i = -1, 1
# (see OLCF ticket #419691)
# This has been fixed in PGI 20.1
#F90_OPTS += -fast -Munroll -Mnoinline -Minform=warn
#F90_LINK_OPTS = $(F90_OPTS) -fpic
#EXE_SFX = fast
# Profiling, fully optimized
# Note: make sure to use PGI 20.1, see above
#F90_OPTS += -Minstrument -Mprof=ccff -fast -Munroll -Mnoinline -Minform=warn
#F90_LINK_OPTS = $(F90_OPTS) -fpic
#EXE_SFX = fastprof
#==============================================================================
# BLAS and LAPACK
#==============================================================================
# Use PGI's bundled BLAS and LAPACK
#LAPACK_LIB = -lblas -llapack
# Use Intel MKL
F90_OPTS += -fortranlibs -I${MKLROOT}/include
#LAPACK_LIB = -L${MKLROOT}/lib/intel64 -L/opt/intel/compilers_and_libraries/linux/lib/intel64 -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lm -ldl
LAPACK_LIB = -L${MKLROOT}/lib/intel64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread -lm -ldl
#==============================================================================
# HDF5
#==============================================================================
# To disable, comment these three lines
# Make sure to `module load hdf5`
#HDF5_INC = -I${OLCF_HDF5_ROOT}/include
#HDF5_LIB = -L${OLCF_HDF5_ROOT}/lib -lhdf5_fortran -lhdf5_cpp -lhdf5_hl -lhdf5 -ldl
#HDF5_CPP_OPTS = -D_HDF5_
CPP_OPTS += $(HDF5_CPP_OPTS)
F90_OPTS += $(HDF5_INC)
#==============================================================================
# Other libraries (retained here for historical purposes)
#==============================================================================
# ==- compile with libXC support ===
#CPP_OPTS := $(CPP_OPTS) -D_LIBXC_
#XC_LIB =
# ==- compile with NFFT support ===
#CPP_OPTS := $(CPP_OPTS) -D_NFFT_
#NFFT_INC = -I$(HOME)/local/include
#NFFT_LIB = $(HOME)/local/lib/libnfft3.a $(HOME)/local/lib/libfftw3.a
# === compile with Madness API ===
#CPP_OPTS := $(CPP_OPTS) -D_MAD_
#MADNESS_INC = -I$(HOME)/local/include
#MADNESS_LIB = -L$(HOME)/local/lib/ -lMADmra -lMADlinalg -lMADtensor -lMADmisc -lMADmuparser -lMADtinyxml -lMADworld -lmpichcxx -lstdc++
#==============================================================================
# List all libraries to link
#==============================================================================
LIBS = $(LAPACK_LIB) $(HDF5_LIB) # -L${CUDA_DIR}/lib64/ -lnvToolsExt