Skip to content

Commit

Permalink
Feature/dawson/atslite1 args (#150)
Browse files Browse the repository at this point in the history
* First iteration of running on same node under flux

* Typo on option fixed

* Updating to test --level and --filter options with atslite1

* Further Updating to test --level and --filter options with atslite1

* Fix issue reported by Ben Liu regarding lrun and old_default and mpibind

---------

Co-authored-by: MishaZakharchanka <zakharchanka1@llnl.gov>
  • Loading branch information
dawson6 and MishaZakharchanka authored Aug 28, 2023
1 parent b6024ab commit ca6fa95
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 43 deletions.
17 changes: 17 additions & 0 deletions ats/atsMachines/fluxScheduled.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ def init(self):
self.coresPerGPU = 0
self.coresPerNode = int(self.numCores / self.numNodes)

# Strings used to determine which node a user wants the test to run
# Used with same_node var
self.node_list = []

# Maintain for backwards compatibility with projects
# Allow user to over-ride the coresPerNode
# Other schedulers call this npMax, but for flux we are calling this coresPerNode
Expand Down Expand Up @@ -263,6 +267,19 @@ def calculateCommandList(self, test):

ret.append(f"-t{max_time}")



import pprint

same_node = test.options.get('same_node', None)
if same_node is not None:
if same_node not in self.node_list:
self.node_list.append(same_node)
pprint.pprint(self.node_list)
print(f"This is the node that we are trying to run on:{self.node_list.index(same_node) % self.numNodes}")
ret.append(f"--requires=-rank:{self.node_list.index(same_node) % self.numNodes}")


"""
Need to set -n{np} and -c{test.cpus_per_task}. But we also need to account for accessing
GPUS using flux. In testing flux outside of ATS it is evident that one needs to increase the -c option
Expand Down
10 changes: 5 additions & 5 deletions ats/atsMachines/lsf_asq.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,15 +445,15 @@ def calculateCommandList(self, test):
"--env", str_omp_proc_bind,
"-N", str(int(test.num_nodes)),
"-n", str(np)
] + str_lrun_jsrun_args.split() + str_mpibind + commandList
] + str_lrun_jsrun_args.split() + [ str_mpibind ] + commandList
else :
return ["lrun",
str_smpi,
"--env", str_omp_display_env,
"--env", str_omp_num_threads,
"--env", str_omp_proc_bind,
"-n", str(np)
] + str_lrun_jsrun_args.split() + str_mpibind + commandList
] + str_lrun_jsrun_args.split() + [ str_mpibind ] + commandList
else:
if ( test.num_nodes > 0) :
return ["lrun",
Expand Down Expand Up @@ -546,15 +546,15 @@ def calculateCommandList(self, test):

if test.jsrun_bind == "unset":
if self.mpibind:
str_lrun_jsrun_args = str_lrun_jsrun_args + " -b none " + str_mpibind
str_lrun_jsrun_args = str_lrun_jsrun_args + " -b none " + [ str_mpibind ]
else:
if self.old_defaults:
str_lrun_jsrun_args = str_lrun_jsrun_args + " -b rs "
else:
str_lrun_jsrun_args = str_lrun_jsrun_args + " -b rs "
else:
if self.mpibind:
str_lrun_jsrun_args = str_lrun_jsrun_args + " -b " + test.jsrun_bind + " " + str_mpibind
str_lrun_jsrun_args = str_lrun_jsrun_args + " -b " + test.jsrun_bind + " " + [ str_mpibind ]
else:
str_lrun_jsrun_args = str_lrun_jsrun_args + " -b " + test.jsrun_bind

Expand Down Expand Up @@ -594,7 +594,7 @@ def calculateCommandList(self, test):
if str_lrun_jsrun_args == "unset":
str_lrun_jsrun_args = str_mpibind
else:
str_lrun_jsrun_args = str_lrun_jsrun_args + " " + str_mpibind
str_lrun_jsrun_args = str_lrun_jsrun_args + " " + [ str_mpibind ]

cpu_per_rs = np * test.cpus_per_task

Expand Down
7 changes: 5 additions & 2 deletions ats/tools/atslite1.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,13 @@ def main():
clean_found = False
exclusive_found = False
nosub_found = False
level_found = False

for index, arg in enumerate(sys.argv):
#print arg
if (arg.find('=') >= 0):
# print("SAD DEBUG index=%i arg=%s" % (index, arg))
if (arg.startswith('level=') >= 0):
level_found = True
elif (arg.find('=') >= 0):
(key, val) = arg.split('=',1)
sys.argv[index] = key + '="' + val + '"'
elif (arg.find('exclusive') >= 0):
Expand Down
7 changes: 5 additions & 2 deletions ats/tools/atslite3.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,12 @@ def main():
clean_found = False
exclusive_found = False
nosub_found = False
level_found = False

for index, arg in enumerate(sys.argv):
#print arg
if (arg.find('=') >= 0):
if (arg.startswith('level=') >= 0):
level_found = True
elif (arg.find('=') >= 0):
(key, val) = arg.split('=',1)
sys.argv[index] = key + '="' + val + '"'
elif (arg.find('exclusive') >= 0):
Expand Down
41 changes: 12 additions & 29 deletions test/HelloATS/READ.ME
Original file line number Diff line number Diff line change
Expand Up @@ -30,19 +30,24 @@ It may also present some which are HALTED if errors are detected in slurm
or mpi init by ATS.

--------------------------------------------------------------------------------
Toss 3 (rzgenie, etc.). Only use Slurm
Toss 4 Testing on slurm based toss4 machines such as rzwhippet
--------------------------------------------------------------------------------
export PATH=${PATH}:/usr/gapps/ats/scripts
module load python/3.8.2
// Modify this line to be the ats install you are testing
module load python/3.9.12
export PATH=/usr/gapps/ats/${SYS_TYPE}/7.0.${USER}/bin:$PATH

mpicc hello_ats.c <- build the code
./create_test_ats.py <- create the ats test file
atslite1 test.ats <- test using slurm
mpicc hello_ats.c <- build the code
./create_test_ats.py <- create the ats test file
atslite1 test.ats <- test using slurm
export -n MACHINE_TYPE

# POODLE TEST LINE (or any other system without cross node MPI)
salloc -N1 -p pdebug --exclusive
atslite1 test.ats
exit

The end of the run should include:

ATS SUMMARY May 16, 2023 14:20:16
FAILED: 10 a(a.out_1), a(a.out_3), a(a.out_9), a(a.out_11), ats_check_log#7, ats_check_log#8, a(a.out_17), a(a.out_19), a(a.out_25), a(a.out_27)
PASSED: 18
SKIPPED: 8
Expand Down Expand Up @@ -71,28 +76,6 @@ Toss 4 Cray rzvernal with rocm 5.5
PASSED: 18
SKIPPED: 8

--------------------------------------------------------------------------------
Toss 4 Testing on slurm based toss4 machines such as rzwhippet
--------------------------------------------------------------------------------
export PATH=${PATH}:/usr/gapps/ats/scripts
module load python/3.9.12
export PATH=/usr/gapps/ats/${SYS_TYPE}/7.0.${USER}/bin:$PATH

mpicc hello_ats.c <- build the code
./create_test_ats.py <- create the ats test file
atslite1 test.ats <- test using slurm
export -n MACHINE_TYPE

# POODLE TEST LINE (or any other system without cross node MPI)
salloc -N1 -p pdebug --exclusive
atslite1 test.ats
exit


ATS SUMMARY May 16, 2023 14:20:16
FAILED: 10 a(a.out_1), a(a.out_3), a(a.out_9), a(a.out_11), ats_check_log#7, ats_check_log#8, a(a.out_17), a(a.out_19), a(a.out_25), a(a.out_27)
PASSED: 18
SKIPPED: 8

--------------------------------------------------------------------------------
Blueos (rzansel) Uses LSF
Expand Down
26 changes: 21 additions & 5 deletions test/HelloATS/create_test_ats.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,30 +27,46 @@ def get_test_lines_generator():
# Duplicate items in nprocs: [1, 2, ..., 64] --> [1, 1, 2, 2, ..., 64, 64]
nprocs = sorted(2 * [1, 2, 3, 4, 5, 6, 7, 8, 16 ])

test_line = "t%d=test (executable='./a.out', clas='%s', " \
test_line = "t%d=test (executable='./a.out', level=20, clas='%s', " \
"label='a.out_%d', np=%d, sandbox=False)\n"
return (test_line % (test_num, arg_, test_num, num_proc)
for test_num, arg_, num_proc in zip(range(1, 44, 2), clas, nprocs))


def get_testif_lines_generator():
    """Return a generator of ``testif`` (conditional test) lines.

    One line is produced for every even test number from 2 through 44.
    Each line is named ``t<N>``, passes ``t<N-1>`` as the first argument
    to ``testif`` and ``t<N-1>.outname`` as ``clas``, and carries
    ``level=20``.
    """
    # Only the level=20 form of the continuation line is kept; the older
    # form without a level would otherwise shadow it.
    testif_line = "t%d=testif(t%d, executable = my_checker, " \
                  "level=20, clas = t%d.outname, nosrun=True)\n"
    return (testif_line % (testif_num, testif_num - 1, testif_num - 1)
            for testif_num in range(2, 45, 2))

def get_test_lines_generator_level_10():
    """Return a generator of unnamed ``test(...)`` lines at ``level=10``.

    Eight lines are produced, one per label. The ``clas`` value alternates
    between an empty string and ``'arg1 arg2 arg3'``; ``np`` runs through
    1, 1, 2, 2, 3, 3, 4, 4; every line uses ``nt=1``.
    """
    template = ("test(executable='./a.out', level=10, clas='%s', "
                "label='%s', np=%d, nt=1)\n")
    words = ('the', 'cat', 'in', 'hat', 'chased', 'big', 'red', 'fox')
    proc_counts = sorted(2 * [1, 2, 3, 4])
    arg_cycle = itertools.cycle(['', 'arg1 arg2 arg3'])
    # zip stops at the shortest input, so the endless cycle is bounded by
    # the eight labels / process counts.
    rows = zip(arg_cycle, words, proc_counts)
    return (template % row for row in rows)



if __name__ == "__main__":
    # Name of the ATS input file this script writes.
    TEST_ATS = "test.ats"

    # Each helper is called exactly once; the generators are consumed
    # while writing the file below.
    FILE_HEADER = get_file_header()
    TEST_LINES = get_test_lines_generator()
    TESTIF_LINES = get_testif_lines_generator()
    TEST10_LINES = get_test_lines_generator_level_10()

    with open(TEST_ATS, 'w') as ofp:
        ofp.write(FILE_HEADER)

        # Write each test line followed by the testif line paired with it.
        for test, testif in zip(TEST_LINES, TESTIF_LINES):
            ofp.write(test)
            ofp.write(testif)

        # The level-10 tests are appended after the paired lines.
        for test in TEST10_LINES:
            ofp.write(test)

    print(f"Most Excellent! Created ats test file {TEST_ATS}\n")

0 comments on commit ca6fa95

Please sign in to comment.