Feature/dawson/atslite1 args (#150)

* First iteration of running on same node under flux * Typo on option fixed * Updating to test --level and --filter options with atslite1 * Further Updating to test --level and --filter options with atslite1 * Fix issue reported by Ben Liu regarding lrun and old_default and mpibind --------- Co-authored-by: MishaZakharchanka <zakharchanka1@llnl.gov>
LLNL · Aug 28, 2023 · ca6fa95 · ca6fa95
1 parent b6024ab
commit ca6fa95
Show file tree

Hide file tree

Showing 6 changed files with 65 additions and 43 deletions.
diff --git a/ats/atsMachines/fluxScheduled.py b/ats/atsMachines/fluxScheduled.py
@@ -62,6 +62,10 @@ def init(self):
             self.coresPerGPU = 0
         self.coresPerNode = int(self.numCores / self.numNodes)
 
+        # Strings used to determine which node a user wants the test to run on.
+        # Used with the same_node test option.
+        self.node_list = []
+
         # Maintain for backwards compatibility with projects
         # Allow user to over-ride the coresPerNode
         # Other schedulers call this npMax, but for flux we are calling this coresPerNode
@@ -263,6 +267,19 @@ def calculateCommandList(self, test):
 
             ret.append(f"-t{max_time}")
 
+
+
+        import pprint
+
+        same_node = test.options.get('same_node', None)
+        if same_node is not None:
+            if same_node not in self.node_list:
+                self.node_list.append(same_node)
+                pprint.pprint(self.node_list)
+            print(f"This is the node that we are trying to run on:{self.node_list.index(same_node) % self.numNodes}")
+            ret.append(f"--requires=-rank:{self.node_list.index(same_node) % self.numNodes}")
+
+
         """
         Need to set -n{np} and -c{test.cpus_per_task}.  But we also need to account for accessing
         GPUS using flux.  In testing flux outside of ATS it is evident that one needs to increase the -c option

diff --git a/ats/atsMachines/lsf_asq.py b/ats/atsMachines/lsf_asq.py
@@ -445,15 +445,15 @@ def calculateCommandList(self, test):
                                     "--env", str_omp_proc_bind,
                                     "-N", str(int(test.num_nodes)),
                                     "-n", str(np)
-                                    ] + str_lrun_jsrun_args.split() + str_mpibind + commandList
+                                    ] + str_lrun_jsrun_args.split() + [ str_mpibind ] + commandList
                         else :
                             return ["lrun",
                                     str_smpi,
                                     "--env", str_omp_display_env,
                                     "--env", str_omp_num_threads,
                                     "--env", str_omp_proc_bind,
                                     "-n", str(np)
-                                    ] + str_lrun_jsrun_args.split() + str_mpibind + commandList
+                                    ] + str_lrun_jsrun_args.split() + [ str_mpibind ] + commandList
                     else:
                         if ( test.num_nodes > 0) :
                             return ["lrun",
@@ -546,15 +546,15 @@ def calculateCommandList(self, test):
 
                     if test.jsrun_bind == "unset":
                         if self.mpibind:
-                            str_lrun_jsrun_args = str_lrun_jsrun_args + " -b none " + str_mpibind
+                            str_lrun_jsrun_args = str_lrun_jsrun_args + " -b none " + [ str_mpibind ]
                         else:
                             if self.old_defaults:
                                 str_lrun_jsrun_args = str_lrun_jsrun_args + " -b rs "
                             else:
                                 str_lrun_jsrun_args = str_lrun_jsrun_args + " -b rs "
                     else:
                         if self.mpibind:
-                            str_lrun_jsrun_args = str_lrun_jsrun_args + " -b " + test.jsrun_bind + " " + str_mpibind
+                            str_lrun_jsrun_args = str_lrun_jsrun_args + " -b " + test.jsrun_bind + " " + [ str_mpibind ]
                         else:
                             str_lrun_jsrun_args = str_lrun_jsrun_args + " -b " + test.jsrun_bind
 
@@ -594,7 +594,7 @@ def calculateCommandList(self, test):
                         if str_lrun_jsrun_args == "unset":
                             str_lrun_jsrun_args = str_mpibind
                         else:
-                            str_lrun_jsrun_args = str_lrun_jsrun_args + " " + str_mpibind
+                            str_lrun_jsrun_args = str_lrun_jsrun_args + " " + [ str_mpibind ]
 
                     cpu_per_rs = np * test.cpus_per_task
 

diff --git a/ats/tools/atslite1.py b/ats/tools/atslite1.py
@@ -30,10 +30,13 @@ def main():
     clean_found = False
     exclusive_found = False
     nosub_found = False
+    level_found = False
 
     for index, arg in enumerate(sys.argv):
-        #print arg
-        if (arg.find('=') >= 0):
+        # print("SAD DEBUG index=%i arg=%s" % (index, arg))
+        if (arg.startswith('level=') >= 0):
+            level_found = True
+        elif (arg.find('=') >= 0):
             (key, val) = arg.split('=',1)
             sys.argv[index] = key + '="' + val + '"'
         elif (arg.find('exclusive') >= 0):

diff --git a/ats/tools/atslite3.py b/ats/tools/atslite3.py
@@ -28,9 +28,12 @@ def main():
     clean_found = False
     exclusive_found = False
     nosub_found = False
+    level_found = False
+
     for index, arg in enumerate(sys.argv):
-        #print arg
-        if (arg.find('=') >= 0):
+        if (arg.startswith('level=') >= 0):
+            level_found = True
+        elif (arg.find('=') >= 0):
             (key, val) = arg.split('=',1)
             sys.argv[index] = key + '="' + val + '"'
         elif (arg.find('exclusive') >= 0):

diff --git a/test/HelloATS/READ.ME b/test/HelloATS/READ.ME
@@ -30,19 +30,24 @@ It may also present some which are HALTED if errors are detected in slurm
 or mpi init by ATS.
 
 --------------------------------------------------------------------------------
-Toss 3 (rzgenie, etc.).  Only use Slurm
+Toss 4   Testing on slurm based toss4 machines such as rzwhippet
 --------------------------------------------------------------------------------
     export PATH=${PATH}:/usr/gapps/ats/scripts
-    module load python/3.8.2
-        // Modify this line to be the ats install you are testing
+    module load python/3.9.12
     export PATH=/usr/gapps/ats/${SYS_TYPE}/7.0.${USER}/bin:$PATH
 
-    mpicc hello_ats.c        <- build the code
-    ./create_test_ats.py     <- create the ats test file
-    atslite1 test.ats        <- test using slurm
+    mpicc hello_ats.c            <- build the code
+    ./create_test_ats.py         <- create the ats test file
+    atslite1 test.ats           <- test using slurm
+    export -n MACHINE_TYPE
+
+    # POODLE TEST LINE (or any other system without cross node MPI)
+    salloc -N1 -p pdebug --exclusive
+    atslite1 test.ats 
+    exit
 
-    The end of the run should include:
 
+    ATS SUMMARY May 16, 2023 14:20:16
     FAILED:  10 a(a.out_1), a(a.out_3), a(a.out_9), a(a.out_11), ats_check_log#7, ats_check_log#8, a(a.out_17), a(a.out_19), a(a.out_25), a(a.out_27)
     PASSED:   18
     SKIPPED:  8
@@ -71,28 +76,6 @@ Toss 4  Cray rzvernal with rocm 5.5
     PASSED:   18
     SKIPPED:  8
 
---------------------------------------------------------------------------------
-Toss 4   Testing on slurm based toss4 machines such as rzwhippet
---------------------------------------------------------------------------------
-    export PATH=${PATH}:/usr/gapps/ats/scripts
-    module load python/3.9.12
-    export PATH=/usr/gapps/ats/${SYS_TYPE}/7.0.${USER}/bin:$PATH
-
-    mpicc hello_ats.c            <- build the code
-    ./create_test_ats.py         <- create the ats test file
-    atslite1 test.ats           <- test using slurm
-    export -n MACHINE_TYPE
-
-    # POODLE TEST LINE (or any other system without cross node MPI)
-    salloc -N1 -p pdebug --exclusive
-    atslite1 test.ats 
-    exit
-
-
-    ATS SUMMARY May 16, 2023 14:20:16
-    FAILED:  10 a(a.out_1), a(a.out_3), a(a.out_9), a(a.out_11), ats_check_log#7, ats_check_log#8, a(a.out_17), a(a.out_19), a(a.out_25), a(a.out_27)
-    PASSED:   18
-    SKIPPED:  8
 
 --------------------------------------------------------------------------------
 Blueos (rzansel)  Uses LSF

diff --git a/test/HelloATS/create_test_ats.py b/test/HelloATS/create_test_ats.py
@@ -27,30 +27,46 @@ def get_test_lines_generator():
     # Duplicate items in nprocs: [1, 2, ..., 16] --> [1, 1, 2, 2, ..., 16, 16]
     nprocs = sorted(2 * [1, 2, 3, 4, 5, 6, 7, 8, 16 ])
 
-    test_line = "t%d=test  (executable='./a.out', clas='%s', " \
+    test_line = "t%d=test  (executable='./a.out', level=20, clas='%s', " \
                 "label='a.out_%d', np=%d, sandbox=False)\n"
     return (test_line % (test_num, arg_, test_num, num_proc)
             for test_num, arg_, num_proc in zip(range(1, 44, 2), clas, nprocs))
 
-
 def get_testif_lines_generator():
     """Returns a generator containing testifs (conditional tests)."""
     testif_line = "t%d=testif(t%d, executable = my_checker, " \
-                  "clas = t%d.outname, nosrun=True)\n"
+                  "level=20, clas = t%d.outname, nosrun=True)\n"
     return (testif_line % (testif_num, testif_num - 1, testif_num - 1)
             for testif_num in range(2, 45, 2))
 
+def get_test_lines_generator_level_10():
+    """Returns a generator containing independent tests."""
+    clas = itertools.cycle(['', 'arg1 arg2 arg3'])
+    nprocs = sorted(2 * [1, 2, 3, 4 ])
+    labels = ('the', 'cat', 'in', 'hat', 'chased', 'big', 'red', 'fox')
+
+    test_line = "test(executable='./a.out', level=10, clas='%s', " \
+                "label='%s', np=%d, nt=1)\n"
+    return (test_line % (arg_, label, num_proc)
+            for arg_, label, num_proc in zip(clas, labels, nprocs) )
+
+
 
 if __name__ == "__main__":
     TEST_ATS = "test.ats"
-    FILE_HEADER = get_file_header()
-    TEST_LINES = get_test_lines_generator()
+    FILE_HEADER  = get_file_header()
+    TEST_LINES   = get_test_lines_generator()
     TESTIF_LINES = get_testif_lines_generator()
+    TEST10_LINES = get_test_lines_generator_level_10()
 
     with open(TEST_ATS, 'w') as ofp:
         ofp.write(FILE_HEADER)
+
         for test, testif in zip(TEST_LINES, TESTIF_LINES):
             ofp.write(test)
             ofp.write(testif)
 
+        for test in TEST10_LINES:
+            ofp.write(test)
+
     print(f"Most Excellent! Created ats test file {TEST_ATS}\n")