From ca6fa95e3ebe4b54b94d0ed7eebd8250b8d32a73 Mon Sep 17 00:00:00 2001
From: Shawn Dawson <dawson6@llnl.gov>
Date: Mon, 28 Aug 2023 14:09:22 -0700
Subject: [PATCH] Feature/dawson/atslite1 args (#150)

* First iteration of running on same node under flux

* Typo on option fixed

* Updating to test --level and --filter options with atslite1

* Further Updating to test --level and --filter options with atslite1

* Fix issue reported by Ben Liu regarding lrun and old_default and mpibind

---------

Co-authored-by: MishaZakharchanka <zakharchanka1@llnl.gov>
---
 ats/atsMachines/fluxScheduled.py | 17 +++++++++++++
 ats/atsMachines/lsf_asq.py       | 10 ++++----
 ats/tools/atslite1.py            |  7 ++++--
 ats/tools/atslite3.py            |  7 ++++--
 test/HelloATS/READ.ME            | 41 ++++++++++----------------------
 test/HelloATS/create_test_ats.py | 26 ++++++++++++++++----
 6 files changed, 65 insertions(+), 43 deletions(-)

diff --git a/ats/atsMachines/fluxScheduled.py b/ats/atsMachines/fluxScheduled.py
index b4abe4e..13aaf91 100755
--- a/ats/atsMachines/fluxScheduled.py
+++ b/ats/atsMachines/fluxScheduled.py
@@ -62,6 +62,10 @@ def init(self):
             self.coresPerGPU = 0
         self.coresPerNode = int(self.numCores / self.numNodes)
 
+        # Labels seen so far in the same_node test option, in first-seen order.
+        # A label's index in this list selects the node its tests are pinned to.
+        self.node_list = []
+
         # Maintain for backwards compatability with projects
         # Allow user to over-ride the coresPerNode
         # Other schedulers call this npMax, but for flux we are calling this coresPerNode
@@ -263,6 +267,19 @@ def calculateCommandList(self, test):
 
             ret.append(f"-t{max_time}")
 
+
+
+        # Tests sharing a same_node label are pinned to one flux rank; labels are
+        # given ranks in first-seen order, wrapping around modulo numNodes.
+        same_node = test.options.get('same_node', None)
+        if same_node is not None:
+            if same_node not in self.node_list:
+                self.node_list.append(same_node)
+            node_rank = self.node_list.index(same_node) % self.numNodes
+            print(f"This is the node that we are trying to run on:{node_rank}")
+            ret.append(f"--requires=rank:{node_rank}")  # '-rank:' would exclude it
+
+
         """
         Need to set -n{np} and -c{test.cpus_per_task}.  But we also need to account for accessing
         GPUS using flux.  In testing flux outside of ATS it is evident that one needs to increase the -c option
diff --git a/ats/atsMachines/lsf_asq.py b/ats/atsMachines/lsf_asq.py
index 97c6ab3..7c5976a 100644
--- a/ats/atsMachines/lsf_asq.py
+++ b/ats/atsMachines/lsf_asq.py
@@ -445,7 +445,7 @@ def calculateCommandList(self, test):
                                     "--env", str_omp_proc_bind,
                                     "-N", str(int(test.num_nodes)),
                                     "-n", str(np)
-                                    ] + str_lrun_jsrun_args.split() + str_mpibind + commandList
+                                    ] + str_lrun_jsrun_args.split() + [ str_mpibind ] + commandList
                         else :
                             return ["lrun",
                                     str_smpi,
@@ -453,7 +453,7 @@ def calculateCommandList(self, test):
                                     "--env", str_omp_num_threads,
                                     "--env", str_omp_proc_bind,
                                     "-n", str(np)
-                                    ] + str_lrun_jsrun_args.split() + str_mpibind + commandList
+                                    ] + str_lrun_jsrun_args.split() + [ str_mpibind ] + commandList
                     else:
                         if ( test.num_nodes > 0) :
                             return ["lrun",
@@ -546,7 +546,7 @@ def calculateCommandList(self, test):
 
                     if test.jsrun_bind == "unset":
                         if self.mpibind:
-                            str_lrun_jsrun_args = str_lrun_jsrun_args + " -b none " + str_mpibind
+                            str_lrun_jsrun_args += " -b none " + str_mpibind
                         else:
                             if self.old_defaults:
                                 str_lrun_jsrun_args = str_lrun_jsrun_args + " -b rs "
@@ -554,7 +554,7 @@ def calculateCommandList(self, test):
                                 str_lrun_jsrun_args = str_lrun_jsrun_args + " -b rs "
                     else:
                         if self.mpibind:
-                            str_lrun_jsrun_args = str_lrun_jsrun_args + " -b " + test.jsrun_bind + " " + str_mpibind
+                            str_lrun_jsrun_args += " -b " + test.jsrun_bind + " " + str_mpibind
                         else:
                             str_lrun_jsrun_args = str_lrun_jsrun_args + " -b " + test.jsrun_bind
 
@@ -594,7 +594,7 @@ def calculateCommandList(self, test):
                         if str_lrun_jsrun_args == "unset":
                             str_lrun_jsrun_args = str_mpibind
                         else:
-                            str_lrun_jsrun_args = str_lrun_jsrun_args + " " + str_mpibind
+                            str_lrun_jsrun_args += " " + str_mpibind
 
                     cpu_per_rs = np * test.cpus_per_task
 
diff --git a/ats/tools/atslite1.py b/ats/tools/atslite1.py
index ada970e..fb45908 100755
--- a/ats/tools/atslite1.py
+++ b/ats/tools/atslite1.py
@@ -30,10 +30,13 @@ def main():
     clean_found = False
     exclusive_found = False
     nosub_found = False
+    level_found = False
 
     for index, arg in enumerate(sys.argv):
-        #print arg
-        if (arg.find('=') >= 0):
+        # level=<n> arguments are only noted here; they skip the key="val" quoting below
+        if (arg.startswith('level=')):
+            level_found = True
+        elif (arg.find('=') >= 0):
             (key, val) = arg.split('=',1)
             sys.argv[index] = key + '="' + val + '"'
         elif (arg.find('exclusive') >= 0):
diff --git a/ats/tools/atslite3.py b/ats/tools/atslite3.py
index 5e926ae..6b9abeb 100755
--- a/ats/tools/atslite3.py
+++ b/ats/tools/atslite3.py
@@ -28,9 +28,12 @@ def main():
     clean_found = False
     exclusive_found = False
     nosub_found = False
+    level_found = False
+
     for index, arg in enumerate(sys.argv):
-        #print arg
-        if (arg.find('=') >= 0):
+        if (arg.startswith('level=')):
+            level_found = True
+        elif (arg.find('=') >= 0):
             (key, val) = arg.split('=',1)
             sys.argv[index] = key + '="' + val + '"'
         elif (arg.find('exclusive') >= 0):
diff --git a/test/HelloATS/READ.ME b/test/HelloATS/READ.ME
index 3de4ffe..d685278 100644
--- a/test/HelloATS/READ.ME
+++ b/test/HelloATS/READ.ME
@@ -30,19 +30,24 @@ It may also present some which are HALTED if errors are detected in slurm
 or mpi init by ATS.
 
 --------------------------------------------------------------------------------
-Toss 3 (rzgenie, etc.).  Only use Slurm
+Toss 4   Testing on slurm based toss4 machines such as rzwhippet
 --------------------------------------------------------------------------------
     export PATH=${PATH}:/usr/gapps/ats/scripts
-    module load python/3.8.2
-        // Modify this line to be the ats install you are testing
+    module load python/3.9.12
     export PATH=/usr/gapps/ats/${SYS_TYPE}/7.0.${USER}/bin:$PATH
 
-    mpicc hello_ats.c        <- build the code
-    ./create_test_ats.py     <- create the ats test file
-    atslite1 test.ats        <- test using slurm
+    mpicc hello_ats.c            <- build the code
+    ./create_test_ats.py         <- create the ats test file
+    atslite1 test.ats           <- test using slurm
+    export -n MACHINE_TYPE
+
+    # POODLE TEST LINE (or any other system without cross node MPI)
+    salloc -N1 -p pdebug --exclusive
+    atslite1 test.ats 
+    exit
 
-    The end of the run should include:
 
+    ATS SUMMARY May 16, 2023 14:20:16
     FAILED:  10 a(a.out_1), a(a.out_3), a(a.out_9), a(a.out_11), ats_check_log#7, ats_check_log#8, a(a.out_17), a(a.out_19), a(a.out_25), a(a.out_27)
     PASSED:   18
     SKIPPED:  8
@@ -71,28 +76,6 @@ Toss 4  Cray rzvernal with rocm 5.5
     PASSED:   18
     SKIPPED:  8
 
---------------------------------------------------------------------------------
-Toss 4   Testing on slurm based toss4 machines such as rzwhippet
---------------------------------------------------------------------------------
-    export PATH=${PATH}:/usr/gapps/ats/scripts
-    module load python/3.9.12
-    export PATH=/usr/gapps/ats/${SYS_TYPE}/7.0.${USER}/bin:$PATH
-
-    mpicc hello_ats.c            <- build the code
-    ./create_test_ats.py         <- create the ats test file
-    atslite1 test.ats           <- test using slurm
-    export -n MACHINE_TYPE
-
-    # POODLE TEST LINE (or any other system without cross node MPI)
-    salloc -N1 -p pdebug --exclusive
-    atslite1 test.ats 
-    exit
-
-
-    ATS SUMMARY May 16, 2023 14:20:16
-    FAILED:  10 a(a.out_1), a(a.out_3), a(a.out_9), a(a.out_11), ats_check_log#7, ats_check_log#8, a(a.out_17), a(a.out_19), a(a.out_25), a(a.out_27)
-    PASSED:   18
-    SKIPPED:  8
 
 --------------------------------------------------------------------------------
 Blueos (rzansel)  Uses LSF
diff --git a/test/HelloATS/create_test_ats.py b/test/HelloATS/create_test_ats.py
index 5b57244..f75f22f 100755
--- a/test/HelloATS/create_test_ats.py
+++ b/test/HelloATS/create_test_ats.py
@@ -27,30 +27,46 @@ def get_test_lines_generator():
     # Duplicate items in nprocs: [1, 2, ..., 64] --> [1, 1, 2, 2, ..., 64, 64]
     nprocs = sorted(2 * [1, 2, 3, 4, 5, 6, 7, 8, 16 ])
 
-    test_line = "t%d=test  (executable='./a.out', clas='%s', " \
+    test_line = "t%d=test  (executable='./a.out', level=20, clas='%s', " \
                 "label='a.out_%d', np=%d, sandbox=False)\n"
     return (test_line % (test_num, arg_, test_num, num_proc)
             for test_num, arg_, num_proc in zip(range(1, 44, 2), clas, nprocs))
 
-
 def get_testif_lines_generator():
     """Returns a generator containing testifs (conditional tests)."""
     testif_line = "t%d=testif(t%d, executable = my_checker, " \
-                  "clas = t%d.outname, nosrun=True)\n"
+                  "level=20, clas = t%d.outname, nosrun=True)\n"
     return (testif_line % (testif_num, testif_num - 1, testif_num - 1)
             for testif_num in range(2, 45, 2))
 
+def get_test_lines_generator_level_10():
+    """Returns a generator of eight level=10 tests (np 1-4, two tests each)."""
+    clas = itertools.cycle(['', 'arg1 arg2 arg3'])
+    nprocs = sorted(2 * [1, 2, 3, 4 ])
+    labels = ('the', 'cat', 'in', 'hat', 'chased', 'big', 'red', 'fox')
+
+    test_line = "test(executable='./a.out', level=10, clas='%s', " \
+                "label='%s', np=%d, nt=1)\n"
+    return (test_line % (arg_, label, num_proc)
+            for arg_, label, num_proc in zip(clas, labels, nprocs) )
+
+
 
 if __name__ == "__main__":
     TEST_ATS = "test.ats"
-    FILE_HEADER = get_file_header()
-    TEST_LINES = get_test_lines_generator()
+    FILE_HEADER  = get_file_header()
+    TEST_LINES   = get_test_lines_generator()
     TESTIF_LINES = get_testif_lines_generator()
+    TEST10_LINES = get_test_lines_generator_level_10()
 
     with open(TEST_ATS, 'w') as ofp:
         ofp.write(FILE_HEADER)
+
         for test, testif in zip(TEST_LINES, TESTIF_LINES):
             ofp.write(test)
             ofp.write(testif)
 
+        for test in TEST10_LINES:
+            ofp.write(test)
+
     print(f"Most Excellent! Created ats test file {TEST_ATS}\n")