Add Pytest Fixtures to Test Suite (#456)
* begin work on integration refactor; create fixtures and initial tests

* update CHANGELOG and run fix-style

* add pytest fixtures and README explaining them

* add tests to demonstrate how to use the fixtures

* move/rename some files and modify integration's README

* add password change to redis.pass file

* fix lint issues

* modify redis pwd for test server to be constant for each test

* fix lint issue only caught on github ci
bgunnar5 authored Nov 2, 2023
1 parent 89093dd commit 5dc8206
Showing 10 changed files with 647 additions and 17 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -4,6 +4,11 @@ All notable changes to Merlin will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
### Added
- Pytest fixtures in the `conftest.py` file of the integration test suite
- Tests for the `celeryadapter.py` module

## [1.11.1]
### Fixed
- Typo in `batch.py` that caused lsf launches to fail (`ALL_SGPUS` changed to `ALL_GPUS`)
2 changes: 1 addition & 1 deletion merlin/common/tasks.py
@@ -480,7 +480,7 @@ def expand_tasks_with_samples( # pylint: disable=R0913,R0914
if not found_tasks:
for next_index_path, next_index in sample_index.traverse(conditional=condition):
LOG.info(
f"generating next step for range {next_index.min}:{next_index.max} {next_index.max-next_index.min}"
f"generating next step for range {next_index.min}:{next_index.max} {next_index.max - next_index.min}"
)
next_index.name = next_index_path

3 changes: 3 additions & 0 deletions merlin/server/server_commands.py
@@ -92,6 +92,9 @@ def config_server(args: Namespace) -> None: # pylint: disable=R0912
redis_users = RedisUsers(server_config.container.get_user_file_path())
redis_users.set_password("default", args.password)
redis_users.write()
pass_file = server_config.container.get_pass_file_path()
with open(pass_file, "w") as pfile:
    pfile.write(args.password)

redis_config.set_directory(args.directory)

34 changes: 22 additions & 12 deletions merlin/study/celeryadapter.py
@@ -37,6 +37,7 @@
import subprocess
import time
from contextlib import suppress
from typing import Dict, List, Optional

from merlin.study.batch import batch_check_parallel, batch_worker_launch
from merlin.utils import apply_list_of_regex, check_machines, get_procs, get_yaml_var, is_running
@@ -69,23 +70,31 @@ def run_celery(study, run_mode=None):
queue_merlin_study(study, adapter_config)


def get_running_queues():
def get_running_queues(celery_app_name: str, test_mode: bool = False) -> List[str]:
"""
Check for running celery workers with -Q queues
and return a unique list of the queues
Check for running celery workers by looking at the currently running processes.
If there are running celery workers, we'll pull the queues from the -Q tag in the
process command. The list returned here will contain only unique celery queue names.
This must be run on the allocation where the workers are running.
Must be run on the allocation where the workers are running
:param `celery_app_name`: The name of the celery app (typically merlin here unless testing)
:param `test_mode`: If True, run this function in test mode
:returns: A unique list of celery queues with workers attached to them
"""
running_queues = []

if not is_running("celery worker"):
if not is_running(f"{celery_app_name} worker"):
return running_queues

procs = get_procs("celery")
proc_name = "celery" if not test_mode else "sh"
procs = get_procs(proc_name)
for _, lcmd in procs:
lcmd = list(filter(None, lcmd))
cmdline = " ".join(lcmd)
if "-Q" in cmdline:
if test_mode:
echo_cmd = lcmd.pop(2)
lcmd.extend(echo_cmd.split())
running_queues.extend(lcmd[lcmd.index("-Q") + 1].split(","))

running_queues = list(set(running_queues))
@@ -155,19 +164,20 @@ def get_active_workers(app):
return worker_queue_map


def celerize_queues(queues):
def celerize_queues(queues: List[str], config: Optional[Dict] = None):
"""
Celery requires a queue tag to be prepended to their
queues so this function will 'celerize' every queue in
a list you provide it by prepending the queue tag.
:param `queues`: A list of queues that need the queue
tag prepended.
:param `queues`: A list of queues that need the queue tag prepended.
:param `config`: A dict of configuration settings
"""
from merlin.config.configfile import CONFIG # pylint: disable=C0415
if config is None:
from merlin.config.configfile import CONFIG as config # pylint: disable=C0415

for i, queue in enumerate(queues):
queues[i] = f"{CONFIG.celery.queue_tag}{queue}"
queues[i] = f"{config.celery.queue_tag}{queue}"


def _build_output_table(worker_list, output_table):
@@ -462,7 +472,7 @@ def start_celery_workers(spec, steps, celery_args, disable_logs, just_return_com
running_queues.extend(local_queues)
queues = queues.split(",")
if not overlap:
running_queues.extend(get_running_queues())
running_queues.extend(get_running_queues("merlin"))
# Cache the queues from this worker to use to test
# for existing queues in any subsequent workers.
# If overlap is True, then do not check the local queues.
1 change: 1 addition & 0 deletions requirements/dev.txt
@@ -10,3 +10,4 @@ twine
sphinx>=2.0.0
alabaster
johnnydep
deepdiff
152 changes: 152 additions & 0 deletions tests/README.md
@@ -0,0 +1,152 @@
# Tests

This directory utilizes pytest to create and run our test suite.
Here we use pytest fixtures to create a local redis server and a celery app for testing.

This directory is organized like so:
- `conftest.py` - The script containing all fixtures for our tests
- `unit/` - The directory containing unit tests
    - `test_*.py` - The actual test scripts to run
- `integration/` - The directory containing integration tests
    <!-- - `test_*.py` - The actual test scripts to run -->
    - `definitions.py` - The test definitions
    - `run_tests.py` - The script to run the tests defined in `definitions.py`
    - `conditions.py` - The conditions to test against

## How to Run

Before running any tests:

1. Activate your virtual environment with Merlin's dev requirements installed
2. Navigate to the tests folder where this README is located

To run the entire test suite:

```
python -m pytest
```

To run a specific test file:

```
python -m pytest /path/to/test_specific_file.py
```

To run a certain test class within a specific test file:

```
python -m pytest /path/to/test_specific_file.py::TestCertainClass
```

To run one unique test:

```
python -m pytest /path/to/test_specific_file.py::TestCertainClass::test_unique_test
```

## Killing the Test Server

In case of an issue with the test suite, or if you stop the tests with `ctrl+C`, you may need to stop
the server manually. This can be done with:

```
redis-cli
127.0.0.1:6379> AUTH merlin-test-server
127.0.0.1:6379> shutdown
not connected> quit
```

## The Fixture Process Explained

Pytest fixtures play a fundamental role in establishing a consistent foundation for test execution,
thus ensuring reliable and predictable test outcomes. This section will delve into essential aspects
of these fixtures, including how to integrate fixtures into tests, the utilization of fixtures within other fixtures,
their scope, and the yielding of fixture results.

### How to Integrate Fixtures Into Tests

The most important part of working with fixtures is understanding how to use them. Luckily, this process is very
simple and boils down to two steps:

1. Create a fixture in the `conftest.py` file by using the `@pytest.fixture` decorator. For example:

```
@pytest.fixture
def dummy_fixture():
return "hello world"
```

2. Use it as an argument in a test function (you don't even need to import it!):

```
def test_dummy(dummy_fixture):
    assert dummy_fixture == "hello world"
```

For more information, see [Pytest's documentation](https://docs.pytest.org/en/7.1.x/how-to/fixtures.html#how-to-use-fixtures).

### Fixtureception

One of the coolest and most useful aspects of fixtures that we utilize in this test suite is the ability for
fixtures to be used within other fixtures. For more info on this from pytest, see
[here](https://docs.pytest.org/en/7.1.x/how-to/fixtures.html#fixtures-can-request-other-fixtures).

Pytest will handle fixtures within fixtures in a stack-based way. Let's look at how creating the `redis_pass`
fixture from our `conftest.py` file works in order to illustrate the process.
1. First, we start by telling pytest that we want to use the `redis_pass` fixture by providing it as an argument
to a test/fixture:

```
def test_example(redis_pass):
...
```

2. Now pytest will find the `redis_pass` fixture and put it at the top of the stack to be created. However,
it'll see that this fixture requires another fixture `merlin_server_dir` as an argument:

```
@pytest.fixture(scope="session")
def redis_pass(merlin_server_dir):
...
```

3. Pytest then puts the `merlin_server_dir` fixture at the top of the stack, but similarly it sees that this fixture
requires yet another fixture `temp_output_dir`:

```
@pytest.fixture(scope="session")
def merlin_server_dir(temp_output_dir: str) -> str:
...
```

4. This process continues until it reaches a fixture that doesn't require any more fixtures. At this point the base
fixture is created and pytest will start working its way back up the stack to the first fixture it looked at (in this
case `redis_pass`).

5. Once all required fixtures are created, execution will be returned to the test which can now access the fixture
that was requested (`redis_pass`).
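
To make the chain described above concrete, here is a minimal, self-contained sketch of what such a fixture chain could look like. The fixture names mirror the ones discussed above, but the bodies are simplified stand-ins rather than the actual implementations in our `conftest.py`:

```
import os

import pytest


@pytest.fixture(scope="session")
def temp_output_dir(tmp_path_factory: pytest.TempPathFactory) -> str:
    # Base fixture: requires nothing else, so pytest creates it first
    return str(tmp_path_factory.mktemp("test_output"))


@pytest.fixture(scope="session")
def merlin_server_dir(temp_output_dir: str) -> str:
    # Requires temp_output_dir, so it sits one level up the stack
    server_dir = os.path.join(temp_output_dir, "merlin_server")
    os.makedirs(server_dir, exist_ok=True)
    return server_dir


@pytest.fixture(scope="session")
def redis_pass(merlin_server_dir: str) -> str:
    # Created last, once everything below it on the stack exists
    return "merlin-test-server"
```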

As you can see, if we had to re-do this process for every test it could get pretty time-intensive. This is where fixture
scopes come to save the day.

### Fixture Scopes

There are several different scopes that you can set for fixtures. The majority of our fixtures use a `session`
scope so that we only have to create the fixtures one time (as some of them can take a few seconds to set up).
The goal is to create fixtures with the most general use-case in mind so that we can re-use them for larger
scopes, which helps with efficiency.
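
As a generic illustration (the fixture below is hypothetical, not one from our `conftest.py`), a session-scoped fixture is built once and then shared by every test that requests it:

```
import pytest


@pytest.fixture(scope="session")
def expensive_setup() -> dict:
    # Imagine several seconds of setup happening here; with scope="session"
    # it only happens once for the entire test run
    return {"ready": True}


def test_first(expensive_setup):
    assert expensive_setup["ready"]


def test_second(expensive_setup):
    # Same object as in test_first -- the fixture was not re-created
    assert expensive_setup["ready"]
```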

For more info on scopes, see
[Pytest's Fixture Scope documentation](https://docs.pytest.org/en/6.2.x/fixture.html#scope-sharing-fixtures-across-classes-modules-packages-or-session).

### Yielding Fixtures

In several fixtures throughout our test suite, we need to run some sort of teardown for the fixture. For example,
once we no longer need the `redis_server` fixture, we need to shut the server down so it stops using resources.
This is where yielding fixtures becomes extremely useful.

Using the `yield` keyword allows execution to be returned to a test that needs the fixture once the fixture has
been set up. After all tests using the fixture have run, execution returns to the fixture so we can run
our teardown code.
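
As a hypothetical example (not a fixture from our `conftest.py`), the setup/teardown pattern looks like this:

```
import os
import tempfile

import pytest


@pytest.fixture
def scratch_file():
    # Setup: everything before the yield runs when a test first requests the fixture
    fd, path = tempfile.mkstemp()
    os.close(fd)
    yield path
    # Teardown: runs after the tests that use this fixture have finished
    os.remove(path)
```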

For more information on yielding fixtures, see [Pytest's documentation](https://docs.pytest.org/en/7.1.x/how-to/fixtures.html#teardown-cleanup-aka-fixture-finalization).