diff --git a/CHANGELOG.md b/CHANGELOG.md index f994a4d05..d709ccaa4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ All notable changes to Merlin will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] +### Fixed +- Typo in `batch.py` that caused lsf launches to fail (`ALL_SGPUS` changed to `ALL_GPUS`) + ## [1.11.0] ### Added - New reserved variable: diff --git a/merlin/study/batch.py b/merlin/study/batch.py index e02a65a32..1b96cd282 100644 --- a/merlin/study/batch.py +++ b/merlin/study/batch.py @@ -299,7 +299,7 @@ def construct_scheduler_legend(parsed_batch: Dict, nodes: int) -> Dict: "lsf": { "check cmd": ["jsrun", "--help"], "expected check output": b"jsrun", - "launch": f"jsrun -a 1 -c ALL_CPUS -g ALL_SGPUS --bind=none -n {nodes}", + "launch": f"jsrun -a 1 -c ALL_CPUS -g ALL_GPUS --bind=none -n {nodes}", }, # pbs is mainly a placeholder in case a user wants to try it (we don't have it at the lab so it's mostly untested) "pbs": { @@ -335,12 +335,16 @@ def construct_worker_launch_command(parsed_batch: Dict, nodes: int) -> str: scheduler_legend: Dict = construct_scheduler_legend(parsed_batch, nodes) workload_manager: str = get_batch_type(scheduler_legend) + LOG.debug(f"parsed_batch: {parsed_batch}") + if parsed_batch["btype"] == "pbs" and workload_manager == parsed_batch["btype"]: raise TypeError("The PBS scheduler is only enabled for 'batch: flux' type") if parsed_batch["btype"] == "slurm" and workload_manager not in ("lsf", "flux", "pbs"): workload_manager = "slurm" + LOG.debug(f"workload_manager: {workload_manager}") + try: launch_command = scheduler_legend[workload_manager]["launch"] except KeyError as e: # pylint: disable=C0103