From 8ef8ebb8c3c6a057e6d13fddc9f5146f5308f047 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Fri, 27 Jan 2023 09:29:10 +0100 Subject: [PATCH 001/188] Fix old Intel native CI --- .github/workflows/ci_ubuntu20.04_intel.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci_ubuntu20.04_intel.yaml b/.github/workflows/ci_ubuntu20.04_intel.yaml index c21b6cf3..40fc0595 100644 --- a/.github/workflows/ci_ubuntu20.04_intel.yaml +++ b/.github/workflows/ci_ubuntu20.04_intel.yaml @@ -19,4 +19,4 @@ jobs: - uses: actions/checkout@v2 - name: Run tests on Ubuntu 20.04 with Clang - run: docker run -v "${GITHUB_WORKSPACE}:/workspace" --env LC_ALL=C.UTF-8 --env LANG=C.UTF-8 --env DEBIAN_FRONTEND=noninteractive ubuntu:20.04 /bin/bash -c 'apt-get update && apt-get -y install wget && wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB && mv GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB /etc/apt/trusted.gpg.d/intel-sw-products.asc && echo "deb https://apt.repos.intel.com/oneapi all main" >/etc/apt/sources.list.d/oneAPI.list && apt-get update && apt-get -y dist-upgrade && apt-get -y install build-essential cmake git valgrind pkg-config python3 python3-pip python3-venv curl intel-oneapi-compiler-dpcpp-cpp-2021.1.1 intel-oneapi-compiler-fortran-2021.1.1 intel-oneapi-mpi-devel-2021.1.1 && apt-get -y remove libssl-dev zlib1g-dev && useradd -m -d /home/muscle3 muscle3 && su muscle3 -c -- "cp -r --preserve=mode /workspace /home/muscle3/muscle3" && su muscle3 -c -- "pip3 install --user -U \"pip<22\" setuptools wheel" && su muscle3 -c -- "pip3 install --user \"ymmsl>=0.13.0,<0.14\" qcg-pilotjob==0.13.1" && su muscle3 -c -s /bin/bash -- "cd /home/muscle3/muscle3 && . 
/opt/intel/oneapi/setvars.sh && CXX=icpx MPICXX=\"mpiicpc -cxx=icpx\" FC=ifx MPIFC=\"mpiifort -fc=ifx\" make test_examples"' + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" --env LC_ALL=C.UTF-8 --env LANG=C.UTF-8 --env DEBIAN_FRONTEND=noninteractive ubuntu:20.04 /bin/bash -c 'apt-get update && apt-get -y install wget && wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB && mv GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB /etc/apt/trusted.gpg.d/intel-sw-products.asc && echo "deb https://apt.repos.intel.com/oneapi all main" >/etc/apt/sources.list.d/oneAPI.list && apt-get update && apt-get -y dist-upgrade && apt-get -y install build-essential cmake git valgrind pkg-config python3 python3-pip python3-venv curl intel-oneapi-compiler-dpcpp-cpp-2021.1.1 intel-oneapi-compiler-fortran-2021.1.1 intel-oneapi-mpi-devel-2021.1.1 && apt-get -y remove libssl-dev zlib1g-dev && useradd -m -d /home/muscle3 muscle3 && su muscle3 -c -- "cp -r --preserve=mode /workspace /home/muscle3/muscle3" && su muscle3 -c -- "pip3 install --user -U \"pip<22\" setuptools wheel" && su muscle3 -c -- "pip3 install --user \"ymmsl>=0.13.0,<0.14\" qcg-pilotjob==0.13.1" && su muscle3 -s /bin/bash -c -- "cd /home/muscle3/muscle3 && . 
/opt/intel/oneapi/setvars.sh && CXX=icpx MPICXX=\"mpiicpc -cxx=icpx\" FC=ifx MPIFC=\"mpiifort -fc=ifx\" make test_examples"' From 9655342de99620de632b2506f028972da8dc5158 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Wed, 1 Feb 2023 11:11:03 +0100 Subject: [PATCH 002/188] Update comment strings in reaction.py --- docs/source/examples/python/reaction.py | 4 ++-- docs/source/tutorial_code/checkpointing_reaction_partial.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/examples/python/reaction.py b/docs/source/examples/python/reaction.py index b3ffcd7b..0a8335ea 100644 --- a/docs/source/examples/python/reaction.py +++ b/docs/source/examples/python/reaction.py @@ -8,8 +8,8 @@ def reaction() -> None: """A simple exponential reaction model on a 1D grid. """ instance = Instance({ - Operator.F_INIT: ['initial_state'], # list of float - Operator.O_F: ['final_state']}) # list of float + Operator.F_INIT: ['initial_state'], # 1D Grid + Operator.O_F: ['final_state']}) # 1D Grid while instance.reuse_instance(): # F_INIT diff --git a/docs/source/tutorial_code/checkpointing_reaction_partial.py b/docs/source/tutorial_code/checkpointing_reaction_partial.py index 49c706d5..3816f699 100644 --- a/docs/source/tutorial_code/checkpointing_reaction_partial.py +++ b/docs/source/tutorial_code/checkpointing_reaction_partial.py @@ -8,8 +8,8 @@ def reaction() -> None: """A simple exponential reaction model on a 1D grid. """ instance = Instance({ - Operator.F_INIT: ['initial_state'], # list of float - Operator.O_F: ['final_state']}, # list of float + Operator.F_INIT: ['initial_state'], # 1D Grid + Operator.O_F: ['final_state']}, # 1D Grid USES_CHECKPOINT_API) while instance.reuse_instance(): From 2b0493bad0b104a10abe9ebf009ea22264432016 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Wed, 1 Feb 2023 11:51:03 +0100 Subject: [PATCH 003/188] Refactor type comments into annotations Also remove flake8<6 restriction. 
Fixes #137 --- .../examples/python/interact_coupling.py | 8 +-- .../python/libmuscle/checkpoint_triggers.py | 14 +++--- libmuscle/python/libmuscle/communicator.py | 12 ++--- libmuscle/python/libmuscle/endpoint.py | 8 +-- libmuscle/python/libmuscle/instance.py | 8 +-- .../libmuscle/manager/instance_manager.py | 8 +-- .../libmuscle/manager/instance_registry.py | 4 +- .../python/libmuscle/manager/instantiator.py | 4 +- libmuscle/python/libmuscle/manager/manager.py | 2 +- .../libmuscle/manager/qcgpj_instantiator.py | 7 ++- .../libmuscle/manager/snapshot_registry.py | 16 +++--- .../libmuscle/mcp/tcp_transport_client.py | 2 +- .../libmuscle/mcp/tcp_transport_server.py | 4 +- libmuscle/python/libmuscle/mpp_client.py | 2 +- libmuscle/python/libmuscle/mpp_message.py | 3 +- libmuscle/python/libmuscle/outbox.py | 2 +- libmuscle/python/libmuscle/peer_manager.py | 2 +- libmuscle/python/libmuscle/planner/planner.py | 50 +++++++++---------- .../libmuscle/planner/test/test_planner.py | 20 +++----- libmuscle/python/libmuscle/port.py | 2 +- libmuscle/python/libmuscle/post_office.py | 2 +- libmuscle/python/libmuscle/profiler.py | 2 +- libmuscle/python/libmuscle/runner.py | 6 +-- .../python/libmuscle/snapshot_manager.py | 4 +- setup.cfg | 4 +- tox.ini | 4 +- 26 files changed, 97 insertions(+), 103 deletions(-) diff --git a/docs/source/examples/python/interact_coupling.py b/docs/source/examples/python/interact_coupling.py index 093e8296..7f8ebd36 100644 --- a/docs/source/examples/python/interact_coupling.py +++ b/docs/source/examples/python/interact_coupling.py @@ -55,10 +55,10 @@ def __init__(self) -> None: The cache starts out empty. 
""" - self.t_cur = None # type: Optional[float] - self.data_cur = None # type: Optional[Any] - self.t_next = None # type: Optional[float] - self.data_next = None # type: Optional[Any] + self.t_cur: Optional[float] = None + self.data_cur: Optional[Any] = None + self.t_next: Optional[float] = None + self.data_next: Optional[Any] = None def add_data(self, t: float, data: Any) -> None: """Add new data to the cache. diff --git a/libmuscle/python/libmuscle/checkpoint_triggers.py b/libmuscle/python/libmuscle/checkpoint_triggers.py index 434f5a5d..5d47c0d3 100644 --- a/libmuscle/python/libmuscle/checkpoint_triggers.py +++ b/libmuscle/python/libmuscle/checkpoint_triggers.py @@ -92,7 +92,7 @@ def __init__(self, range: CheckpointRangeRule) -> None: self._start = range.start self._stop = range.stop self._every = range.every - self._last = None # type: Union[int, float, None] + self._last: Union[int, float, None] = None if self._stop is not None: start = 0 if self._start is None else self._start diff = self._stop - start @@ -127,8 +127,8 @@ def __init__(self, checkpoint_rules: List[CheckpointRule]) -> None: Args: checkpoint_rules: checkpoint rules (from ymmsl) """ - self._triggers = [] # type: List[CheckpointTrigger] - at_rules = [] # type: List[CheckpointAtRule] + self._triggers: List[CheckpointTrigger] = [] + at_rules: List[CheckpointAtRule] = [] for rule in checkpoint_rules: if isinstance(rule, CheckpointAtRule): if rule.at: @@ -165,7 +165,7 @@ class TriggerManager: def __init__(self) -> None: self._has_checkpoints = False - self._last_triggers = [] # type: List[str] + self._last_triggers: List[str] = [] self._cpts_considered_until = float('-inf') def set_checkpoint_info( @@ -184,11 +184,11 @@ def set_checkpoint_info( self._wall = CombinedCheckpointTriggers(checkpoints.wallclock_time) self._prevwall = 0.0 - self._nextwall = self._wall.next_checkpoint(0.0) # type: Optional[float] + self._nextwall: Optional[float] = self._wall.next_checkpoint(0.0) self._sim = 
CombinedCheckpointTriggers(checkpoints.simulation_time) - self._prevsim = None # type: Optional[float] - self._nextsim = None # type: Optional[float] + self._prevsim: Optional[float] = None + self._nextsim: Optional[float] = None def elapsed_walltime(self) -> float: """Returns elapsed wallclock_time in seconds. diff --git a/libmuscle/python/libmuscle/communicator.py b/libmuscle/python/libmuscle/communicator.py index e8ea49d2..cebd45f5 100644 --- a/libmuscle/python/libmuscle/communicator.py +++ b/libmuscle/python/libmuscle/communicator.py @@ -90,16 +90,16 @@ def __init__(self, kernel: Reference, index: List[int], self._post_office = PostOffice() self._profiler = profiler - self._servers = list() # type: List[TransportServer] + self._servers: List[TransportServer] = [] # indexed by remote instance id - self._clients = dict() # type: Dict[Reference, MPPClient] + self._clients: Dict[Reference, MPPClient] = {} for server_type in transport_server_types: server = server_type(self._post_office) self._servers.append(server) - self._ports = dict() # type: Dict[str, Port] + self._ports: Dict[str, Port] = {} def get_locations(self) -> List[str]: """Returns a list of locations that we can be reached at. @@ -156,7 +156,7 @@ def list_ports(self) -> Dict[Operator, List[str]]: port names. Operators with no associated ports are not included. 
""" - result = dict() # type: Dict[Operator, List[str]] + result: Dict[Operator, List[str]] = {} for port_name, port in self._ports.items(): if port.operator not in result: result[port.operator] = list() @@ -197,7 +197,7 @@ def send_message( """ if slot is None: _logger.debug('Sending message on {}'.format(port_name)) - slot_list = [] # type: List[int] + slot_list: List[int] = [] else: _logger.debug('Sending message on {}[{}]'.format(port_name, slot)) slot_list = [slot] @@ -273,7 +273,7 @@ def receive_message(self, port_name: str, slot: Optional[int] = None, """ if slot is None: port_and_slot = port_name - slot_list = [] # type: List[int] + slot_list: List[int] = [] else: port_and_slot = f"{port_name}[{slot}]" slot_list = [slot] diff --git a/libmuscle/python/libmuscle/endpoint.py b/libmuscle/python/libmuscle/endpoint.py index 52ef0c67..9b22fc60 100644 --- a/libmuscle/python/libmuscle/endpoint.py +++ b/libmuscle/python/libmuscle/endpoint.py @@ -51,10 +51,10 @@ def __init__(self, kernel: Reference, index: List[int], port: Identifier, port: Name of the port used. slot: Slot on which to send or receive. """ - self.kernel = kernel # type: Reference - self.index = index # type: List[int] - self.port = port # type: Identifier - self.slot = slot # type: List[int] + self.kernel = kernel + self.index = index + self.port = port + self.slot = slot def ref(self) -> Reference: """Express as Reference. 
diff --git a/libmuscle/python/libmuscle/instance.py b/libmuscle/python/libmuscle/instance.py index c71f348c..7bb52cdf 100644 --- a/libmuscle/python/libmuscle/instance.py +++ b/libmuscle/python/libmuscle/instance.py @@ -154,10 +154,10 @@ def __init__( self._trigger_manager = TriggerManager() """Keeps track of checkpoints and triggers snapshots.""" - self._first_run = None # type: Optional[bool] + self._first_run: Optional[bool] = None """Whether this is the first iteration of the reuse loop""" - self._do_reuse = None # type: Optional[bool] + self._do_reuse: Optional[bool] = None """Whether to enter this iteration of the reuse loop This is None during the reuse loop, and set between @@ -170,7 +170,7 @@ def __init__( self._do_init = False """Whether to do f_init on this iteration of the reuse loop""" - self._f_init_cache = dict() # type: _FInitCacheType + self._f_init_cache: _FInitCacheType = {} """Stores pre-received messages for f_init ports""" self._register() @@ -910,7 +910,7 @@ def __make_full_name(self option and splits it into a component name and an index. 
""" def split_reference(ref: Reference) -> Tuple[Reference, List[int]]: - index = list() # type: List[int] + index: List[int] = [] i = 0 while i < len(ref) and isinstance(ref[i], Identifier): i += 1 diff --git a/libmuscle/python/libmuscle/manager/instance_manager.py b/libmuscle/python/libmuscle/manager/instance_manager.py index 9d2c8b30..c7546e9a 100644 --- a/libmuscle/python/libmuscle/manager/instance_manager.py +++ b/libmuscle/python/libmuscle/manager/instance_manager.py @@ -65,10 +65,10 @@ def __init__( self._configuration = configuration self._run_dir = run_dir - self._resources_in = Queue() # type: Queue[Resources] - self._requests_out = Queue() # type: Queue[InstantiatorRequest] - self._results_in = Queue() # type: Queue[_ResultType] - self._log_records_in = Queue() # type: Queue[logging.LogRecord] + self._resources_in: Queue[Resources] = Queue() + self._requests_out: Queue[InstantiatorRequest] = Queue() + self._results_in: Queue[_ResultType] = Queue() + self._log_records_in: Queue[logging.LogRecord] = Queue() self._instantiator = QCGPJInstantiator( self._resources_in, self._requests_out, self._results_in, diff --git a/libmuscle/python/libmuscle/manager/instance_registry.py b/libmuscle/python/libmuscle/manager/instance_registry.py index 3c218ed3..2efa10c8 100644 --- a/libmuscle/python/libmuscle/manager/instance_registry.py +++ b/libmuscle/python/libmuscle/manager/instance_registry.py @@ -18,8 +18,8 @@ class InstanceRegistry: def __init__(self) -> None: """Construct an empty InstanceRegistry""" self._deregistered_one = Condition() # doubles as lock - self._locations = dict() # type: Dict[Reference, List[str]] - self._ports = dict() # type: Dict[Reference, List[Port]] + self._locations: Dict[Reference, List[str]] = {} + self._ports: Dict[Reference, List[Port]] = {} self._startup = True def add(self, name: Reference, locations: List[str], ports: List[Port] diff --git a/libmuscle/python/libmuscle/manager/instantiator.py 
b/libmuscle/python/libmuscle/manager/instantiator.py index 56fa91da..2743bd83 100644 --- a/libmuscle/python/libmuscle/manager/instantiator.py +++ b/libmuscle/python/libmuscle/manager/instantiator.py @@ -47,8 +47,8 @@ def __init__(self, instance: Reference, resources: Resources) -> None: self.instance = instance self.resources = resources self.status = ProcessStatus.STARTED - self.exit_code = None # type: Optional[int] - self.error_msg = None # type: Optional[str] + self.exit_code: Optional[int] = None + self.error_msg: Optional[str] = None class InstantiatorRequest: diff --git a/libmuscle/python/libmuscle/manager/manager.py b/libmuscle/python/libmuscle/manager/manager.py index 0ec1da3b..4e0085bf 100644 --- a/libmuscle/python/libmuscle/manager/manager.py +++ b/libmuscle/python/libmuscle/manager/manager.py @@ -57,7 +57,7 @@ def __init__( self._configuration, self._run_dir.path / 'configuration.ymmsl') - self._instance_manager = None # type: Optional[InstanceManager] + self._instance_manager: Optional[InstanceManager] = None try: configuration = self._configuration.as_configuration() if self._run_dir is not None: diff --git a/libmuscle/python/libmuscle/manager/qcgpj_instantiator.py b/libmuscle/python/libmuscle/manager/qcgpj_instantiator.py index 4166ec2a..00fa6542 100644 --- a/libmuscle/python/libmuscle/manager/qcgpj_instantiator.py +++ b/libmuscle/python/libmuscle/manager/qcgpj_instantiator.py @@ -46,7 +46,7 @@ class StateTracker: """ def __init__(self) -> None: """Create a StateTracker.""" - self.processes = dict() # type: Dict[Reference, Process] + self.processes: Dict[Reference, Process] = {} # These are for communicating with QCG-PJ self.queued_to_execute = 0 @@ -122,8 +122,7 @@ def run(self) -> None: self._reconfigure_logging() # Executor needs to be instantiated before we go async - qcg_config = { - qcg_Config.AUX_DIR: str(qcgpj_dir)} # type: Dict[str, str] + qcg_config: Dict[str, str] = {qcg_Config.AUX_DIR: str(qcgpj_dir)} self._qcg_resources = 
qcg_get_resources(qcg_config) self._state_tracker = StateTracker() self._executor = qcg_Executor( @@ -146,7 +145,7 @@ async def _main(self) -> None: jobs, stopping them, or shutting down. Results of finished jobs are returned via the results queue. """ - qcg_iters = dict() # type: Dict[Reference, qcg_SchedulingIteration] + qcg_iters: Dict[Reference, qcg_SchedulingIteration] = {} await asyncio.sleep(0.01) # allow requests_in queue to be populated diff --git a/libmuscle/python/libmuscle/manager/snapshot_registry.py b/libmuscle/python/libmuscle/manager/snapshot_registry.py index d702f061..11494678 100644 --- a/libmuscle/python/libmuscle/manager/snapshot_registry.py +++ b/libmuscle/python/libmuscle/manager/snapshot_registry.py @@ -200,10 +200,10 @@ def __init__( self._snapshot_folder = snapshot_folder self._topology_store = topology_store - self._queue = Queue() # type: Queue[_QueueItemType] - self._snapshots = {} # type: _SnapshotDictType + self._queue: Queue[_QueueItemType] = Queue() + self._snapshots: _SnapshotDictType = {} - self._instances = set() # type: Set[Reference] + self._instances: Set[Reference] = set() for component in config.model.components: self._instances.update(component.instances()) @@ -294,7 +294,7 @@ def _get_workflow_snapshots( # to further restrict the sets of snapshots as peer snapshots are # selected. # First restriction is that the snapshots have to be locally consistent. 
- allowed_snapshots = {} # type: Dict[Reference, FrozenSet[SnapshotNode]] + allowed_snapshots: Dict[Reference, FrozenSet[SnapshotNode]] = {} for instance in instances_to_cover: allowed_snapshots[instance] = frozenset( i_snapshot @@ -321,7 +321,7 @@ def num_allowed_snapshots(instance: Reference) -> int: workflow_snapshots = [] selected_snapshots = [snapshot] # This stack stores history of allowed_snapshots and enables roll back - stack = [] # type: List[Dict[Reference, FrozenSet[SnapshotNode]]] + stack: List[Dict[Reference, FrozenSet[SnapshotNode]]] = [] # Update allowed_snapshots for peers of the selected snapshot for peer, snapshots in snapshot.consistent_peers.items(): @@ -430,7 +430,7 @@ def _generate_description( self, selected_snapshots: List[SnapshotNode], now: datetime) -> str: """Generate a human-readable description of the workflow snapshot. """ - triggers = {} # type: Dict[str, List[str]] + triggers: Dict[str, List[str]] = {} component_info = [] max_instance_len = len('Instance ') for node in selected_snapshots: @@ -477,7 +477,7 @@ def _cleanup_snapshots( newest_snapshots[snapshot.instance] = snapshot # Remove all snapshots that are older than the newest snapshots - removed_snapshots = set() # type: Set[SnapshotNode] + removed_snapshots: Set[SnapshotNode] = set() for snapshot in newest_snapshots.values(): all_snapshots = self._snapshots[snapshot.instance] idx = all_snapshots.index(snapshot) @@ -538,7 +538,7 @@ def _get_connections(self, instance: Reference, peer: Reference instance_kernel = instance.without_trailing_ints() peer_kernel = peer.without_trailing_ints() - connected_ports = [] # type: List[_ConnectionType] + connected_ports: List[_ConnectionType] = [] for conduit in self._model.conduits: if (conduit.sending_component() == instance_kernel and conduit.receiving_component() == peer_kernel): diff --git a/libmuscle/python/libmuscle/mcp/tcp_transport_client.py b/libmuscle/python/libmuscle/mcp/tcp_transport_client.py index 88e68510..5fd80b9f 100644 
--- a/libmuscle/python/libmuscle/mcp/tcp_transport_client.py +++ b/libmuscle/python/libmuscle/mcp/tcp_transport_client.py @@ -31,7 +31,7 @@ def __init__(self, location: str) -> None: """ addresses = location[4:].split(',') - sock = None # type: Optional[socket.SocketType] + sock: Optional[socket.SocketType] = None for address in addresses: try: sock = self._connect(address) diff --git a/libmuscle/python/libmuscle/mcp/tcp_transport_server.py b/libmuscle/python/libmuscle/mcp/tcp_transport_server.py index 17831064..1866c315 100644 --- a/libmuscle/python/libmuscle/mcp/tcp_transport_server.py +++ b/libmuscle/python/libmuscle/mcp/tcp_transport_server.py @@ -87,7 +87,7 @@ def get_location(self) -> str: """ host, port = self._server.server_address - locs = list() # type: List[str] + locs: List[str] = [] for address in self._get_if_addresses(): locs.append('{}:{}'.format(address, port)) return 'tcp:{}'.format(','.join(locs)) @@ -103,7 +103,7 @@ def close(self) -> None: self._server.server_close() def _get_if_addresses(self) -> List[str]: - all_addresses = list() # type: List[str] + all_addresses: List[str] = [] ifs = netifaces.interfaces() for interface in ifs: addrs = netifaces.ifaddresses(interface) diff --git a/libmuscle/python/libmuscle/mpp_client.py b/libmuscle/python/libmuscle/mpp_client.py index 17af1447..e7fa5172 100644 --- a/libmuscle/python/libmuscle/mpp_client.py +++ b/libmuscle/python/libmuscle/mpp_client.py @@ -24,7 +24,7 @@ def __init__(self, locations: List[str]) -> None: Args: locations: The peer's location strings """ - client = None # type: Optional[TransportClient] + client: Optional[TransportClient] = None for ClientType in transport_client_types: for location in locations: if ClientType.can_connect_to(location): diff --git a/libmuscle/python/libmuscle/mpp_message.py b/libmuscle/python/libmuscle/mpp_message.py index 976f9f5e..e78d8624 100644 --- a/libmuscle/python/libmuscle/mpp_message.py +++ b/libmuscle/python/libmuscle/mpp_message.py @@ -103,8 +103,7 
@@ def _decode_grid(code: int, data: bytes) -> Grid: order = order_map[grid_dict['order']] shape = tuple(grid_dict['shape']) dtype = type_map[ExtTypeId(code)] - array = np.ndarray( # type: ignore - shape, dtype, grid_dict['data'], order=order) # type: ignore + array = np.ndarray(shape, dtype, grid_dict['data'], order=order) # type: ignore indexes = grid_dict['indexes'] if indexes == []: indexes = None diff --git a/libmuscle/python/libmuscle/outbox.py b/libmuscle/python/libmuscle/outbox.py index 26190b02..0770dd3a 100644 --- a/libmuscle/python/libmuscle/outbox.py +++ b/libmuscle/python/libmuscle/outbox.py @@ -10,7 +10,7 @@ class Outbox: def __init__(self) -> None: """Create an empty Outbox. """ - self.__queue = Queue() # type: Queue[bytes] + self.__queue: Queue[bytes] = Queue() def is_empty(self) -> bool: """Returns True iff the outbox is empty. diff --git a/libmuscle/python/libmuscle/peer_manager.py b/libmuscle/python/libmuscle/peer_manager.py index a0c28c9a..8725c1ee 100644 --- a/libmuscle/python/libmuscle/peer_manager.py +++ b/libmuscle/python/libmuscle/peer_manager.py @@ -34,7 +34,7 @@ def __init__(self, kernel: Reference, index: List[int], self.__index = index # peer port ids, indexed by local kernel.port id - self.__peers = dict() # type: Dict[Reference, List[Reference]] + self.__peers: Dict[Reference, List[Reference]] = {} for conduit in conduits: if str(conduit.sending_component()) == str(kernel): diff --git a/libmuscle/python/libmuscle/planner/planner.py b/libmuscle/python/libmuscle/planner/planner.py index 883cdb2c..eab7a3c2 100644 --- a/libmuscle/python/libmuscle/planner/planner.py +++ b/libmuscle/python/libmuscle/planner/planner.py @@ -26,21 +26,21 @@ def __init__(self, model: Model) -> None: """ self._model = model - self._direct_superpreds = dict() # type: _PredSuccType - self._direct_predecessors = dict() # type: _PredSuccType - self._direct_subpreds = dict() # type: _PredSuccType + self._direct_superpreds: _PredSuccType = {} + 
self._direct_predecessors: _PredSuccType = {} + self._direct_subpreds: _PredSuccType = {} - self._direct_supersuccs = dict() # type: _PredSuccType - self._direct_successors = dict() # type: _PredSuccType - self._direct_subsuccs = dict() # type: _PredSuccType + self._direct_supersuccs: _PredSuccType = {} + self._direct_successors: _PredSuccType = {} + self._direct_subsuccs: _PredSuccType = {} - self._superpreds = dict() # type: _PredSuccType - self._predecessors = dict() # type: _PredSuccType - self._subpreds = dict() # type: _PredSuccType + self._superpreds: _PredSuccType = {} + self._predecessors: _PredSuccType = {} + self._subpreds: _PredSuccType = {} - self._supersuccs = dict() # type: _PredSuccType - self._successors = dict() # type: _PredSuccType - self._subsuccs = dict() # type: _PredSuccType + self._supersuccs: _PredSuccType = {} + self._successors: _PredSuccType = {} + self._subsuccs: _PredSuccType = {} self._calc_direct_succs_preds() self._calc_predecessors() @@ -158,10 +158,10 @@ def _calc_predecessors(self) -> None: for c in self._model.components} todo = set(self._model.components) - started = set() # type: Set[Component] - doing = set() # type: Set[Component] - finished = set() # type: Set[Component] - done = set() # type: Set[Component] + started: Set[Component] = set() + doing: Set[Component] = set() + finished: Set[Component] = set() + done: Set[Component] = set() while todo or doing: started.clear() for component in todo: @@ -234,10 +234,10 @@ def _calc_successors(self) -> None: for c in self._model.components} todo = set(self._model.components) - started = set() # type: Set[Component] - doing = set() # type: Set[Component] - finished = set() # type: Set[Component] - done = set() # type: Set[Component] + started: Set[Component] = set() + doing: Set[Component] = set() + finished: Set[Component] = set() + done: Set[Component] = set() while todo or doing: started.clear() for component in todo: @@ -346,7 +346,7 @@ def __init__(self, cores: 
Optional[Dict[str, Set[int]]] = None) -> None: cores: Cores to be designated by this object. """ if cores is None: - self.cores = dict() # type: Dict[str, Set[int]] + self.cores: Dict[str, Set[int]] = {} else: self.cores = cores @@ -429,8 +429,8 @@ def __init__(self, all_resources: Resources): to be managed by this ResourceManager. """ self._all_resources = all_resources - self._allocations = dict() # type: Dict[Reference, Resources] - self._oversubscribed = dict() # type: Dict[Reference, Resources] + self._allocations: Dict[Reference, Resources] = {} + self._oversubscribed: Dict[Reference, Resources] = {} self._next_virtual_node = 1 def allocate_all( @@ -458,7 +458,7 @@ def allocate_all( Returns: Resources for each instance required by the model. """ - result = dict() # type: Dict[Reference, Resources] + result: Dict[Reference, Resources] = {} # Analyse model model = ModelGraph(configuration.model) @@ -472,7 +472,7 @@ def allocate_all( # Allocate unallocated_instances = [ i for c in model.components() for i in c.instances()] - leftover_instances = list() # type: List[Reference] + leftover_instances: List[Reference] = [] while unallocated_instances: leftover_instances.clear() diff --git a/libmuscle/python/libmuscle/planner/test/test_planner.py b/libmuscle/python/libmuscle/planner/test/test_planner.py index b2e6460f..0687657c 100644 --- a/libmuscle/python/libmuscle/planner/test/test_planner.py +++ b/libmuscle/python/libmuscle/planner/test/test_planner.py @@ -209,9 +209,8 @@ def test_oversubscribe( def test_oversubscribe_single_instance_threaded() -> None: model = Model('single_instance', [Component('x', 'x', ports=Ports())]) impl = [Implementation(Reference('x'), script='x')] - reqs = { - Reference('x'): ThreadedResReq(Reference('x'), 24) - } # type: Dict[Reference, ResourceRequirements] + reqs: Dict[Reference, ResourceRequirements] = { + Reference('x'): ThreadedResReq(Reference('x'), 24)} config = Configuration(model, None, impl, reqs) res = Resources({'node001': 
{1, 2, 3, 4}}) @@ -225,9 +224,8 @@ def test_oversubscribe_single_instance_threaded() -> None: def test_oversubscribe_single_instance_mpi() -> None: model = Model('single_instance', [Component('x', 'x', ports=Ports())]) impl = [Implementation(Reference('x'), script='x')] - reqs = { - Reference('x'): MPICoresResReq(Reference('x'), 24) - } # type: Dict[Reference, ResourceRequirements] + reqs: Dict[Reference, ResourceRequirements] = { + Reference('x'): MPICoresResReq(Reference('x'), 24)} config = Configuration(model, None, impl, reqs) res = Resources({'node001': {1, 2, 3, 4}}) @@ -241,9 +239,8 @@ def test_oversubscribe_single_instance_mpi() -> None: def test_virtual_allocation() -> None: model = Model('ensemble', [Component('x', 'x', 9, ports=Ports())]) impl = [Implementation(Reference('x'), script='x')] - reqs = { - Reference('x'): MPICoresResReq(Reference('x'), 13) - } # type: Dict[Reference, ResourceRequirements] + reqs: Dict[Reference, ResourceRequirements] = { + Reference('x'): MPICoresResReq(Reference('x'), 13)} config = Configuration(model, None, impl, reqs) res = Resources({'node000001': {1, 2, 3, 4}}) @@ -259,9 +256,8 @@ def test_virtual_allocation() -> None: def test_impossible_virtual_allocation() -> None: model = Model('ensemble', [Component('x', 'x', 9, ports=Ports())]) impl = [Implementation(Reference('x'), script='x')] - reqs = { - Reference('x'): ThreadedResReq(Reference('x'), 13) - } # type: Dict[Reference, ResourceRequirements] + reqs: Dict[Reference, ResourceRequirements] = { + Reference('x'): ThreadedResReq(Reference('x'), 13)} config = Configuration(model, None, impl, reqs) res = Resources({'node000001': {1, 2, 3, 4}}) diff --git a/libmuscle/python/libmuscle/port.py b/libmuscle/python/libmuscle/port.py index a6f955a5..7fda993a 100644 --- a/libmuscle/python/libmuscle/port.py +++ b/libmuscle/python/libmuscle/port.py @@ -50,7 +50,7 @@ def __init__(self, name: str, operator: Operator, is_vector: bool, if is_vector: if our_ndims == len(peer_dims): - 
self._length = 0 # type: Optional[int] + self._length: Optional[int] = 0 elif our_ndims + 1 == len(peer_dims): self._length = peer_dims[-1] elif our_ndims > len(peer_dims): diff --git a/libmuscle/python/libmuscle/post_office.py b/libmuscle/python/libmuscle/post_office.py index 341141fd..e15057fc 100644 --- a/libmuscle/python/libmuscle/post_office.py +++ b/libmuscle/python/libmuscle/post_office.py @@ -19,7 +19,7 @@ class PostOffice(RequestHandler): def __init__(self) -> None: """Create a PostOffice. """ - self._outboxes = dict() # type: Dict[Reference, Outbox] + self._outboxes: Dict[Reference, Outbox] = {} self._outbox_lock = Lock() diff --git a/libmuscle/python/libmuscle/profiler.py b/libmuscle/python/libmuscle/profiler.py index 04ab390b..7d1eee85 100644 --- a/libmuscle/python/libmuscle/profiler.py +++ b/libmuscle/python/libmuscle/profiler.py @@ -20,7 +20,7 @@ def __init__(self, instance_id: Reference, manager: MMPClient) -> None: # TODO: use a background thread for flushing self._instance_id = instance_id self._manager = manager - self._events = list() # type: List[ProfileEvent] + self._events: List[ProfileEvent] = [] def start(self, event_type: ProfileEventType, port: Optional[Port] = None, port_length: Optional[int] = None, slot: Optional[int] = None, diff --git a/libmuscle/python/libmuscle/runner.py b/libmuscle/python/libmuscle/runner.py index 322515c4..8baa1b95 100644 --- a/libmuscle/python/libmuscle/runner.py +++ b/libmuscle/python/libmuscle/runner.py @@ -96,7 +96,7 @@ def implementation_process( implementation: Callable) -> None: prefix_tag = '--muscle-prefix=' name_prefix = str() - index_prefix = list() # type: List[int] + index_prefix: List[int] = [] instance = Reference(instance_id) @@ -182,7 +182,7 @@ def parse_number(prefix: str, i: int) -> Tuple[int, int]: return int(number), i name = str() - index = list() # type: List[int] + index: List[int] = [] i = 0 if i == len(prefix): @@ -212,7 +212,7 @@ def parse_number(prefix: str, i: int) -> Tuple[int, int]: 
def _split_reference(ref: Reference) -> Tuple[Reference, List[int]]: - index = list() # type: List[int] + index: List[int] = [] i = 0 while i < len(ref) and isinstance(ref[i], Identifier): i += 1 diff --git a/libmuscle/python/libmuscle/snapshot_manager.py b/libmuscle/python/libmuscle/snapshot_manager.py index e23b73dc..c4d1716e 100644 --- a/libmuscle/python/libmuscle/snapshot_manager.py +++ b/libmuscle/python/libmuscle/snapshot_manager.py @@ -43,7 +43,7 @@ def __init__(self, self._communicator = communicator self._manager = manager - self._resume_from_snapshot = None # type: Optional[Snapshot] + self._resume_from_snapshot: Optional[Snapshot] = None self._resume_overlay = Settings() self._next_snapshot_num = 1 @@ -65,7 +65,7 @@ def prepare_resume( Returns: Time at which the initial snapshot was saved, if resuming. """ - result = None # type: Optional[float] + result: Optional[float] = None self._snapshot_directory = snapshot_directory or Path.cwd() if resume_snapshot is not None: snapshot = self.load_snapshot_from_file(resume_snapshot) diff --git a/setup.cfg b/setup.cfg index 3fe0e009..86c38b0b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -51,7 +51,7 @@ ignore_missing_imports = True #ignore_missing_imports = True [flake8] -ignore = +per-file-ignores = setup.py: E501 - libmuscle/manager_protocol/*.py: ALL *.py: E123 E126 W504 +max-line-length = 88 diff --git a/tox.ini b/tox.ini index 3ef4eb68..d39feaac 100644 --- a/tox.ini +++ b/tox.ini @@ -5,7 +5,7 @@ skip_missing_interpreters = true [testenv] deps = mypy - flake8<6 + flake8 pytest pytest-cov ymmsl @@ -28,7 +28,7 @@ python = 3.10: py310 [pycodestyle] -max-doc-length = 80 +max-doc-length = 88 [testenv:docs] description = Build documentation using Sphinx From 06342b604e378b44e877a411b54ced8417682bae Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Wed, 16 Nov 2022 12:00:22 +0100 Subject: [PATCH 004/188] Record message timestamps with profile events --- libmuscle/python/libmuscle/communicator.py | 10 ++++++---- 
libmuscle/python/libmuscle/mmp_client.py | 2 +- libmuscle/python/libmuscle/profiler.py | 10 +++++++--- libmuscle/python/libmuscle/profiling.py | 22 ++++++++++++++-------- 4 files changed, 28 insertions(+), 16 deletions(-) diff --git a/libmuscle/python/libmuscle/communicator.py b/libmuscle/python/libmuscle/communicator.py index cebd45f5..3ed5f5a0 100644 --- a/libmuscle/python/libmuscle/communicator.py +++ b/libmuscle/python/libmuscle/communicator.py @@ -214,8 +214,9 @@ def send_message( return port = self._ports[port_name] - profile_event = self._profiler.start(ProfileEventType.SEND, port, - None, slot, None) + profile_event = self._profiler.start( + ProfileEventType.SEND, port, None, slot, None, + message.timestamp) recv_endpoints = self._peer_manager.get_peer_endpoints( snd_endpoint.port, slot_list) @@ -301,8 +302,8 @@ def receive_message(self, port_name: str, slot: Optional[int] = None, # built-in automatic ports. port = self._muscle_settings_in - profile_event = self._profiler.start(ProfileEventType.RECEIVE, port, - None, slot, None) + profile_event = self._profiler.start( + ProfileEventType.RECEIVE, port, None, slot) # peer_manager already checks that there is at most one snd_endpoint # connected to the port we receive on @@ -324,6 +325,7 @@ def receive_message(self, port_name: str, slot: Optional[int] = None, mpp_message.data, mpp_message.settings_overlay) profile_event.stop() + profile_event.message_timestamp = message.timestamp if port.is_vector(): profile_event.port_length = port.get_length() profile_event.message_size = len(mpp_message_bytes) diff --git a/libmuscle/python/libmuscle/mmp_client.py b/libmuscle/python/libmuscle/mmp_client.py index 1deded1e..b474c4f0 100644 --- a/libmuscle/python/libmuscle/mmp_client.py +++ b/libmuscle/python/libmuscle/mmp_client.py @@ -51,7 +51,7 @@ def encode_profile_event(event: ProfileEvent) -> Any: event.start_time.seconds, event.stop_time.seconds, event.event_type.value, encoded_port, event.port_length, event.slot, - 
event.message_size] + event.message_size, event.message_timestamp] def decode_checkpoint_rule(rule: Dict[str, Any]) -> CheckpointRule: diff --git a/libmuscle/python/libmuscle/profiler.py b/libmuscle/python/libmuscle/profiler.py index 7d1eee85..2ff130da 100644 --- a/libmuscle/python/libmuscle/profiler.py +++ b/libmuscle/python/libmuscle/profiler.py @@ -24,7 +24,8 @@ def __init__(self, instance_id: Reference, manager: MMPClient) -> None: def start(self, event_type: ProfileEventType, port: Optional[Port] = None, port_length: Optional[int] = None, slot: Optional[int] = None, - message_size: Optional[int] = None + message_size: Optional[int] = None, + message_timestamp: Optional[float] = None ) -> ProfileEvent: """Start measuring an event. @@ -38,6 +39,7 @@ def start(self, event_type: ProfileEventType, port: Optional[Port] = None, port_length: Length of the port, if vector. slot: Slot that was sent or received on. message_size: Size in bytes of the message. + message_timestamp: Timestamp of the message. Returns: A new ProfileEvent. @@ -46,8 +48,10 @@ def start(self, event_type: ProfileEventType, port: Optional[Port] = None, self.__flush() now = Timestamp(time()) - event = ProfileEvent(self._instance_id, now, now, event_type, port, - port_length, slot, message_size) + event = ProfileEvent( + self._instance_id, now, now, event_type, port, port_length, + slot, message_size, message_timestamp) + self._events.append(event) return event diff --git a/libmuscle/python/libmuscle/profiling.py b/libmuscle/python/libmuscle/profiling.py index b3113da0..2abcb16b 100644 --- a/libmuscle/python/libmuscle/profiling.py +++ b/libmuscle/python/libmuscle/profiling.py @@ -10,7 +10,7 @@ class ProfileEventType(Enum): """Profiling event types for MUSCLE3. - These match the types in the MUSCLE Manager Protocol, and should \ + These match the types in the MUSCLE Manager Protocol, and should be kept identical to those. 
""" REGISTER = 0 @@ -27,30 +27,34 @@ class ProfileEvent: that occurred while executing the simulation. Args: - instance_id: The identifier of the instance that generated \ + instance_id: The identifier of the instance that generated this message. - start_time: When the event started (real-world, not \ + start_time: When the event started (real-world, not simulation time). - stop_time: When the event ended (real-world, not simulation \ + stop_time: When the event ended (real-world, not simulation time). event_type: Type of event that was measured. port: Port used for sending or receiving, if applicable. port_length: Length of that port, if a vector. slot: Slot that was sent or received on, if applicable. message_size: Size of the message involved, if applicable. + message_timestamp: Timestamp sent with the message, if + applicable. Attributes: - instance_id: The identifier of the instance that generated \ + instance_id: The identifier of the instance that generated this message. - start_time: When the event started (real-world, not \ + start_time: When the event started (real-world, not simulation time). - stop_time: When the event ended (real-world, not simulation \ + stop_time: When the event ended (real-world, not simulation time). event_type: Type of event that was measured. port: Port used for sending or receiving, if applicable. port_length: Length of that port, if a vector. slot: Slot that was sent or received on, if applicable. message_size: Size of the message involved, if applicable. + message_timestamp: Timestamp sent with the message, if + applicable. 
""" def __init__( self, @@ -61,7 +65,8 @@ def __init__( port: Optional[Port] = None, port_length: Optional[int] = None, slot: Optional[int] = None, - message_size: Optional[int] = None + message_size: Optional[int] = None, + message_timestamp: Optional[float] = None ) -> None: self.instance_id = instance_id @@ -72,6 +77,7 @@ def __init__( self.port_length = port_length self.slot = slot self.message_size = message_size + self.message_timestamp = message_timestamp def stop(self) -> None: """Sets stop_time to the current time. From ed1684a5e3172034a1e99d06d15915fa5eaab347 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Wed, 16 Nov 2022 16:31:42 +0100 Subject: [PATCH 005/188] Refactor Timestamp a bit --- libmuscle/python/libmuscle/profiler.py | 3 +-- libmuscle/python/libmuscle/profiling.py | 3 +-- libmuscle/python/libmuscle/timestamp.py | 6 +++++- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/libmuscle/python/libmuscle/profiler.py b/libmuscle/python/libmuscle/profiler.py index 2ff130da..b58dc645 100644 --- a/libmuscle/python/libmuscle/profiler.py +++ b/libmuscle/python/libmuscle/profiler.py @@ -1,4 +1,3 @@ -from time import time from typing import List, Optional from ymmsl import Port, Reference @@ -47,7 +46,7 @@ def start(self, event_type: ProfileEventType, port: Optional[Port] = None, if len(self._events) >= 100: self.__flush() - now = Timestamp(time()) + now = Timestamp() event = ProfileEvent( self._instance_id, now, now, event_type, port, port_length, slot, message_size, message_timestamp) diff --git a/libmuscle/python/libmuscle/profiling.py b/libmuscle/python/libmuscle/profiling.py index 2abcb16b..6c20aa10 100644 --- a/libmuscle/python/libmuscle/profiling.py +++ b/libmuscle/python/libmuscle/profiling.py @@ -1,5 +1,4 @@ from enum import Enum -import time from typing import Optional from ymmsl import Port, Reference @@ -82,4 +81,4 @@ def __init__( def stop(self) -> None: """Sets stop_time to the current time. 
""" - self.stop_time = Timestamp(time.time()) + self.stop_time = Timestamp() diff --git a/libmuscle/python/libmuscle/timestamp.py b/libmuscle/python/libmuscle/timestamp.py index 1a8ee840..01636d9b 100644 --- a/libmuscle/python/libmuscle/timestamp.py +++ b/libmuscle/python/libmuscle/timestamp.py @@ -1,4 +1,6 @@ import datetime +import time +from typing import Optional class Timestamp: @@ -7,7 +9,9 @@ class Timestamp: Args: seconds: The number of seconds since the start of 1970. """ - def __init__(self, seconds: float) -> None: + def __init__(self, seconds: Optional[float] = None) -> None: + if seconds is None: + seconds = time.time() self.seconds = seconds def to_asctime(self) -> str: From a120dba6ab13ee147e22698fd5e4a5698cbcab67 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Wed, 16 Nov 2022 18:17:25 +0100 Subject: [PATCH 006/188] Refactor Instance._instance_name() a bit --- libmuscle/python/libmuscle/instance.py | 27 ++++++++++++-------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/libmuscle/python/libmuscle/instance.py b/libmuscle/python/libmuscle/instance.py index 7bb52cdf..0213ccfd 100644 --- a/libmuscle/python/libmuscle/instance.py +++ b/libmuscle/python/libmuscle/instance.py @@ -124,6 +124,9 @@ def __init__( self._name, self._index = self.__make_full_name() """Name and index of this instance.""" + self._instance_id = self._name + self._index + """Full id of this instance.""" + mmp_location = self.__extract_manager_location() self.__manager = MMPClient(mmp_location) """Client object for talking to the manager.""" @@ -134,7 +137,7 @@ def __init__( InstanceFlags.USES_CHECKPOINT_API in self._flags) """Checks that the user uses the API correctly.""" - self._profiler = Profiler(self._instance_name(), self.__manager) + self._profiler = Profiler(self._instance_id, self.__manager) """Profiler for this instance.""" self._communicator = Communicator( @@ -148,7 +151,7 @@ def __init__( """Settings for this instance.""" self._snapshot_manager = 
SnapshotManager( - self._instance_name(), self.__manager, self._communicator) + self._instance_id, self.__manager, self._communicator) """Resumes, loads and saves snapshots.""" self._trigger_manager = TriggerManager() @@ -178,8 +181,7 @@ def __init__( # Note: get_checkpoint_info needs to have the ports initialized # so it comes after self._connect() - checkpoint_info = self.__manager.get_checkpoint_info( - self._instance_name()) + checkpoint_info = self.__manager.get_checkpoint_info(self._instance_id) elapsed_time, checkpoints = checkpoint_info[0:2] self._trigger_manager.set_checkpoint_info(elapsed_time, checkpoints) @@ -339,7 +341,7 @@ def get_setting(self, name: str, typ: Optional[str] = None as expected. """ return self._settings_manager.get_setting( - self._instance_name(), Reference(name), typ) + self._instance_id, Reference(name), typ) def list_ports(self) -> Dict[Operator, List[str]]: """Returns a description of the ports that this Instance has. @@ -708,7 +710,7 @@ def _register(self) -> None: locations = self._communicator.get_locations() port_list = self.__list_declared_ports() self.__manager.register_instance( - self._instance_name(), locations, port_list) + self._instance_id, locations, port_list) register_event.stop() _logger.info('Registered with the manager') @@ -717,7 +719,7 @@ def _connect(self) -> None: """ connect_event = self._profiler.start(ProfileEventType.CONNECT) conduits, peer_dims, peer_locations = self.__manager.request_peers( - self._instance_name()) + self._instance_id) self._communicator.connect(conduits, peer_dims, peer_locations) self._settings_manager.base = self.__manager.get_settings() connect_event.stop() @@ -727,7 +729,7 @@ def _deregister(self) -> None: """Deregister this instance from the manager. 
""" deregister_event = self._profiler.start(ProfileEventType.DEREGISTER) - self.__manager.deregister_instance(self._instance_name()) + self.__manager.deregister_instance(self._instance_id) deregister_event.stop() # this is the last thing we'll profile, so flush messages self._profiler.shutdown() @@ -760,7 +762,7 @@ def __extract_manager_location() -> str: def __set_up_logging(self) -> None: """Adds logging handlers for one or more instances. """ - id_str = str(self._instance_name()) + id_str = str(self._instance_id) logfile = extract_log_file_location('muscle3.{}.log'.format(id_str)) if logfile is not None: @@ -958,11 +960,6 @@ def __list_declared_ports(self) -> List[Port]: result.append(Port(Identifier(name), operator)) return result - def _instance_name(self) -> Reference: - """Returns the full instance name. - """ - return self._name + self._index - def __check_port(self, port_name: str) -> None: if not self._communicator.port_exists(port_name): err_msg = (('Port "{}" does not exist on "{}". Please check' @@ -1013,7 +1010,7 @@ def __receive_settings(self) -> bool: ' muscle_settings_in that is not a' ' Settings. 
It seems that your' ' simulation is miswired or the sending' - ' instance is broken.'.format(self._instance_name())) + ' instance is broken.'.format(self._instance_id)) self.__shutdown(err_msg) raise RuntimeError(err_msg) From 890c5f782098335cfed2b4d7c64b155a401b9519 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Wed, 16 Nov 2022 19:01:36 +0100 Subject: [PATCH 007/188] Factor instance_id out of ProfileEvent into MMPClient --- integration_test/test_logging.py | 6 ++- integration_test/test_registration.py | 29 ++++++----- libmuscle/python/libmuscle/communicator.py | 14 ++--- libmuscle/python/libmuscle/instance.py | 28 +++++----- .../python/libmuscle/manager/mmp_server.py | 3 +- libmuscle/python/libmuscle/mmp_client.py | 41 ++++++++------- libmuscle/python/libmuscle/profiler.py | 51 ++++--------------- libmuscle/python/libmuscle/profiling.py | 26 +++++----- libmuscle/python/libmuscle/test/conftest.py | 4 +- .../python/libmuscle/test/test_instance.py | 3 +- .../python/libmuscle/test/test_mmp_client.py | 25 +++++---- 11 files changed, 103 insertions(+), 127 deletions(-) diff --git a/integration_test/test_logging.py b/integration_test/test_logging.py index e187f9dc..6b3a8b79 100644 --- a/integration_test/test_logging.py +++ b/integration_test/test_logging.py @@ -1,6 +1,7 @@ import multiprocessing as mp import ymmsl +from ymmsl import Reference from libmuscle.logging import LogLevel, LogMessage, Timestamp from libmuscle.manager.manager import Manager @@ -36,9 +37,10 @@ def do_logging_test(caplog): manager = Manager(ymmsl_doc) # create client - client = MMPClient(manager.get_server_location()) + instance_id = Reference('test_logging') + client = MMPClient(instance_id, manager.get_server_location()) message = LogMessage( - instance_id='test_logging', + instance_id=str(instance_id), timestamp=Timestamp(2.0), level=LogLevel.DEBUG, text='Integration testing') diff --git a/integration_test/test_registration.py b/integration_test/test_registration.py index 87181de5..f359ff53 
100644 --- a/integration_test/test_registration.py +++ b/integration_test/test_registration.py @@ -7,12 +7,11 @@ def test_registration(log_file_in_tmpdir, mmp_server): - client = MMPClient(mmp_server.get_location()) instance_name = Reference('test_instance') + client = MMPClient(instance_name, mmp_server.get_location()) port = Port(Reference('test_in'), Operator.S) - client.register_instance(instance_name, ['tcp:localhost:10000'], - [port]) + client.register_instance(['tcp:localhost:10000'], [port]) registry = mmp_server._handler._instance_registry @@ -24,12 +23,13 @@ def test_registration(log_file_in_tmpdir, mmp_server): def test_wiring(log_file_in_tmpdir, mmp_server_process): # mmp_server_process starts a server and returns its location - client = MMPClient(mmp_server_process) + client = MMPClient(Reference('macro'), mmp_server_process) - client.register_instance(Reference('macro'), ['direct:macro'], []) + client.register_instance(['direct:macro'], []) - conduits, peer_dims, peer_locations = client.request_peers( - Reference('micro[0]')) + client2 = MMPClient(Reference('micro[0]'), mmp_server_process) + conduits, peer_dims, peer_locations = client2.request_peers() + client2.close() assert Conduit('macro.out', 'micro.in') in conduits assert Conduit('micro.out', 'macro.in') in conduits @@ -41,26 +41,29 @@ def test_wiring(log_file_in_tmpdir, mmp_server_process): patch('libmuscle.mmp_client.PEER_INTERVAL_MIN', 0.01), \ patch('libmuscle.mmp_client.PEER_INTERVAL_MAX', 0.1): with pytest.raises(RuntimeError): - client.request_peers(Reference('macro')) + client.request_peers() for i in range(5): instance = Reference('micro[{}]'.format(i)) + client2 = MMPClient(instance, mmp_server_process) location = 'direct:{}'.format(instance) - client.register_instance(instance, [location], []) + client2.register_instance([location], []) + client2.close() with patch('libmuscle.mmp_client.PEER_TIMEOUT', 0.1), \ patch('libmuscle.mmp_client.PEER_INTERVAL_MIN', 0.01), \ 
patch('libmuscle.mmp_client.PEER_INTERVAL_MAX', 0.1): with pytest.raises(RuntimeError): - client.request_peers(Reference('macro')) + client.request_peers() for i in range(5, 10): instance = Reference('micro[{}]'.format(i)) + client2 = MMPClient(instance, mmp_server_process) location = 'direct:{}'.format(instance) - client.register_instance(instance, [location], []) + client2.register_instance([location], []) + client2.close() - conduits, peer_dims, peer_locations = client.request_peers( - Reference('macro')) + conduits, peer_dims, peer_locations = client.request_peers() assert Conduit('macro.out', 'micro.in') in conduits assert Conduit('micro.out', 'macro.in') in conduits diff --git a/libmuscle/python/libmuscle/communicator.py b/libmuscle/python/libmuscle/communicator.py index 3ed5f5a0..f0eb64e8 100644 --- a/libmuscle/python/libmuscle/communicator.py +++ b/libmuscle/python/libmuscle/communicator.py @@ -11,7 +11,8 @@ from libmuscle.post_office import PostOffice from libmuscle.port import Port from libmuscle.profiler import Profiler -from libmuscle.profiling import ProfileEventType +from libmuscle.profiling import ProfileEvent, ProfileEventType +from libmuscle.timestamp import Timestamp _logger = logging.getLogger(__name__) @@ -214,9 +215,9 @@ def send_message( return port = self._ports[port_name] - profile_event = self._profiler.start( - ProfileEventType.SEND, port, None, slot, None, - message.timestamp) + profile_event = ProfileEvent( + ProfileEventType.SEND, Timestamp(), None, port, None, slot, + None, message.timestamp) recv_endpoints = self._peer_manager.get_peer_endpoints( snd_endpoint.port, slot_list) @@ -242,6 +243,7 @@ def send_message( if port.is_vector(): profile_event.port_length = port.get_length() profile_event.message_size = len(encoded_message) + self._profiler.record_event(profile_event) def receive_message(self, port_name: str, slot: Optional[int] = None, default: Optional[Message] = None @@ -302,8 +304,8 @@ def receive_message(self, port_name: str, 
slot: Optional[int] = None, # built-in automatic ports. port = self._muscle_settings_in - profile_event = self._profiler.start( - ProfileEventType.RECEIVE, port, None, slot) + profile_event = ProfileEvent( + ProfileEventType.RECEIVE, Timestamp(), None, port, None, slot) # peer_manager already checks that there is at most one snd_endpoint # connected to the port we receive on diff --git a/libmuscle/python/libmuscle/instance.py b/libmuscle/python/libmuscle/instance.py index 0213ccfd..adcd0777 100644 --- a/libmuscle/python/libmuscle/instance.py +++ b/libmuscle/python/libmuscle/instance.py @@ -20,8 +20,9 @@ from libmuscle.mpp_message import ClosePort from libmuscle.mmp_client import MMPClient from libmuscle.profiler import Profiler -from libmuscle.profiling import ProfileEventType +from libmuscle.profiling import ProfileEvent, ProfileEventType from libmuscle.snapshot_manager import SnapshotManager +from libmuscle.timestamp import Timestamp from libmuscle.util import extract_log_file_location @@ -128,7 +129,7 @@ def __init__( """Full id of this instance.""" mmp_location = self.__extract_manager_location() - self.__manager = MMPClient(mmp_location) + self.__manager = MMPClient(self._instance_id, mmp_location) """Client object for talking to the manager.""" self.__set_up_logging() @@ -137,7 +138,7 @@ def __init__( InstanceFlags.USES_CHECKPOINT_API in self._flags) """Checks that the user uses the API correctly.""" - self._profiler = Profiler(self._instance_id, self.__manager) + self._profiler = Profiler(self.__manager) """Profiler for this instance.""" self._communicator = Communicator( @@ -706,31 +707,30 @@ def __f_init_max_timestamp(self) -> Optional[float]: def _register(self) -> None: """Register this instance with the manager. 
""" - register_event = self._profiler.start(ProfileEventType.REGISTER) + register_event = ProfileEvent(ProfileEventType.REGISTER, Timestamp()) locations = self._communicator.get_locations() port_list = self.__list_declared_ports() - self.__manager.register_instance( - self._instance_id, locations, port_list) - register_event.stop() + self.__manager.register_instance(locations, port_list) + self._profiler.record_event(register_event) _logger.info('Registered with the manager') def _connect(self) -> None: """Connect this instance to the given peers / conduits. """ - connect_event = self._profiler.start(ProfileEventType.CONNECT) - conduits, peer_dims, peer_locations = self.__manager.request_peers( - self._instance_id) + connect_event = ProfileEvent(ProfileEventType.CONNECT, Timestamp()) + conduits, peer_dims, peer_locations = self.__manager.request_peers() self._communicator.connect(conduits, peer_dims, peer_locations) self._settings_manager.base = self.__manager.get_settings() - connect_event.stop() + self._profiler.record_event(connect_event) _logger.info('Received peer locations and base settings') def _deregister(self) -> None: """Deregister this instance from the manager. 
""" - deregister_event = self._profiler.start(ProfileEventType.DEREGISTER) - self.__manager.deregister_instance(self._instance_id) - deregister_event.stop() + deregister_event = ProfileEvent( + ProfileEventType.DEREGISTER, Timestamp()) + self.__manager.deregister_instance() + self._profiler.record_event(deregister_event) # this is the last thing we'll profile, so flush messages self._profiler.shutdown() _logger.info('Deregistered from the manager') diff --git a/libmuscle/python/libmuscle/manager/mmp_server.py b/libmuscle/python/libmuscle/manager/mmp_server.py index 6fc4bae2..43d07430 100644 --- a/libmuscle/python/libmuscle/manager/mmp_server.py +++ b/libmuscle/python/libmuscle/manager/mmp_server.py @@ -260,7 +260,8 @@ def _submit_log_message( instance_id, Timestamp(timestamp), LogLevel(level), text) return [ResponseType.SUCCESS.value] - def _submit_profile_events(self, events: List[List[Any]]) -> Any: + def _submit_profile_events( + self, instance: str, events: List[List[Any]]) -> Any: """Handle a submit profile events request. Not implemented yet. diff --git a/libmuscle/python/libmuscle/mmp_client.py b/libmuscle/python/libmuscle/mmp_client.py index b474c4f0..571e968a 100644 --- a/libmuscle/python/libmuscle/mmp_client.py +++ b/libmuscle/python/libmuscle/mmp_client.py @@ -45,11 +45,15 @@ def encode_profile_event(event: ProfileEvent) -> Any: Returns: A list with its attributes, for MMP serialisation. """ + if event.start_time is None or event.stop_time is None: + raise RuntimeError( + 'Incomplete ProfileEvent sent. 
This is a bug, please' + ' report it.') + encoded_port = encode_port(event.port) if event.port else None return [ - str(event.instance_id), - event.start_time.seconds, event.stop_time.seconds, event.event_type.value, + event.start_time.seconds, event.stop_time.seconds, encoded_port, event.port_length, event.slot, event.message_size, event.message_timestamp] @@ -103,12 +107,13 @@ class MMPClient(): It manages the connection, and converts between our native types and the gRPC generated types. """ - def __init__(self, location: str) -> None: + def __init__(self, instance_id: Reference, location: str) -> None: """Create an MMPClient Args: location: A connection string of the form hostname:port """ + self._instance_id = instance_id self._transport_client = TcpTransportClient(location) def close(self) -> None: @@ -139,6 +144,7 @@ def submit_profile_events(self, events: Iterable[ProfileEvent]) -> None: """ request = [ RequestType.SUBMIT_PROFILE_EVENTS.value, + str(self._instance_id), [encode_profile_event(e) for e in events]] self._call_manager(request) @@ -180,19 +186,18 @@ def get_checkpoint_info(self, name: Reference) -> _CheckpointInfoType: response = self._call_manager(request) return decode_checkpoint_info(*response[1:]) - def register_instance(self, name: Reference, locations: List[str], - ports: List[Port]) -> None: + def register_instance( + self, locations: List[str], ports: List[Port]) -> None: """Register a component instance with the manager. Args: - name: Name of the instance in the simulation. locations: List of places where the instance can be reached. ports: List of ports of this instance. 
""" request = [ RequestType.REGISTER_INSTANCE.value, - str(name), locations, + str(self._instance_id), locations, [encode_port(p) for p in ports], libmuscle.__version__] response = self._call_manager(request) @@ -200,11 +205,10 @@ def register_instance(self, name: Reference, locations: List[str], raise RuntimeError( f'Error registering instance: {response[1]}') - def request_peers( - self, name: Reference) -> Tuple[ - List[Conduit], - Dict[Reference, List[int]], - Dict[Reference, List[str]]]: + def request_peers(self) -> Tuple[ + List[Conduit], + Dict[Reference, List[int]], + Dict[Reference, List[str]]]: """Request connection information about peers. This will repeat the request at an exponentially increasing @@ -212,9 +216,6 @@ def request_peers( specified by PEER_INTERVAL_MIN and PEER_INTERVAL_MAX. From there on, intervals are drawn randomly from that range. - Args: - name: Name of the current instance. - Returns: A tuple containing a list of conduits that this instance is attached to, a dictionary of peer dimensions, which is @@ -227,7 +228,7 @@ def request_peers( sleep_time = 0.1 start_time = perf_counter() - request = [RequestType.GET_PEERS.value, str(name)] + request = [RequestType.GET_PEERS.value, str(self._instance_id)] response = self._call_manager(request) while (response[0] == ResponseType.PENDING.value and @@ -261,13 +262,11 @@ def request_peers( return conduits, peer_dimensions, peer_locations - def deregister_instance(self, name: Reference) -> None: + def deregister_instance(self) -> None: """Deregister a component instance with the manager. - - Args: - name: Name of the instance in the simulation. 
""" - request = [RequestType.DEREGISTER_INSTANCE.value, str(name)] + request = [ + RequestType.DEREGISTER_INSTANCE.value, str(self._instance_id)] response = self._call_manager(request) if response[0] == ResponseType.ERROR.value: diff --git a/libmuscle/python/libmuscle/profiler.py b/libmuscle/python/libmuscle/profiler.py index b58dc645..ef990a5a 100644 --- a/libmuscle/python/libmuscle/profiler.py +++ b/libmuscle/python/libmuscle/profiler.py @@ -1,59 +1,23 @@ -from typing import List, Optional - -from ymmsl import Port, Reference +from typing import List from libmuscle.mmp_client import MMPClient -from libmuscle.profiling import ProfileEvent, ProfileEventType +from libmuscle.profiling import ProfileEvent from libmuscle.timestamp import Timestamp class Profiler: """Collects profiling events and sends them to the manager. """ - def __init__(self, instance_id: Reference, manager: MMPClient) -> None: + def __init__(self, manager: MMPClient) -> None: """Create a Profiler. Args: manager: The client used to submit data to the manager. """ # TODO: use a background thread for flushing - self._instance_id = instance_id self._manager = manager self._events: List[ProfileEvent] = [] - def start(self, event_type: ProfileEventType, port: Optional[Port] = None, - port_length: Optional[int] = None, slot: Optional[int] = None, - message_size: Optional[int] = None, - message_timestamp: Optional[float] = None - ) -> ProfileEvent: - """Start measuring an event. - - Call this, then call stop() on the returned ProfileEvent at - the end of the event. - - Args: - instance_id: Instance for which this event occurred. - event_type: Type of event that occurred. - port: Port that was sent or received on. - port_length: Length of the port, if vector. - slot: Slot that was sent or received on. - message_size: Size in bytes of the message. - message_timestamp: Timestamp of the message. - - Returns: - A new ProfileEvent. 
- """ - if len(self._events) >= 100: - self.__flush() - - now = Timestamp() - event = ProfileEvent( - self._instance_id, now, now, event_type, port, port_length, - slot, message_size, message_timestamp) - - self._events.append(event) - return event - def shutdown(self) -> None: self.__flush() @@ -61,16 +25,19 @@ def record_event(self, event: ProfileEvent) -> None: """Record a profiling event. This will record the event, and may flush this and previously - recorded events to the manager. + recorded events to the manager. If the time is still running, + it will be stopped. Other than this the event must be complete + when it is submitted. Do not use the event object after calling + this function with it. Args: event: The event to record. """ + if event.stop_time is None: + event.stop_time = Timestamp() self._events.append(event) if len(self._events) >= 100: self.__flush() - if event.event_type == ProfileEventType.DEREGISTER: - self.__flush() def __flush(self) -> None: if self._events: diff --git a/libmuscle/python/libmuscle/profiling.py b/libmuscle/python/libmuscle/profiling.py index 6c20aa10..4f3b56ee 100644 --- a/libmuscle/python/libmuscle/profiling.py +++ b/libmuscle/python/libmuscle/profiling.py @@ -1,7 +1,7 @@ from enum import Enum from typing import Optional -from ymmsl import Port, Reference +from ymmsl import Port from libmuscle.timestamp import Timestamp @@ -25,14 +25,15 @@ class ProfileEvent: This represents a single measurement of the timing of some event that occurred while executing the simulation. + Note that instance_id gets set by the profiler after submitting + the event, so it doesn't get passed in the constructor. + Args: - instance_id: The identifier of the instance that generated - this message. + event_type: Type of event that was measured. start_time: When the event started (real-world, not simulation time). stop_time: When the event ended (real-world, not simulation time). - event_type: Type of event that was measured. 
port: Port used for sending or receiving, if applicable. port_length: Length of that port, if a vector. slot: Slot that was sent or received on, if applicable. @@ -41,13 +42,11 @@ class ProfileEvent: applicable. Attributes: - instance_id: The identifier of the instance that generated - this message. + event_type: Type of event that was measured. start_time: When the event started (real-world, not simulation time). stop_time: When the event ended (real-world, not simulation time). - event_type: Type of event that was measured. port: Port used for sending or receiving, if applicable. port_length: Length of that port, if a vector. slot: Slot that was sent or received on, if applicable. @@ -57,10 +56,9 @@ class ProfileEvent: """ def __init__( self, - instance_id: Reference, - start_time: Timestamp, - stop_time: Timestamp, event_type: ProfileEventType, + start_time: Optional[Timestamp] = None, + stop_time: Optional[Timestamp] = None, port: Optional[Port] = None, port_length: Optional[int] = None, slot: Optional[int] = None, @@ -68,16 +66,20 @@ def __init__( message_timestamp: Optional[float] = None ) -> None: - self.instance_id = instance_id + self.event_type = event_type self.start_time = start_time self.stop_time = stop_time - self.event_type = event_type self.port = port self.port_length = port_length self.slot = slot self.message_size = message_size self.message_timestamp = message_timestamp + def start(self) -> None: + """Sets start_time to the current time. + """ + self.start_time = Timestamp() + def stop(self) -> None: """Sets stop_time to the current time. 
""" diff --git a/libmuscle/python/libmuscle/test/conftest.py b/libmuscle/python/libmuscle/test/conftest.py index 201a10f4..e8a0e29b 100644 --- a/libmuscle/python/libmuscle/test/conftest.py +++ b/libmuscle/python/libmuscle/test/conftest.py @@ -1,7 +1,7 @@ import pytest from unittest.mock import patch -from ymmsl import Settings +from ymmsl import Reference, Settings from libmuscle.api_guard import APIGuard from libmuscle.communicator import Message @@ -11,7 +11,7 @@ @pytest.fixture def mocked_mmp_client(): with patch('libmuscle.mmp_client.TcpTransportClient') as mock_ttc: - yield MMPClient(''), mock_ttc.return_value + yield MMPClient(Reference('component[13]'), ''), mock_ttc.return_value @pytest.fixture diff --git a/libmuscle/python/libmuscle/test/test_instance.py b/libmuscle/python/libmuscle/test/test_instance.py index 53a3b302..84e0973f 100644 --- a/libmuscle/python/libmuscle/test/test_instance.py +++ b/libmuscle/python/libmuscle/test/test_instance.py @@ -95,7 +95,8 @@ def test_create_instance( assert isinstance(instance._settings_manager, SettingsManager) assert len(instance._settings_manager.base) == 0 assert len(instance._settings_manager.overlay) == 0 - mmp_client.assert_called_once_with('localhost:9000') + mmp_client.assert_called_once_with( + Reference('test_instance[13][42]'), 'localhost:9000') assert mmp_client_object._register.called_with() assert mmp_client_object._connect.called_with() comm_type.assert_called_with(Reference('test_instance'), [13, 42], diff --git a/libmuscle/python/libmuscle/test/test_mmp_client.py b/libmuscle/python/libmuscle/test/test_mmp_client.py index 51874e5d..c780ed40 100644 --- a/libmuscle/python/libmuscle/test/test_mmp_client.py +++ b/libmuscle/python/libmuscle/test/test_mmp_client.py @@ -13,7 +13,7 @@ def test_init() -> None: with patch('libmuscle.mmp_client.TcpTransportClient') as mock_ttc: stub = mock_ttc.return_value - client = MMPClient('') + client = MMPClient(Reference([]), '') assert client._transport_client == stub # 
type: ignore @@ -22,7 +22,7 @@ def test_connection_fail() -> None: with pytest.raises(RuntimeError): # Port 255 is reserved and privileged, so there's probably # nothing there. - MMPClient('tcp:localhost:255') + MMPClient(Reference([]), 'tcp:localhost:255') def test_submit_log_message(mocked_mmp_client) -> None: @@ -78,14 +78,13 @@ def test_register_instance(mocked_mmp_client) -> None: stub.call.return_value = msgpack.packb(result, use_bin_type=True) client.register_instance( - Reference('kernel[13]'), ['direct:test', 'tcp:test'], [Port('out', Operator.O_I), Port('in', Operator.S)]) assert stub.call.called sent_msg = msgpack.unpackb(stub.call.call_args[0][0], raw=False) assert sent_msg == [ - RequestType.REGISTER_INSTANCE.value, 'kernel[13]', + RequestType.REGISTER_INSTANCE.value, 'component[13]', ['direct:test', 'tcp:test'], [['out', 'O_I'], ['in', 'S']], libmuscle.__version__] @@ -100,12 +99,12 @@ def test_request_peers(mocked_mmp_client) -> None: {'other': ['direct:test', 'tcp:test']}] stub.call.return_value = msgpack.packb(result_msg, use_bin_type=True) - result = client.request_peers(Reference('kernel[13]')) + result = client.request_peers() assert stub.call.called sent_msg = msgpack.unpackb(stub.call.call_args[0][0], raw=False) assert sent_msg[0] == RequestType.GET_PEERS.value - assert sent_msg[1] == 'kernel[13]' + assert sent_msg[1] == 'component[13]' assert len(result[0]) == 1 assert isinstance(result[0][0], Conduit) @@ -126,7 +125,7 @@ def test_request_peers_error(mocked_mmp_client) -> None: stub.call.return_value = msgpack.packb(result_msg, use_bin_type=True) with pytest.raises(RuntimeError): - client.request_peers(Reference('kernel[13]')) + client.request_peers() def test_request_peers_timeout(mocked_mmp_client) -> None: @@ -139,7 +138,7 @@ def test_request_peers_timeout(mocked_mmp_client) -> None: patch('libmuscle.mmp_client.PEER_INTERVAL_MIN', 0.1), \ patch('libmuscle.mmp_client.PEER_INTERVAL_MAX', 1.0): with pytest.raises(RuntimeError): - 
client.request_peers(Reference('kernel[13]')) + client.request_peers() def test_deregister_instance(mocked_mmp_client) -> None: @@ -148,22 +147,22 @@ def test_deregister_instance(mocked_mmp_client) -> None: result = [ResponseType.SUCCESS.value] stub.call.return_value = msgpack.packb(result, use_bin_type=True) - client.deregister_instance(Reference('kernel[13]')) + client.deregister_instance() assert stub.call.called sent_msg = msgpack.unpackb(stub.call.call_args[0][0], raw=False) - assert sent_msg == [RequestType.DEREGISTER_INSTANCE.value, 'kernel[13]'] + assert sent_msg == [RequestType.DEREGISTER_INSTANCE.value, 'component[13]'] def test_deregister_instance_error(mocked_mmp_client) -> None: client, stub = mocked_mmp_client - result = [ResponseType.ERROR.value, 'Instance kernel[13] unknown'] + result = [ResponseType.ERROR.value, 'Instance component[13] unknown'] stub.call.return_value = msgpack.packb(result, use_bin_type=True) with pytest.raises(RuntimeError): - client.deregister_instance(Reference('kernel[13]')) + client.deregister_instance() assert stub.call.called sent_msg = msgpack.unpackb(stub.call.call_args[0][0], raw=False) - assert sent_msg == [RequestType.DEREGISTER_INSTANCE.value, 'kernel[13]'] + assert sent_msg == [RequestType.DEREGISTER_INSTANCE.value, 'component[13]'] From 176082118abfa6ac25b89a4a3dadebec81e7c1c1 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Wed, 16 Nov 2022 20:45:51 +0100 Subject: [PATCH 008/188] Profile receiving in more detail --- integration_test/test_cpp_tcp_server.py | 3 +- libmuscle/python/libmuscle/communicator.py | 32 +++++++++--- .../libmuscle/mcp/tcp_transport_client.py | 15 ++++-- .../libmuscle/mcp/test/test_tcp_transport.py | 2 +- .../python/libmuscle/mcp/transport_client.py | 18 +++++-- libmuscle/python/libmuscle/mmp_client.py | 2 +- libmuscle/python/libmuscle/mpp_client.py | 8 +-- libmuscle/python/libmuscle/mpp_message.py | 2 +- libmuscle/python/libmuscle/profiling.py | 3 ++ 
libmuscle/python/libmuscle/test/conftest.py | 7 +++ .../libmuscle/test/test_communicator.py | 50 ++++++++++--------- .../python/libmuscle/test/test_mmp_client.py | 40 +++++++++------ 12 files changed, 119 insertions(+), 63 deletions(-) diff --git a/integration_test/test_cpp_tcp_server.py b/integration_test/test_cpp_tcp_server.py index 974fc128..912b838d 100644 --- a/integration_test/test_cpp_tcp_server.py +++ b/integration_test/test_cpp_tcp_server.py @@ -37,7 +37,8 @@ def test_cpp_tcp_server(log_file_in_tmpdir): assert TcpTransportClient.can_connect_to(location) client = MPPClient([location]) - msg = MPPMessage.from_bytes(client.receive(Reference('test_receiver.port'))) + msg_bytes, _ = client.receive(Reference('test_receiver.port')) + msg = MPPMessage.from_bytes(msg_bytes) client.close() # assert stuff diff --git a/libmuscle/python/libmuscle/communicator.py b/libmuscle/python/libmuscle/communicator.py index f0eb64e8..9fb6bc64 100644 --- a/libmuscle/python/libmuscle/communicator.py +++ b/libmuscle/python/libmuscle/communicator.py @@ -227,14 +227,14 @@ def send_message( port_length = port.get_length() for recv_endpoint in recv_endpoints: - mcp_message = MPPMessage(snd_endpoint.ref(), recv_endpoint.ref(), + mpp_message = MPPMessage(snd_endpoint.ref(), recv_endpoint.ref(), port_length, message.timestamp, message.next_timestamp, cast(Settings, message.settings), port.get_num_messages(slot), checkpoints_considered_until, message.data) - encoded_message = mcp_message.encoded() + encoded_message = mpp_message.encoded() self._post_office.deposit(recv_endpoint.ref(), encoded_message) port.increment_num_messages(slot) @@ -304,7 +304,7 @@ def receive_message(self, port_name: str, slot: Optional[int] = None, # built-in automatic ports. 
port = self._muscle_settings_in - profile_event = ProfileEvent( + receive_event = ProfileEvent( ProfileEventType.RECEIVE, Timestamp(), None, port, None, slot) # peer_manager already checks that there is at most one snd_endpoint @@ -312,8 +312,13 @@ def receive_message(self, port_name: str, slot: Optional[int] = None, snd_endpoint = self._peer_manager.get_peer_endpoints( recv_endpoint.port, slot_list)[0] client = self.__get_client(snd_endpoint.instance()) - mpp_message_bytes = client.receive(recv_endpoint.ref()) + mpp_message_bytes, profile = client.receive(recv_endpoint.ref()) + + recv_decode_event = ProfileEvent( + ProfileEventType.RECEIVE_DECODE, Timestamp(), None, port, None, + slot, len(mpp_message_bytes)) mpp_message = MPPMessage.from_bytes(mpp_message_bytes) + self._profiler.record_event(recv_decode_event) if mpp_message.port_length is not None: if port.is_resizable(): @@ -326,11 +331,22 @@ def receive_message(self, port_name: str, slot: Optional[int] = None, mpp_message.timestamp, mpp_message.next_timestamp, mpp_message.data, mpp_message.settings_overlay) - profile_event.stop() - profile_event.message_timestamp = message.timestamp + recv_wait_event = ProfileEvent( + ProfileEventType.RECEIVE_WAIT, profile[0], profile[1], port, + mpp_message.port_length, slot) + self._profiler.record_event(recv_wait_event) + + recv_xfer_event = ProfileEvent( + ProfileEventType.RECEIVE_TRANSFER, profile[1], profile[2], + port, mpp_message.port_length, slot, len(mpp_message_bytes), + message.timestamp) + self._profiler.record_event(recv_xfer_event) + + receive_event.message_timestamp = message.timestamp if port.is_vector(): - profile_event.port_length = port.get_length() - profile_event.message_size = len(mpp_message_bytes) + receive_event.port_length = port.get_length() + receive_event.message_size = len(mpp_message_bytes) + self._profiler.record_event(receive_event) expected_message_number = port.get_num_messages(slot) if expected_message_number != mpp_message.message_number: 
diff --git a/libmuscle/python/libmuscle/mcp/tcp_transport_client.py b/libmuscle/python/libmuscle/mcp/tcp_transport_client.py index 5fd80b9f..83852850 100644 --- a/libmuscle/python/libmuscle/mcp/tcp_transport_client.py +++ b/libmuscle/python/libmuscle/mcp/tcp_transport_client.py @@ -1,8 +1,10 @@ import socket -from typing import Optional +from time import time +from typing import Optional, Tuple -from libmuscle.mcp.transport_client import TransportClient +from libmuscle.mcp.transport_client import ProfileData, TransportClient from libmuscle.mcp.tcp_util import recv_all, recv_int64, send_int64 +from libmuscle.timestamp import Timestamp class TcpTransportClient(TransportClient): @@ -49,7 +51,7 @@ def __init__(self, location: str) -> None: sock.setsockopt(socket.SOL_TCP, socket.TCP_QUICKACK, 1) self._socket = sock - def call(self, request: bytes) -> bytes: + def call(self, request: bytes) -> Tuple[bytes, ProfileData]: """Send a request to the server and receive the response. This is a blocking call. @@ -60,11 +62,16 @@ def call(self, request: bytes) -> bytes: Returns: The received response """ + start_wait = Timestamp(time()) send_int64(self._socket, len(request)) self._socket.sendall(request) length = recv_int64(self._socket) - return recv_all(self._socket, length) + start_transfer = Timestamp(time()) + + response = recv_all(self._socket, length) + stop_transfer = Timestamp(time()) + return response, (start_wait, start_transfer, stop_transfer) def close(self) -> None: """Closes this client. 
diff --git a/libmuscle/python/libmuscle/mcp/test/test_tcp_transport.py b/libmuscle/python/libmuscle/mcp/test/test_tcp_transport.py index f98a34ed..bf71da24 100644 --- a/libmuscle/python/libmuscle/mcp/test/test_tcp_transport.py +++ b/libmuscle/python/libmuscle/mcp/test/test_tcp_transport.py @@ -23,7 +23,7 @@ def handle_request(request: bytes) -> bytes: assert TcpTransportClient.can_connect_to(server_location) client = TcpTransportClient(server_location) - response2 = client.call(request) + response2, _ = client.call(request) assert response == response2 client.close() diff --git a/libmuscle/python/libmuscle/mcp/transport_client.py b/libmuscle/python/libmuscle/mcp/transport_client.py index f85eea26..183cc909 100644 --- a/libmuscle/python/libmuscle/mcp/transport_client.py +++ b/libmuscle/python/libmuscle/mcp/transport_client.py @@ -1,3 +1,11 @@ +from typing import Tuple + +from libmuscle.timestamp import Timestamp + + +ProfileData = Tuple[Timestamp, Timestamp, Timestamp] + + class TransportClient: """A client that connects to an MCP server. @@ -17,16 +25,20 @@ def can_connect_to(location: str) -> bool: """ raise NotImplementedError() # pragma: no cover - def call(self, request: bytes) -> bytes: + def call(self, request: bytes) -> Tuple[bytes, ProfileData]: """Send a request to the server and receive the response. - This is a blocking call. + This is a blocking call. Besides the result, this function + returns a tuple with three timestamps (floats in seconds since + the epoch). These were taken when the function was first called, + when data became available and the transfer started, and when + the transfer stopped. 
Args: request: The request to send Returns: - The received response + The received response, and the timestamps """ raise NotImplementedError() # pragma: no cover diff --git a/libmuscle/python/libmuscle/mmp_client.py b/libmuscle/python/libmuscle/mmp_client.py index 571e968a..7c203383 100644 --- a/libmuscle/python/libmuscle/mmp_client.py +++ b/libmuscle/python/libmuscle/mmp_client.py @@ -283,5 +283,5 @@ def _call_manager(self, request: Any) -> Any: The decoded response """ encoded_request = msgpack.packb(request, use_bin_type=True) - response = self._transport_client.call(encoded_request) + response, _ = self._transport_client.call(encoded_request) return msgpack.unpackb(response, raw=False) diff --git a/libmuscle/python/libmuscle/mpp_client.py b/libmuscle/python/libmuscle/mpp_client.py index e7fa5172..852ec938 100644 --- a/libmuscle/python/libmuscle/mpp_client.py +++ b/libmuscle/python/libmuscle/mpp_client.py @@ -1,10 +1,10 @@ -from typing import List, Optional +from typing import List, Optional, Tuple import msgpack from ymmsl import Reference from libmuscle.mcp.protocol import RequestType -from libmuscle.mcp.transport_client import TransportClient +from libmuscle.mcp.transport_client import ProfileData, TransportClient from libmuscle.mcp.type_registry import transport_client_types @@ -40,14 +40,14 @@ def __init__(self, locations: List[str]) -> None: self._transport_client = client - def receive(self, receiver: Reference) -> bytes: + def receive(self, receiver: Reference) -> Tuple[bytes, ProfileData]: """Receive a message from a port this client connects to. Args: receiver: The receiving (local) port. Returns: - The received message. 
+ The received message, and profiling data """ request = [RequestType.GET_NEXT_MESSAGE.value, str(receiver)] encoded_request = msgpack.packb(request, use_bin_type=True) diff --git a/libmuscle/python/libmuscle/mpp_message.py b/libmuscle/python/libmuscle/mpp_message.py index e78d8624..a239a2c2 100644 --- a/libmuscle/python/libmuscle/mpp_message.py +++ b/libmuscle/python/libmuscle/mpp_message.py @@ -141,7 +141,7 @@ def _ext_decoder(code: int, data: bytes) -> msgpack.ExtType: class MPPMessage: - """A MUSCLE Communication Protocol message. + """A MUSCLE Peer Protocol message. Messages carry the identity of their sender and receiver, so that they can be routed by a MUSCLE Transport Overlay when we get to diff --git a/libmuscle/python/libmuscle/profiling.py b/libmuscle/python/libmuscle/profiling.py index 4f3b56ee..4e5d753d 100644 --- a/libmuscle/python/libmuscle/profiling.py +++ b/libmuscle/python/libmuscle/profiling.py @@ -17,6 +17,9 @@ class ProfileEventType(Enum): DEREGISTER = 1 SEND = 2 RECEIVE = 3 + RECEIVE_WAIT = 5 + RECEIVE_TRANSFER = 6 + RECEIVE_DECODE = 7 class ProfileEvent: diff --git a/libmuscle/python/libmuscle/test/conftest.py b/libmuscle/python/libmuscle/test/conftest.py index e8a0e29b..f84ea289 100644 --- a/libmuscle/python/libmuscle/test/conftest.py +++ b/libmuscle/python/libmuscle/test/conftest.py @@ -5,7 +5,9 @@ from libmuscle.api_guard import APIGuard from libmuscle.communicator import Message +from libmuscle.mcp.transport_client import ProfileData from libmuscle.mmp_client import MMPClient +from libmuscle.timestamp import Timestamp @pytest.fixture @@ -27,3 +29,8 @@ def message2() -> Message: @pytest.fixture def guard() -> APIGuard: return APIGuard(True) + + +@pytest.fixture +def profile_data() -> ProfileData: + return Timestamp(0.0), Timestamp(0.0), Timestamp(0.0) diff --git a/libmuscle/python/libmuscle/test/test_communicator.py b/libmuscle/python/libmuscle/test/test_communicator.py index 140e9399..c3561bab 100644 --- 
a/libmuscle/python/libmuscle/test/test_communicator.py +++ b/libmuscle/python/libmuscle/test/test_communicator.py @@ -393,12 +393,12 @@ def test_close_port(communicator) -> None: assert isinstance(msg.data, ClosePort) -def test_receive_message(communicator) -> None: +def test_receive_message(communicator, profile_data) -> None: client_mock = MagicMock() client_mock.receive.return_value = MPPMessage( Reference('other.out[13]'), Reference('kernel[13].in'), None, 0.0, None, Settings({'test1': 12}), 0, 2.0, - b'test').encoded() + b'test').encoded(), profile_data get_client_mock = MagicMock(return_value=client_mock) communicator._Communicator__get_client = get_client_mock communicator._profiler = MagicMock() @@ -435,12 +435,12 @@ def test_receive_on_invalid_port(communicator) -> None: communicator.receive_message('@$Invalid_id') -def test_receive_msgpack(communicator) -> None: +def test_receive_msgpack(communicator, profile_data) -> None: client_mock = MagicMock() client_mock.receive.return_value = MPPMessage( Reference('other.out[13]'), Reference('kernel[13].in'), None, 0.0, None, Settings({'test1': 12}), 0, 1.0, - {'test': 13}).encoded() + {'test': 13}).encoded(), profile_data get_client_mock = MagicMock(return_value=client_mock) communicator._Communicator__get_client = get_client_mock communicator._profiler = MagicMock() @@ -453,12 +453,12 @@ def test_receive_msgpack(communicator) -> None: assert last_saved == 1.0 -def test_receive_with_slot(communicator2) -> None: +def test_receive_with_slot(communicator2, profile_data) -> None: client_mock = MagicMock() client_mock.receive.return_value = MPPMessage( Reference('kernel[13].out'), Reference('other.in[13]'), None, 0.0, None, Settings({'test': 'testing'}), 0, 3.0, - b'test').encoded() + b'test').encoded(), profile_data get_client_mock = MagicMock(return_value=client_mock) communicator2._Communicator__get_client = get_client_mock communicator2._profiler = MagicMock() @@ -472,12 +472,12 @@ def 
test_receive_with_slot(communicator2) -> None: assert last_saved == 3.0 -def test_receive_message_resizable(communicator3) -> None: +def test_receive_message_resizable(communicator3, profile_data) -> None: client_mock = MagicMock() client_mock.receive.return_value = MPPMessage( Reference('other.out[13]'), Reference('kernel.in[13]'), 20, 0.0, None, Settings({'test': 'testing'}), 0, 12.3, - b'test').encoded() + b'test').encoded(), profile_data get_client_mock = MagicMock(return_value=client_mock) communicator3._Communicator__get_client = get_client_mock communicator3._profiler = MagicMock() @@ -491,12 +491,12 @@ def test_receive_message_resizable(communicator3) -> None: assert last_saved == 12.3 -def test_receive_with_settings(communicator) -> None: +def test_receive_with_settings(communicator, profile_data) -> None: client_mock = MagicMock() client_mock.receive.return_value = MPPMessage( Reference('other.out[13]'), Reference('kernel[13].in'), None, 0.0, None, Settings({'test2': 3.1}), 0, 0.1, - b'test').encoded() + b'test').encoded(), profile_data get_client_mock = MagicMock(return_value=client_mock) communicator._Communicator__get_client = get_client_mock communicator._profiler = MagicMock() @@ -510,12 +510,13 @@ def test_receive_with_settings(communicator) -> None: assert last_saved == 0.1 -def test_receive_msgpack_with_slot_and_settings(communicator2) -> None: +def test_receive_msgpack_with_slot_and_settings( + communicator2, profile_data) -> None: client_mock = MagicMock() client_mock.receive.return_value = MPPMessage( Reference('kernel[13].out'), Reference('other.in[13]'), - None, 0.0, 1.0, - Settings({'test': 'testing'}), 0, 1.0, 'test').encoded() + None, 0.0, 1.0, Settings({'test': 'testing'}), 0, 1.0, + 'test').encoded(), profile_data get_client_mock = MagicMock(return_value=client_mock) communicator2._Communicator__get_client = get_client_mock communicator2._profiler = MagicMock() @@ -529,12 +530,12 @@ def 
test_receive_msgpack_with_slot_and_settings(communicator2) -> None: assert last_saved == 1.0 -def test_receive_settings(communicator) -> None: +def test_receive_settings(communicator, profile_data) -> None: client_mock = MagicMock() client_mock.receive.return_value = MPPMessage( Reference('other.out[13]'), Reference('kernel[13].in'), None, 0.0, None, Settings({'test1': 12}), 0, 1.0, - Settings({'test': 13})).encoded() + Settings({'test': 13})).encoded(), profile_data get_client_mock = MagicMock(return_value=client_mock) communicator._Communicator__get_client = get_client_mock communicator._profiler = MagicMock() @@ -548,11 +549,12 @@ def test_receive_settings(communicator) -> None: assert last_saved == 1.0 -def test_receive_close_port(communicator) -> None: +def test_receive_close_port(communicator, profile_data) -> None: client_mock = MagicMock() client_mock.receive.return_value = MPPMessage( Reference('other.out[13]'), Reference('kernel[13].in'), - None, 0.0, None, Settings(), 0, 0.1, ClosePort()).encoded() + None, 0.0, None, Settings(), 0, 0.1, ClosePort() + ).encoded(), profile_data get_client_mock = MagicMock(return_value=client_mock) communicator._Communicator__get_client = get_client_mock communicator._profiler = MagicMock() @@ -621,12 +623,12 @@ def test_vector_port_message_counts(communicator2, message) -> None: communicator2._post_office.get_message('kernel[13].in') -def test_port_count_validation(communicator): +def test_port_count_validation(communicator, profile_data): client_mock = MagicMock() client_mock.receive.return_value = MPPMessage( Reference('other.out[13]'), Reference('kernel[13].in'), None, 0.0, None, Settings({'test1': 12}), 0, 7.6, - b'test').encoded() + b'test').encoded(), profile_data get_client_mock = MagicMock(return_value=client_mock) communicator._Communicator__get_client = get_client_mock communicator._profiler = MagicMock() @@ -639,12 +641,12 @@ def test_port_count_validation(communicator): communicator.receive_message('in') -def 
test_port_discard_error_on_resume(caplog, communicator): +def test_port_discard_error_on_resume(caplog, communicator, profile_data): client_mock = MagicMock() client_mock.receive.return_value = MPPMessage( Reference('other.out[13]'), Reference('kernel[13].in'), None, 0.0, None, Settings({'test1': 12}), 1, 2.3, - b'test').encoded() + b'test').encoded(), profile_data get_client_mock = MagicMock(return_value=client_mock) communicator._Communicator__get_client = get_client_mock communicator._profiler = MagicMock() @@ -666,12 +668,12 @@ def test_port_discard_error_on_resume(caplog, communicator): assert 'Discarding received message' in caplog.records[1].message -def test_port_discard_success_on_resume(caplog, communicator): +def test_port_discard_success_on_resume(caplog, communicator, profile_data): client_mock = MagicMock() - client_mock.receive.side_effect = [MPPMessage( + client_mock.receive.side_effect = [(MPPMessage( Reference('other.out[13]'), Reference('kernel[13].in'), None, 0.0, None, Settings({'test1': 12}), message_number, 1.0, - {'this is message': message_number}).encoded() + {'this is message': message_number}).encoded(), profile_data) for message_number in [1, 2]] get_client_mock = MagicMock(return_value=client_mock) communicator._Communicator__get_client = get_client_mock diff --git a/libmuscle/python/libmuscle/test/test_mmp_client.py b/libmuscle/python/libmuscle/test/test_mmp_client.py index c780ed40..138efa0a 100644 --- a/libmuscle/python/libmuscle/test/test_mmp_client.py +++ b/libmuscle/python/libmuscle/test/test_mmp_client.py @@ -25,10 +25,11 @@ def test_connection_fail() -> None: MMPClient(Reference([]), 'tcp:localhost:255') -def test_submit_log_message(mocked_mmp_client) -> None: +def test_submit_log_message(mocked_mmp_client, profile_data) -> None: client, stub = mocked_mmp_client result = [ResponseType.SUCCESS.value] - stub.call.return_value = msgpack.packb(result, use_bin_type=True) + stub.call.return_value = ( + msgpack.packb(result, 
use_bin_type=True), profile_data) message = LogMessage( 'test_mmp_client', @@ -48,7 +49,7 @@ def test_submit_log_message(mocked_mmp_client) -> None: 'Testing the MMPClient'] -def test_get_settings(mocked_mmp_client) -> None: +def test_get_settings(mocked_mmp_client, profile_data) -> None: client, stub = mocked_mmp_client settings_msg = { @@ -59,7 +60,8 @@ def test_get_settings(mocked_mmp_client) -> None: 'test5': [1.2, 3.4], 'test6': [[1.2, 3.4], [5.6, 7.8]]} transport_result = [ResponseType.SUCCESS.value, settings_msg] - stub.call.return_value = msgpack.packb(transport_result, use_bin_type=True) + stub.call.return_value = ( + msgpack.packb(transport_result, use_bin_type=True), profile_data) settings = client.get_settings() assert len(settings) == 6 @@ -71,11 +73,12 @@ def test_get_settings(mocked_mmp_client) -> None: assert settings['test6'] == [[1.2, 3.4], [5.6, 7.8]] -def test_register_instance(mocked_mmp_client) -> None: +def test_register_instance(mocked_mmp_client, profile_data) -> None: client, stub = mocked_mmp_client result = [ResponseType.SUCCESS.value] - stub.call.return_value = msgpack.packb(result, use_bin_type=True) + stub.call.return_value = ( + msgpack.packb(result, use_bin_type=True), profile_data) client.register_instance( ['direct:test', 'tcp:test'], @@ -89,7 +92,7 @@ def test_register_instance(mocked_mmp_client) -> None: libmuscle.__version__] -def test_request_peers(mocked_mmp_client) -> None: +def test_request_peers(mocked_mmp_client, profile_data) -> None: client, stub = mocked_mmp_client result_msg = [ @@ -97,7 +100,8 @@ def test_request_peers(mocked_mmp_client) -> None: [['kernel.out', 'other.in']], {'other': [20]}, {'other': ['direct:test', 'tcp:test']}] - stub.call.return_value = msgpack.packb(result_msg, use_bin_type=True) + stub.call.return_value = ( + msgpack.packb(result_msg, use_bin_type=True), profile_data) result = client.request_peers() @@ -118,21 +122,23 @@ def test_request_peers(mocked_mmp_client) -> None: assert 
result[2]['other'] == ['direct:test', 'tcp:test'] -def test_request_peers_error(mocked_mmp_client) -> None: +def test_request_peers_error(mocked_mmp_client, profile_data) -> None: client, stub = mocked_mmp_client result_msg = [ResponseType.ERROR.value, 'test_error_message'] - stub.call.return_value = msgpack.packb(result_msg, use_bin_type=True) + stub.call.return_value = ( + msgpack.packb(result_msg, use_bin_type=True), profile_data) with pytest.raises(RuntimeError): client.request_peers() -def test_request_peers_timeout(mocked_mmp_client) -> None: +def test_request_peers_timeout(mocked_mmp_client, profile_data) -> None: client, stub = mocked_mmp_client result_msg = [ResponseType.PENDING.value, 'test_status_message'] - stub.call.return_value = msgpack.packb(result_msg, use_bin_type=True) + stub.call.return_value = ( + msgpack.packb(result_msg, use_bin_type=True), profile_data) with patch('libmuscle.mmp_client.PEER_TIMEOUT', 1), \ patch('libmuscle.mmp_client.PEER_INTERVAL_MIN', 0.1), \ @@ -141,11 +147,12 @@ def test_request_peers_timeout(mocked_mmp_client) -> None: client.request_peers() -def test_deregister_instance(mocked_mmp_client) -> None: +def test_deregister_instance(mocked_mmp_client, profile_data) -> None: client, stub = mocked_mmp_client result = [ResponseType.SUCCESS.value] - stub.call.return_value = msgpack.packb(result, use_bin_type=True) + stub.call.return_value = ( + msgpack.packb(result, use_bin_type=True), profile_data) client.deregister_instance() @@ -154,11 +161,12 @@ def test_deregister_instance(mocked_mmp_client) -> None: assert sent_msg == [RequestType.DEREGISTER_INSTANCE.value, 'component[13]'] -def test_deregister_instance_error(mocked_mmp_client) -> None: +def test_deregister_instance_error(mocked_mmp_client, profile_data) -> None: client, stub = mocked_mmp_client result = [ResponseType.ERROR.value, 'Instance component[13] unknown'] - stub.call.return_value = msgpack.packb(result, use_bin_type=True) + stub.call.return_value = ( + 
msgpack.packb(result, use_bin_type=True), profile_data) with pytest.raises(RuntimeError): client.deregister_instance() From 6dc58abb1013c4cc727c4bc46ef1c03614384854 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Thu, 24 Nov 2022 21:32:05 +0100 Subject: [PATCH 009/188] Clean up docstrings --- libmuscle/python/libmuscle/profiling.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/libmuscle/python/libmuscle/profiling.py b/libmuscle/python/libmuscle/profiling.py index 4e5d753d..6cfc8fc7 100644 --- a/libmuscle/python/libmuscle/profiling.py +++ b/libmuscle/python/libmuscle/profiling.py @@ -7,11 +7,7 @@ class ProfileEventType(Enum): - """Profiling event types for MUSCLE3. - - These match the types in the MUSCLE Manager Protocol, and should - be kept identical to those. - """ + """Profiling event types for MUSCLE3.""" REGISTER = 0 CONNECT = 4 DEREGISTER = 1 @@ -28,9 +24,6 @@ class ProfileEvent: This represents a single measurement of the timing of some event that occurred while executing the simulation. - Note that instance_id gets set by the profiler after submitting - the event, so it doesn't get passed in the constructor. - Args: event_type: Type of event that was measured. start_time: When the event started (real-world, not From 970896d4e804a923fbe6c3a852baec15ede103ee Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Thu, 24 Nov 2022 21:33:09 +0100 Subject: [PATCH 010/188] Refactor Timestamp in C++ --- libmuscle/cpp/src/libmuscle/logger.tpp | 4 ++-- libmuscle/cpp/src/libmuscle/timestamp.cpp | 17 ++++++++--------- libmuscle/cpp/src/libmuscle/timestamp.hpp | 8 ++++---- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/libmuscle/cpp/src/libmuscle/logger.tpp b/libmuscle/cpp/src/libmuscle/logger.tpp index 668a5a6b..d6e0ec8f 100644 --- a/libmuscle/cpp/src/libmuscle/logger.tpp +++ b/libmuscle/cpp/src/libmuscle/logger.tpp @@ -7,7 +7,7 @@ namespace libmuscle { namespace impl { template void Logger::log(LogLevel level, Args... 
args) { if ((level >= local_level_) || (level >= remote_level_)) { - auto ts = Timestamp::now(); + auto ts = Timestamp(); std::ostringstream oss; append_args_(oss, args...); @@ -17,7 +17,7 @@ void Logger::log(LogLevel level, Args... args) { } if (level >= remote_level_) { - LogMessage msg(instance_id_, Timestamp::now(), level, oss.str()); + LogMessage msg(instance_id_, Timestamp(), level, oss.str()); manager_.submit_log_message(msg); } } diff --git a/libmuscle/cpp/src/libmuscle/timestamp.cpp b/libmuscle/cpp/src/libmuscle/timestamp.cpp index 21ae1aca..f24a89f6 100644 --- a/libmuscle/cpp/src/libmuscle/timestamp.cpp +++ b/libmuscle/cpp/src/libmuscle/timestamp.cpp @@ -10,20 +10,19 @@ using wallclock = std::chrono::high_resolution_clock; namespace libmuscle { namespace impl { -Timestamp::Timestamp(double seconds) - : seconds(seconds) -{ - tzset(); -} - -Timestamp Timestamp::now() { +Timestamp::Timestamp() { auto since_epoch = wallclock::now().time_since_epoch(); double cycles = since_epoch.count(); - double seconds = cycles * wallclock::period::num / wallclock::period::den; - return Timestamp(seconds); + seconds = cycles * wallclock::period::num / wallclock::period::den; } +Timestamp::Timestamp(double seconds) + : seconds(seconds) +{} + std::ostream & operator<<(std::ostream & os, Timestamp ts) { + // tzset() needs to be called before localtime_r according to POSIX + tzset(); time_t time = static_cast(ts.seconds); struct tm time_tm; localtime_r(&time, &time_tm); diff --git a/libmuscle/cpp/src/libmuscle/timestamp.hpp b/libmuscle/cpp/src/libmuscle/timestamp.hpp index 95161c73..8268e25e 100644 --- a/libmuscle/cpp/src/libmuscle/timestamp.hpp +++ b/libmuscle/cpp/src/libmuscle/timestamp.hpp @@ -13,15 +13,15 @@ class Timestamp { */ double seconds; + /** Create a Timestamp representing the current time. + */ + Timestamp(); + /** Create a Timestamp. * * @param seconds The number of seconds since the UNIX epoch. 
*/ Timestamp(double seconds); - - /** Create a Timestamp representing the current time. - */ - static Timestamp now(); }; std::ostream & operator<<(std::ostream & os, Timestamp ts); From 0d5f8c955803e990c17a776edf80bc829dfb9a66 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Tue, 29 Nov 2022 09:37:45 +0100 Subject: [PATCH 011/188] Factor instance_id into MMPClient --- libmuscle/cpp/src/libmuscle/instance.cpp | 9 ++++---- libmuscle/cpp/src/libmuscle/mmp_client.cpp | 23 +++++++++++-------- libmuscle/cpp/src/libmuscle/mmp_client.hpp | 12 +++++----- .../src/libmuscle/tests/mmp_client_test.cpp | 8 +++---- .../libmuscle/tests/mocks/mock_mmp_client.cpp | 17 +++++++------- .../libmuscle/tests/mocks/mock_mmp_client.hpp | 10 ++++---- .../cpp/src/libmuscle/tests/test_instance.cpp | 2 +- .../cpp/src/libmuscle/tests/test_logger.cpp | 5 ++-- 8 files changed, 46 insertions(+), 40 deletions(-) diff --git a/libmuscle/cpp/src/libmuscle/instance.cpp b/libmuscle/cpp/src/libmuscle/instance.cpp index 7428c06d..addf9be8 100644 --- a/libmuscle/cpp/src/libmuscle/instance.cpp +++ b/libmuscle/cpp/src/libmuscle/instance.cpp @@ -161,7 +161,8 @@ Instance::Impl::Impl( MPI_Comm_dup(communicator, &mpi_comm_); if (mpi_barrier_.is_root()) { #endif - manager_.reset(new MMPClient(extract_manager_location_(argc, argv))); + manager_.reset( + new MMPClient(instance_name_, extract_manager_location_(argc, argv))); std::string instance_id = static_cast(instance_name_); std::string default_logfile = "muscle_" + instance_id + ".log"; @@ -414,7 +415,7 @@ void Instance::Impl::register_() { // TODO: profile this auto locations = communicator_->get_locations(); auto port_list = list_declared_ports_(); - manager_->register_instance(instance_name_, locations, port_list); + manager_->register_instance(locations, port_list); // TODO: stop profile logger_->info("Registered with the manager"); } @@ -423,7 +424,7 @@ void Instance::Impl::register_() { */ void Instance::Impl::connect_() { // TODO: profile this - auto 
peer_info = manager_->request_peers(instance_name_); + auto peer_info = manager_->request_peers(); communicator_->connect(std::get<0>(peer_info), std::get<1>(peer_info), std::get<2>(peer_info)); settings_manager_.base = manager_->get_settings(); // TODO: stop profile @@ -434,7 +435,7 @@ void Instance::Impl::connect_() { */ void Instance::Impl::deregister_() { // TODO: profile this - manager_->deregister_instance(instance_name_); + manager_->deregister_instance(); // TODO: stop profile // This is the last thing we'll profile, so flush messages // TODO: shut down profiler diff --git a/libmuscle/cpp/src/libmuscle/mmp_client.cpp b/libmuscle/cpp/src/libmuscle/mmp_client.cpp index 6a7f33d6..d3fa7119 100644 --- a/libmuscle/cpp/src/libmuscle/mmp_client.cpp +++ b/libmuscle/cpp/src/libmuscle/mmp_client.cpp @@ -63,13 +63,14 @@ namespace { Data encode_port(ymmsl::Port const & port) { return Data::list(std::string(port.name), encode_operator(port.oper)); } - } namespace libmuscle { namespace impl { -MMPClient::MMPClient(std::string const & location) - : transport_client_(location) +MMPClient::MMPClient( + Reference const & instance_id, std::string const & location) + : instance_id_(instance_id) + , transport_client_(location) {} void MMPClient::close() { @@ -88,7 +89,6 @@ void MMPClient::submit_log_message(LogMessage const & message) { } void MMPClient::register_instance( - Reference const & name, std::vector const & locations, std::vector<::ymmsl::Port> const & ports) { @@ -102,8 +102,8 @@ void MMPClient::register_instance( auto request = Data::list( static_cast(RequestType::register_instance), - std::string(name), encoded_locs, encoded_ports, - MUSCLE3_VERSION); + static_cast(instance_id_), encoded_locs, + encoded_ports, MUSCLE3_VERSION); auto response = call_manager_(request); @@ -126,7 +126,7 @@ ymmsl::Settings MMPClient::get_settings() { return settings; } -auto MMPClient::request_peers(Reference const & name) -> +auto MMPClient::request_peers() -> std::tuple< 
std::vector<::ymmsl::Conduit>, std::unordered_map<::ymmsl::Reference, std::vector>, @@ -136,7 +136,9 @@ auto MMPClient::request_peers(Reference const & name) -> int sleep_time = 100; // milliseconds auto start_time = steady_clock::now(); - auto request = Data::list(static_cast(RequestType::get_peers), std::string(name)); + auto request = Data::list( + static_cast(RequestType::get_peers), + static_cast(instance_id_)); auto response = call_manager_(request); const int status_pending = static_cast(ResponseType::pending); @@ -199,9 +201,10 @@ auto MMPClient::request_peers(Reference const & name) -> std::move(peer_locations)); } -void MMPClient::deregister_instance(Reference const & name) { +void MMPClient::deregister_instance() { auto request = Data::list( - static_cast(RequestType::deregister_instance), std::string(name)); + static_cast(RequestType::deregister_instance), + static_cast(instance_id_)); auto response = call_manager_(request); if (response[0].as() == static_cast(ResponseType::error)) { std::ostringstream oss; diff --git a/libmuscle/cpp/src/libmuscle/mmp_client.hpp b/libmuscle/cpp/src/libmuscle/mmp_client.hpp index f15a5bad..37a5c465 100644 --- a/libmuscle/cpp/src/libmuscle/mmp_client.hpp +++ b/libmuscle/cpp/src/libmuscle/mmp_client.hpp @@ -32,7 +32,9 @@ class MMPClient { * * @param location A connection string of the form hostname:port. */ - explicit MMPClient(std::string const & location); + explicit MMPClient( + ymmsl::Reference const & instance_id, + std::string const & location); /** Close the connection * @@ -55,12 +57,10 @@ class MMPClient { /** Register a component instance with the manager. * - * @param name Name of the instance in the simulation. * @param locations List of places where the instance can be reached. * @param ports List of ports of this instance. 
*/ void register_instance( - ::ymmsl::Reference const & name, std::vector const & locations, std::vector<::ymmsl::Port> const & ports); @@ -71,7 +71,6 @@ class MMPClient { * peer_interval_min and peer_interval_max. From there on, intervals * are drawn randomly from that range. * - * @param name Name of the current instance. * @return A tuple containng a list of conduits that this instance is * attached to, a dictionary of peer dimensions, which is indexed * by Reference to the peer kernel and specifies how many @@ -80,16 +79,17 @@ class MMPClient { * containing for each peer instance a list of network location * strings at which it can be reached. */ - auto request_peers(::ymmsl::Reference const & name) -> + auto request_peers() -> std::tuple< std::vector<::ymmsl::Conduit>, std::unordered_map<::ymmsl::Reference, std::vector>, std::unordered_map<::ymmsl::Reference, std::vector> >; - void deregister_instance(::ymmsl::Reference const & name); + void deregister_instance(); private: + ymmsl::Reference instance_id_; mcp::TcpTransportClient transport_client_; /* Helper function that encodes/decodes and calls the manager. 
diff --git a/libmuscle/cpp/src/libmuscle/tests/mmp_client_test.cpp b/libmuscle/cpp/src/libmuscle/tests/mmp_client_test.cpp index 4123b78a..27c0550a 100644 --- a/libmuscle/cpp/src/libmuscle/tests/mmp_client_test.cpp +++ b/libmuscle/cpp/src/libmuscle/tests/mmp_client_test.cpp @@ -42,12 +42,12 @@ void test_submit_log_message(MMPClient & client) { void test_register_instance(MMPClient & client) { client.register_instance( - Reference("micro[3]"), {"tcp:test1", "tcp:test2"}, + {"tcp:test1", "tcp:test2"}, {Port("out", Operator::O_F), Port("in", Operator::F_INIT)}); } void test_request_peers(MMPClient & client) { - auto result = client.request_peers("micro[3]"); + auto result = client.request_peers(); assert(std::get<0>(result).size() == 2); assert(std::get<0>(result)[0].sender == "macro.out"); assert(std::get<0>(result)[0].receiver == "micro.in"); @@ -65,11 +65,11 @@ void test_request_peers(MMPClient & client) { } void test_deregister_instance(MMPClient & client) { - client.deregister_instance(Reference("micro[3]")); + client.deregister_instance(); } int main(int argc, char *argv[]) { - MMPClient client(argv[1]); + MMPClient client(Reference("micro[3]"), argv[1]); test_get_settings(client); test_submit_log_message(client); diff --git a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mmp_client.cpp b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mmp_client.cpp index b7d0c055..895bbff5 100644 --- a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mmp_client.cpp +++ b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mmp_client.cpp @@ -9,8 +9,11 @@ using ymmsl::Reference; namespace libmuscle { namespace impl { -MockMMPClient::MockMMPClient(std::string const & location) { +MockMMPClient::MockMMPClient( + Reference const & instance_id, std::string const & location) +{ ++num_constructed; + last_instance_id = instance_id; last_location = location; } @@ -21,11 +24,9 @@ void MockMMPClient::submit_log_message(LogMessage const & message) { } void MockMMPClient::register_instance( - Reference 
const & name, std::vector const & locations, std::vector<::ymmsl::Port> const & ports) { - last_registered_name = name; last_registered_locations = locations; last_registered_ports = ports; } @@ -37,7 +38,7 @@ ymmsl::Settings MockMMPClient::get_settings() { return settings; } -auto MockMMPClient::request_peers(Reference const & name) -> +auto MockMMPClient::request_peers() -> std::tuple< std::vector<::ymmsl::Conduit>, std::unordered_map<::ymmsl::Reference, std::vector>, @@ -56,12 +57,12 @@ auto MockMMPClient::request_peers(Reference const & name) -> std::move(peer_locations)); } -void MockMMPClient::deregister_instance(Reference const & name) {} +void MockMMPClient::deregister_instance() {} void MockMMPClient::reset() { num_constructed = 0; + last_instance_id = Reference("NONE"); last_location = ""; - last_registered_name = "_none"; last_registered_locations.clear(); last_registered_ports.clear(); last_submitted_log_message.instance_id = ""; @@ -70,12 +71,12 @@ void MockMMPClient::reset() { last_submitted_log_message.text = ""; } +::ymmsl::Reference MockMMPClient::last_instance_id("NONE"); + int MockMMPClient::num_constructed = 0; std::string MockMMPClient::last_location(""); -::ymmsl::Reference MockMMPClient::last_registered_name("_none"); - std::vector MockMMPClient::last_registered_locations({}); std::vector<::ymmsl::Port> MockMMPClient::last_registered_ports({}); diff --git a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mmp_client.hpp b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mmp_client.hpp index dd285494..49b6d5ec 100644 --- a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mmp_client.hpp +++ b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mmp_client.hpp @@ -15,7 +15,8 @@ namespace libmuscle { namespace impl { class MockMMPClient { public: - explicit MockMMPClient(std::string const & location); + explicit MockMMPClient( + ymmsl::Reference const & instance_id, std::string const & location); void close(); @@ -24,24 +25,23 @@ class MockMMPClient { 
ymmsl::Settings get_settings(); void register_instance( - ::ymmsl::Reference const & name, std::vector const & locations, std::vector<::ymmsl::Port> const & ports); - auto request_peers(::ymmsl::Reference const & name) -> + auto request_peers() -> std::tuple< std::vector<::ymmsl::Conduit>, std::unordered_map<::ymmsl::Reference, std::vector>, std::unordered_map<::ymmsl::Reference, std::vector> >; - void deregister_instance(::ymmsl::Reference const & name); + void deregister_instance(); static void reset(); + static ::ymmsl::Reference last_instance_id; static int num_constructed; static std::string last_location; - static ::ymmsl::Reference last_registered_name; static std::vector last_registered_locations; static std::vector<::ymmsl::Port> last_registered_ports; static LogMessage last_submitted_log_message; diff --git a/libmuscle/cpp/src/libmuscle/tests/test_instance.cpp b/libmuscle/cpp/src/libmuscle/tests/test_instance.cpp index d6537296..883182bf 100644 --- a/libmuscle/cpp/src/libmuscle/tests/test_instance.cpp +++ b/libmuscle/cpp/src/libmuscle/tests/test_instance.cpp @@ -95,9 +95,9 @@ TEST(libmuscle_instance, create_instance) { ASSERT_EQ(TestInstance::instance_name_(instance), "test_instance[13][42]"); ASSERT_EQ(MockMMPClient::num_constructed, 1); + ASSERT_EQ(MockMMPClient::last_instance_id, "test_instance[13][42]"); ASSERT_EQ(MockMMPClient::last_location, "node042:9000"); ASSERT_EQ(MockCommunicator::num_constructed, 1); - ASSERT_EQ(MockMMPClient::last_registered_name, "test_instance[13][42]"); ASSERT_EQ(MockMMPClient::last_registered_locations.at(0), "tcp:test1,test2"); ASSERT_EQ(MockMMPClient::last_registered_locations.at(1), "tcp:test3"); ASSERT_EQ(MockMMPClient::last_registered_ports.size(), 3); diff --git a/libmuscle/cpp/src/libmuscle/tests/test_logger.cpp b/libmuscle/cpp/src/libmuscle/tests/test_logger.cpp index b35f36c8..6d1c11b4 100644 --- a/libmuscle/cpp/src/libmuscle/tests/test_logger.cpp +++ b/libmuscle/cpp/src/libmuscle/tests/test_logger.cpp @@ -20,6 
+20,7 @@ using libmuscle::impl::Logger; using libmuscle::impl::LogLevel; using libmuscle::impl::MockMMPClient; +using ymmsl::Reference; int main(int argc, char *argv[]) { @@ -37,7 +38,7 @@ void reset_mocks() { TEST(libmuscle_logging, test_logger) { reset_mocks(); - MockMMPClient manager(""); + MockMMPClient manager(Reference("test_instance[10]"), ""); Logger logger("test_instance[10]", "", manager); logger.log(LogLevel::CRITICAL, "Testing: ", 10, " == ", 10.0); @@ -51,7 +52,7 @@ TEST(libmuscle_logging, test_logger) { TEST(libmuscle_logging, test_set_level) { reset_mocks(); - MockMMPClient manager(""); + MockMMPClient manager(Reference("test_instance"), ""); Logger logger("test_instance", "", manager); // default is WARNING From 821cde234da736b459d0e0ac892be611ef0cb64d Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Tue, 29 Nov 2022 16:04:24 +0100 Subject: [PATCH 012/188] Add C++ profiling machinery --- libmuscle/cpp/src/libmuscle/communicator.cpp | 2 +- libmuscle/cpp/src/libmuscle/communicator.hpp | 6 +- libmuscle/cpp/src/libmuscle/instance.cpp | 8 +- libmuscle/cpp/src/libmuscle/mmp_client.cpp | 45 +++++++++ libmuscle/cpp/src/libmuscle/mmp_client.hpp | 7 ++ libmuscle/cpp/src/libmuscle/profiler.cpp | 36 +++++++ libmuscle/cpp/src/libmuscle/profiler.hpp | 54 +++++++++++ libmuscle/cpp/src/libmuscle/profiling.cpp | 40 ++++++++ libmuscle/cpp/src/libmuscle/profiling.hpp | 93 +++++++++++++++++++ .../tests/mocks/mock_communicator.cpp | 2 +- .../tests/mocks/mock_communicator.hpp | 3 +- .../libmuscle/tests/mocks/mock_profiler.cpp | 17 ++++ .../libmuscle/tests/mocks/mock_profiler.hpp | 23 +++++ .../src/libmuscle/tests/test_communicator.cpp | 34 ++++--- .../cpp/src/libmuscle/tests/test_instance.cpp | 3 +- 15 files changed, 352 insertions(+), 21 deletions(-) create mode 100644 libmuscle/cpp/src/libmuscle/profiler.cpp create mode 100644 libmuscle/cpp/src/libmuscle/profiler.hpp create mode 100644 libmuscle/cpp/src/libmuscle/profiling.cpp create mode 100644 
libmuscle/cpp/src/libmuscle/profiling.hpp create mode 100644 libmuscle/cpp/src/libmuscle/tests/mocks/mock_profiler.cpp create mode 100644 libmuscle/cpp/src/libmuscle/tests/mocks/mock_profiler.hpp diff --git a/libmuscle/cpp/src/libmuscle/communicator.cpp b/libmuscle/cpp/src/libmuscle/communicator.cpp index 644d67ae..42dda997 100644 --- a/libmuscle/cpp/src/libmuscle/communicator.cpp +++ b/libmuscle/cpp/src/libmuscle/communicator.cpp @@ -30,7 +30,7 @@ Communicator::Communicator( ymmsl::Reference const & kernel, std::vector const & index, Optional const & declared_ports, - Logger & logger, int profiler) + Logger & logger, Profiler & profiler) : kernel_(kernel) , index_(index) , declared_ports_(declared_ports) diff --git a/libmuscle/cpp/src/libmuscle/communicator.hpp b/libmuscle/cpp/src/libmuscle/communicator.hpp index b8fcac91..2ae8e294 100644 --- a/libmuscle/cpp/src/libmuscle/communicator.hpp +++ b/libmuscle/cpp/src/libmuscle/communicator.hpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -44,12 +45,11 @@ class Communicator { * @param logger The logger for this instance. * @param profiler The profiler to use for recording sends and receives. */ - // TODO: use actual Profiler Communicator( ymmsl::Reference const & kernel, std::vector const & index, Optional const & declared_ports, - Logger & logger, int profiler); + Logger & logger, Profiler & profiler); /** Returns a list of locations that we can be reached at. 
@@ -193,7 +193,7 @@ class Communicator { Optional declared_ports_; PostOffice post_office_; Logger & logger_; - int profiler_; + Profiler & profiler_; std::vector> servers_; std::unordered_map> clients_; Ports_ ports_; diff --git a/libmuscle/cpp/src/libmuscle/instance.cpp b/libmuscle/cpp/src/libmuscle/instance.cpp index addf9be8..fc5c85a2 100644 --- a/libmuscle/cpp/src/libmuscle/instance.cpp +++ b/libmuscle/cpp/src/libmuscle/instance.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -95,6 +96,7 @@ class Instance::Impl { ::ymmsl::Reference instance_name_; std::unique_ptr manager_; std::unique_ptr logger_; + std::unique_ptr profiler_; std::unique_ptr communicator_; #ifdef MUSCLE_ENABLE_MPI int mpi_root_; @@ -168,8 +170,10 @@ Instance::Impl::Impl( std::string default_logfile = "muscle_" + instance_id + ".log"; std::string log_file = extract_log_file_location(argc, argv, default_logfile); logger_.reset(new Logger(instance_id, log_file, *manager_)); + profiler_.reset(new Profiler(*manager_)); - communicator_.reset(new Communicator(name_(), index_(), ports, *logger_, 0)); + communicator_.reset( + new Communicator(name_(), index_(), ports, *logger_, *profiler_)); register_(); connect_(); set_local_log_level_(); @@ -438,7 +442,7 @@ void Instance::Impl::deregister_() { manager_->deregister_instance(); // TODO: stop profile // This is the last thing we'll profile, so flush messages - // TODO: shut down profiler + profiler_->shutdown(); logger_->info("Deregistered from the manager"); } diff --git a/libmuscle/cpp/src/libmuscle/mmp_client.cpp b/libmuscle/cpp/src/libmuscle/mmp_client.cpp index d3fa7119..3b036626 100644 --- a/libmuscle/cpp/src/libmuscle/mmp_client.cpp +++ b/libmuscle/cpp/src/libmuscle/mmp_client.cpp @@ -23,6 +23,8 @@ using libmuscle::impl::Data; using libmuscle::impl::DataConstRef; using libmuscle::impl::mcp::unpack_data; +using libmuscle::impl::Optional; +using libmuscle::impl::ProfileEvent; using std::chrono::steady_clock; using 
ymmsl::Conduit; using ymmsl::Reference; @@ -63,6 +65,34 @@ namespace { Data encode_port(ymmsl::Port const & port) { return Data::list(std::string(port.name), encode_operator(port.oper)); } + + template + Data encode_optional(Optional const & value) { + Data encoded; + if (value.is_set()) + encoded = value.get(); + return encoded; + } + + Data encode_profile_event(ProfileEvent const & event) { + if (!event.start_time.is_set() || !event.stop_time.is_set()) { + throw std::runtime_error( + "Incomplete ProfileEvent sent. This is a bug, please" + " report it."); + } + + Data encoded_port; + if (event.port.is_set()) + encoded_port = encode_port(event.port.get()); + + return Data::list( + static_cast(event.event_type), + event.start_time.get().seconds, + event.stop_time.get().seconds, + encoded_port, encode_optional(event.port_length), + encode_optional(event.slot), encode_optional(event.message_size), + encode_optional(event.message_timestamp)); + } } namespace libmuscle { namespace impl { @@ -88,6 +118,21 @@ void MMPClient::submit_log_message(LogMessage const & message) { call_manager_(request); } +void MMPClient::submit_profile_events( + std::vector const & events) +{ + auto event_list = Data::nils(events.size()); + for (std::size_t i = 0u; i < events.size(); ++i) + event_list[i] = encode_profile_event(events[i]); + + auto request = Data::list( + static_cast(RequestType::submit_profile_events), + static_cast(instance_id_), + event_list); + + auto response = call_manager_(request); +} + void MMPClient::register_instance( std::vector const & locations, std::vector<::ymmsl::Port> const & ports) diff --git a/libmuscle/cpp/src/libmuscle/mmp_client.hpp b/libmuscle/cpp/src/libmuscle/mmp_client.hpp index 37a5c465..84aa776b 100644 --- a/libmuscle/cpp/src/libmuscle/mmp_client.hpp +++ b/libmuscle/cpp/src/libmuscle/mmp_client.hpp @@ -14,6 +14,7 @@ #include #include #include +#include #include @@ -49,6 +50,12 @@ class MMPClient { */ void submit_log_message(LogMessage const & 
message); + /** Sends profiling events to the manager. + * + * @param events The events to send. + */ + void submit_profile_events(std::vector const & events); + /** Get the global settings from the manager. * * @return A Settings object with the global settings. diff --git a/libmuscle/cpp/src/libmuscle/profiler.cpp b/libmuscle/cpp/src/libmuscle/profiler.cpp new file mode 100644 index 00000000..a99d3bfd --- /dev/null +++ b/libmuscle/cpp/src/libmuscle/profiler.cpp @@ -0,0 +1,36 @@ +#include + +#include + + +using libmuscle::impl::Timestamp; + + +namespace libmuscle { namespace impl { + +Profiler::Profiler(MMPClient & manager) + : manager_(manager) + , events_() +{} + +void Profiler::shutdown() { + flush_(); +} + +void Profiler::record_event(ProfileEvent && event) { + if (!event.stop_time.is_set()) + event.stop_time = Timestamp(); + events_.push_back(std::move(event)); + if (events_.size() >= 100) + flush_(); +} + +void Profiler::flush_() { + if (!events_.empty()) { + manager_.submit_profile_events(events_); + events_.clear(); + } +} + +} } + diff --git a/libmuscle/cpp/src/libmuscle/profiler.hpp b/libmuscle/cpp/src/libmuscle/profiler.hpp new file mode 100644 index 00000000..8906ef70 --- /dev/null +++ b/libmuscle/cpp/src/libmuscle/profiler.hpp @@ -0,0 +1,54 @@ +#pragma once + +#ifdef LIBMUSCLE_MOCK_PROFILER +#include LIBMUSCLE_MOCK_PROFILER +#else + + +#include +#include +#include + + +namespace libmuscle { namespace impl { + +/** Collects profiling events and sends them to the manager. + */ +class Profiler { + public: + /** Create a Profiler. + * + * @param manager The client used to submit data to the manager. + */ + Profiler(MMPClient & manager); + + /** Shut down the profiler. + * + * This flushes any remaining data to the manager. + */ + void shutdown(); + + /** Record a profiling event. + * + * This will record the event, and may flush this and previously + * recorded events to the manager. If the time is still running, + * it will be stopped. 
Other than this the event must be complete + * when it is submitted. Allocate an event on the stack, then move + * it into this member function. Do not use the event object after + * calling this function with it. + * + * @param event The event to record. + */ + void record_event(ProfileEvent && event); + + private: + MMPClient & manager_; + std::vector events_; + + void flush_(); +}; + +} } + +#endif + diff --git a/libmuscle/cpp/src/libmuscle/profiling.cpp b/libmuscle/cpp/src/libmuscle/profiling.cpp new file mode 100644 index 00000000..11d231bd --- /dev/null +++ b/libmuscle/cpp/src/libmuscle/profiling.cpp @@ -0,0 +1,40 @@ +#include + +#include + + +using ymmsl::Port; + + +namespace libmuscle { namespace impl { + +ProfileEvent::ProfileEvent( + ProfileEventType event_type, + Optional start_time, + Optional stop_time, + Optional const & port, + Optional port_length, + Optional slot, + Optional message_size, + Optional message_timestamp) + : event_type(event_type) + , start_time(start_time) + , stop_time(stop_time) + , port(port) + , port_length(port_length) + , slot(slot) + , message_size(message_size) + , message_timestamp(message_timestamp) +{} + +void ProfileEvent::start() { + start_time = Timestamp(); +} + +void ProfileEvent::stop() { + stop_time = Timestamp(); +} + +} } + + diff --git a/libmuscle/cpp/src/libmuscle/profiling.hpp b/libmuscle/cpp/src/libmuscle/profiling.hpp new file mode 100644 index 00000000..71b65dec --- /dev/null +++ b/libmuscle/cpp/src/libmuscle/profiling.hpp @@ -0,0 +1,93 @@ +#pragma once + +#include + +#include +#include +#include + + +namespace libmuscle { namespace impl { + +/** Profiling event types for MUSCLE3. + * + * These match the definitions on the Python side, and should be kept in sync. + * + * The underscore on register_ is just there because it's a keyword. 
+ */ +enum class ProfileEventType { + register_ = 0, + connect = 4, + deregister = 1, + send = 2, + receive = 3, + receive_wait = 5, + receive_transfer = 6, + receive_decode = 7 +}; + + +/** A profile event as used by MUSCLE3. + * + * This represents a single measurement of the timing of some event that + * occurred while executing the simulation. + */ +class ProfileEvent { + public: + /** Create a ProfileEvent. + * + * @param event_type Type of event that was measured. + * @param start_time When the even started (real-world, not simulation + * time). + * @param stop_time When the event ended (real-world, not simulation + * time). + * @param port Port used for sending or receiving, if applicable. + * @param port_length Length of that port, if a vector. + * @param slot Slot that was sent or received on, if applicable. + * @param message_size Size of the message involved, if applicable. + * @param message_timestamp Timestamp sent with the message, if + * applicable. + */ + ProfileEvent( + ProfileEventType event_type, + Optional start_time = Optional(), + Optional stop_time = Optional(), + Optional const & port = Optional(), + Optional port_length = Optional(), + Optional slot = Optional(), + Optional message_size = Optional(), + Optional message_timestamp = Optional()); + + /** Sets start_time to the current time. */ + void start(); + + /** Sets stop_time to the current time. */ + void stop(); + + /// Type of event that was measured. + ProfileEventType event_type; + + /// When the event started (real-world, not simulation time). + Optional start_time; + + /// When the event ended (real-world, not simulation time). + Optional stop_time; + + /// Port used for sending or receiving, if applicable. + Optional port; + + /// Length of that port, if a vector. + Optional port_length; + + /// Slot that was sent or received on, if applicable. + Optional slot; + + /// Size of the message involved, if applicable. 
+ Optional message_size; + + /// Timestamp sent with the message, if applicable. + Optional message_timestamp; +}; + +} } + diff --git a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_communicator.cpp b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_communicator.cpp index 0f01a3a8..a34d3df1 100644 --- a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_communicator.cpp +++ b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_communicator.cpp @@ -22,7 +22,7 @@ MockCommunicator::MockCommunicator( ymmsl::Reference const & kernel, std::vector const & index, Optional const & declared_ports, - Logger & logger, int profiler) + Logger & logger, Profiler & profiler) { ++num_constructed; } diff --git a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_communicator.hpp b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_communicator.hpp index d9db51ec..32d47bee 100644 --- a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_communicator.hpp +++ b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_communicator.hpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -26,7 +27,7 @@ class MockCommunicator { ymmsl::Reference const & kernel, std::vector const & index, Optional const & declared_ports, - Logger & logger, int profiler); + Logger & logger, Profiler & profiler); std::vector get_locations() const; diff --git a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_profiler.cpp b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_profiler.cpp new file mode 100644 index 00000000..4810bbf6 --- /dev/null +++ b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_profiler.cpp @@ -0,0 +1,17 @@ +#include "mocks/mock_profiler.hpp" + +#include + + +namespace libmuscle { namespace impl { + + MockProfiler::MockProfiler() {}; + + MockProfiler::MockProfiler(MMPClient & manager) {}; + + void MockProfiler::shutdown() {}; + + void MockProfiler::record_event(ProfileEvent && event) {}; + +} } + diff --git a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_profiler.hpp b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_profiler.hpp new 
file mode 100644 index 00000000..9f9a4365 --- /dev/null +++ b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_profiler.hpp @@ -0,0 +1,23 @@ +#pragma once + +#include +#include + + +namespace libmuscle { namespace impl { + +class MockProfiler { + public: + MockProfiler(); + + MockProfiler(MMPClient & manager); + + void shutdown(); + + void record_event(ProfileEvent && event); +}; + +using Profiler = MockProfiler; + +} } + diff --git a/libmuscle/cpp/src/libmuscle/tests/test_communicator.cpp b/libmuscle/cpp/src/libmuscle/tests/test_communicator.cpp index 4286f452..01291a91 100644 --- a/libmuscle/cpp/src/libmuscle/tests/test_communicator.cpp +++ b/libmuscle/cpp/src/libmuscle/tests/test_communicator.cpp @@ -4,6 +4,7 @@ #define LIBMUSCLE_MOCK_MCP_TCP_TRANSPORT_SERVER #define LIBMUSCLE_MOCK_PEER_MANAGER #define LIBMUSCLE_MOCK_POST_OFFICE +#define LIBMUSCLE_MOCK_PROFILER // into the real implementation, #include @@ -25,6 +26,7 @@ #include #include #include +#include #include #include @@ -38,6 +40,8 @@ #include #include #include +#include + using libmuscle::impl::Communicator; using libmuscle::impl::Data; @@ -45,12 +49,13 @@ using libmuscle::impl::Endpoint; using libmuscle::impl::Optional; using libmuscle::impl::PeerDims; using libmuscle::impl::PeerLocations; -using libmuscle::impl::MockLogger; -using libmuscle::impl::MockPeerManager; -using libmuscle::impl::MockPostOffice; using libmuscle::impl::Port; using libmuscle::impl::PortsDescription; using libmuscle::impl::Message; +using libmuscle::impl::MockLogger; +using libmuscle::impl::MockPeerManager; +using libmuscle::impl::MockPostOffice; +using libmuscle::impl::MockProfiler; using libmuscle::impl::MockMPPClient; using libmuscle::impl::mcp::MockTcpTransportServer; @@ -95,9 +100,14 @@ MockLogger & mock_logger() { return logger; } +MockProfiler & mock_profiler() { + static MockProfiler profiler; + return profiler; +} + std::unique_ptr connected_communicator() { std::unique_ptr comm(new Communicator( - Reference("kernel"), {13}, 
{}, mock_logger(), 0)); + Reference("kernel"), {13}, {}, mock_logger(), mock_profiler())); std::vector conduits({ Conduit("kernel.out", "other.in"), @@ -120,7 +130,7 @@ std::unique_ptr connected_communicator() { std::unique_ptr connected_communicator2() { std::unique_ptr comm(new Communicator( - Reference("other"), {}, {}, mock_logger(), 0)); + Reference("other"), {}, {}, mock_logger(), mock_profiler())); std::vector conduits({ Conduit("kernel.out", "other.in"), @@ -148,7 +158,7 @@ std::unique_ptr connected_communicator3() { }); std::unique_ptr comm(new Communicator( - Reference("kernel"), {}, desc, mock_logger(), 0)); + Reference("kernel"), {}, desc, mock_logger(), mock_profiler())); std::vector conduits({ Conduit("kernel.out", "other.in"), @@ -177,7 +187,7 @@ std::unique_ptr connected_communicator3() { TEST(libmuscle_communicator, create_communicator) { reset_mocks(); Communicator comm( - Reference("kernel"), {13}, {}, mock_logger(), 0); + Reference("kernel"), {13}, {}, mock_logger(), mock_profiler()); ASSERT_EQ(MockTcpTransportServer::num_constructed, 1); ASSERT_EQ(MockMPPClient::num_constructed, 0); } @@ -185,7 +195,7 @@ TEST(libmuscle_communicator, create_communicator) { TEST(libmuscle_communicator, get_locations) { reset_mocks(); Communicator comm( - Reference("kernel"), {13}, {}, mock_logger(), 0); + Reference("kernel"), {13}, {}, mock_logger(), mock_profiler()); ASSERT_EQ(comm.get_locations().size(), 1); ASSERT_EQ(comm.get_locations()[0], "tcp:test_location"); } @@ -193,7 +203,7 @@ TEST(libmuscle_communicator, get_locations) { TEST(libmuscle_communicator, test_connect) { reset_mocks(); Communicator comm( - Reference("kernel"), {13}, {}, mock_logger(), 0); + Reference("kernel"), {13}, {}, mock_logger(), mock_profiler()); std::vector conduits({ Conduit("kernel.out", "other.in"), @@ -229,7 +239,7 @@ TEST(libmuscle_communicator, test_connect_vector_ports) { }); Communicator comm( - Reference("kernel"), {13}, desc, mock_logger(), 0); + Reference("kernel"), {13}, 
desc, mock_logger(), mock_profiler()); std::vector conduits({ Conduit("other1.out", "kernel.in"), @@ -291,7 +301,7 @@ TEST(libmuscle_communicator, test_connect_multidimensional_ports) { }); Communicator comm( - Reference("kernel"), {13}, desc, mock_logger(), 0); + Reference("kernel"), {13}, desc, mock_logger(), mock_profiler()); std::vector conduits({ Conduit("other.out", "kernel.in") @@ -322,7 +332,7 @@ TEST(libmuscle_communicator, test_connect_inferred_ports) { reset_mocks(); Communicator comm( - Reference("kernel"), {13}, {}, mock_logger(), 0); + Reference("kernel"), {13}, {}, mock_logger(), mock_profiler()); std::vector conduits({ Conduit("other1.out", "kernel.in"), diff --git a/libmuscle/cpp/src/libmuscle/tests/test_instance.cpp b/libmuscle/cpp/src/libmuscle/tests/test_instance.cpp index 883182bf..48247a57 100644 --- a/libmuscle/cpp/src/libmuscle/tests/test_instance.cpp +++ b/libmuscle/cpp/src/libmuscle/tests/test_instance.cpp @@ -2,6 +2,7 @@ #define LIBMUSCLE_MOCK_COMMUNICATOR #define LIBMUSCLE_MOCK_LOGGER #define LIBMUSCLE_MOCK_MMP_CLIENT +#define LIBMUSCLE_MOCK_PROFILER // into the real implementation, #include @@ -20,7 +21,7 @@ #include #include #include - +#include // Test code dependencies #include From c3e28f71cd6c7ebf9642693da44a4ca11de44c5c Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Tue, 29 Nov 2022 20:10:31 +0100 Subject: [PATCH 013/188] Profile sending in C++ --- libmuscle/cpp/src/libmuscle/communicator.cpp | 12 ++++++++++-- libmuscle/cpp/src/libmuscle/instance.cpp | 16 ++++++++++------ libmuscle/cpp/src/libmuscle/profiling.cpp | 2 +- .../src/libmuscle/tests/test_communicator.cpp | 5 +++-- libmuscle/cpp/src/libmuscle/util.hpp | 5 +++-- libmuscle/cpp/src/libmuscle/util.tpp | 5 +++-- 6 files changed, 30 insertions(+), 15 deletions(-) diff --git a/libmuscle/cpp/src/libmuscle/communicator.cpp b/libmuscle/cpp/src/libmuscle/communicator.cpp index 42dda997..c436e7ef 100644 --- a/libmuscle/cpp/src/libmuscle/communicator.cpp +++ 
b/libmuscle/cpp/src/libmuscle/communicator.cpp @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include @@ -118,7 +120,9 @@ void Communicator::send_message( Port & port = ports_.at(port_name); - // TODO start profile event + ProfileEvent profile_event( + ProfileEventType::send, Timestamp(), {}, port, {}, slot, + {}, message.timestamp()); auto recv_endpoints = peer_manager_->get_peer_endpoints( snd_endpoint.port, slot_list); @@ -140,12 +144,16 @@ void Communicator::send_message( mpp_message.next_timestamp = message.next_timestamp(); auto message_bytes = std::make_unique(mpp_message.encoded()); + profile_event.message_size = message_bytes->size(); post_office_.deposit(recv_endpoint.ref(), std::move(message_bytes)); } port.increment_num_messages(slot); - // TODO: stop and complete profile event + profile_event.stop(); + if (port.is_vector()) + profile_event.port_length = port.get_length(); + profiler_.record_event(std::move(profile_event)); } Message Communicator::receive_message( diff --git a/libmuscle/cpp/src/libmuscle/instance.cpp b/libmuscle/cpp/src/libmuscle/instance.cpp index fc5c85a2..1c330ba1 100644 --- a/libmuscle/cpp/src/libmuscle/instance.cpp +++ b/libmuscle/cpp/src/libmuscle/instance.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -14,6 +15,7 @@ #include #include #include +#include #ifdef MUSCLE_ENABLE_MPI #include @@ -25,6 +27,8 @@ using ymmsl::Reference; using ymmsl::Settings; using libmuscle::impl::LogLevel; +using libmuscle::impl::ProfileEvent; +using libmuscle::impl::ProfileEventType; namespace { @@ -416,31 +420,31 @@ void Instance::Impl::send( /* Register this instance with the manager. 
*/ void Instance::Impl::register_() { - // TODO: profile this + ProfileEvent register_event(ProfileEventType::register_, Timestamp()); auto locations = communicator_->get_locations(); auto port_list = list_declared_ports_(); manager_->register_instance(locations, port_list); - // TODO: stop profile + profiler_->record_event(std::move(register_event)); logger_->info("Registered with the manager"); } /* Connect this instance to the given peers / conduits. */ void Instance::Impl::connect_() { - // TODO: profile this + ProfileEvent connect_event(ProfileEventType::connect, Timestamp()); auto peer_info = manager_->request_peers(); communicator_->connect(std::get<0>(peer_info), std::get<1>(peer_info), std::get<2>(peer_info)); settings_manager_.base = manager_->get_settings(); - // TODO: stop profile + profiler_->record_event(std::move(connect_event)); logger_->info("Received peer locations and base settings"); } /* Deregister this instance from the manager. */ void Instance::Impl::deregister_() { - // TODO: profile this + ProfileEvent deregister_event(ProfileEventType::deregister, Timestamp()); manager_->deregister_instance(); - // TODO: stop profile + profiler_->record_event(std::move(deregister_event)); // This is the last thing we'll profile, so flush messages profiler_->shutdown(); logger_->info("Deregistered from the manager"); diff --git a/libmuscle/cpp/src/libmuscle/profiling.cpp b/libmuscle/cpp/src/libmuscle/profiling.cpp index 11d231bd..9583cd97 100644 --- a/libmuscle/cpp/src/libmuscle/profiling.cpp +++ b/libmuscle/cpp/src/libmuscle/profiling.cpp @@ -12,7 +12,7 @@ ProfileEvent::ProfileEvent( ProfileEventType event_type, Optional start_time, Optional stop_time, - Optional const & port, + Optional const & port, Optional port_length, Optional slot, Optional message_size, diff --git a/libmuscle/cpp/src/libmuscle/tests/test_communicator.cpp b/libmuscle/cpp/src/libmuscle/tests/test_communicator.cpp index 01291a91..aa4db788 100644 --- 
a/libmuscle/cpp/src/libmuscle/tests/test_communicator.cpp +++ b/libmuscle/cpp/src/libmuscle/tests/test_communicator.cpp @@ -21,6 +21,8 @@ #include #include #include +#include +#include // then add mock implementations as needed. #include @@ -49,7 +51,6 @@ using libmuscle::impl::Endpoint; using libmuscle::impl::Optional; using libmuscle::impl::PeerDims; using libmuscle::impl::PeerLocations; -using libmuscle::impl::Port; using libmuscle::impl::PortsDescription; using libmuscle::impl::Message; using libmuscle::impl::MockLogger; @@ -73,7 +74,7 @@ int main(int argc, char *argv[]) { namespace libmuscle { namespace impl { struct TestCommunicator { - static std::unordered_map const & ports_( + static std::unordered_map const & ports_( Communicator const & comm) { return comm.ports_; diff --git a/libmuscle/cpp/src/libmuscle/util.hpp b/libmuscle/cpp/src/libmuscle/util.hpp index b579480b..1dffe8ba 100644 --- a/libmuscle/cpp/src/libmuscle/util.hpp +++ b/libmuscle/cpp/src/libmuscle/util.hpp @@ -58,9 +58,10 @@ class Optional { * is_set() will return true for this object, and get() will return it. * Also implicitly converts, of course. * - * @param t An object to copy + * @param u An object to copy. */ - Optional(T const & t); + template + Optional(U const & u); /** Copy an Optional. 
* diff --git a/libmuscle/cpp/src/libmuscle/util.tpp b/libmuscle/cpp/src/libmuscle/util.tpp index 83220cc6..1d0adbd2 100644 --- a/libmuscle/cpp/src/libmuscle/util.tpp +++ b/libmuscle/cpp/src/libmuscle/util.tpp @@ -21,10 +21,11 @@ Optional::Optional(std::initializer_list l) } template -Optional::Optional(T const & t) +template +Optional::Optional(U const & u) : is_set_(true) { - new (&t_) T(t); + new (&t_) T(u); } template From fd0b878473011f1ee62ad0682c06fea03863d2c3 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Thu, 1 Dec 2022 08:47:15 +0100 Subject: [PATCH 014/188] Profile receiving in C++ --- libmuscle/cpp/src/libmuscle/communicator.cpp | 31 ++++++++++++++++--- .../libmuscle/mcp/tcp_transport_client.cpp | 13 +++++--- .../libmuscle/mcp/tcp_transport_client.hpp | 3 +- .../src/libmuscle/mcp/transport_client.hpp | 18 ++++++++--- libmuscle/cpp/src/libmuscle/mmp_client.cpp | 3 +- libmuscle/cpp/src/libmuscle/mpp_client.cpp | 5 ++- libmuscle/cpp/src/libmuscle/mpp_client.hpp | 6 ++-- .../libmuscle/tests/mocks/mock_mpp_client.cpp | 9 ++++-- .../libmuscle/tests/mocks/mock_mpp_client.hpp | 10 +++++- .../src/libmuscle/tests/mpp_client_test.cpp | 3 +- .../tests/test_tcp_communication.cpp | 2 +- .../libmuscle/tests/test_tcp_transport.cpp | 7 +++-- 12 files changed, 86 insertions(+), 24 deletions(-) diff --git a/libmuscle/cpp/src/libmuscle/communicator.cpp b/libmuscle/cpp/src/libmuscle/communicator.cpp index c436e7ef..dcca535e 100644 --- a/libmuscle/cpp/src/libmuscle/communicator.cpp +++ b/libmuscle/cpp/src/libmuscle/communicator.cpp @@ -188,18 +188,25 @@ Message Communicator::receive_message( Port & port = (ports_.count(port_name)) ? 
(ports_.at(port_name)) : muscle_settings_in_.get(); - // TODO start profile event + ProfileEvent receive_event( + ProfileEventType::receive, Timestamp(), {}, port, {}, slot); // peer_manager already checks that there is at most one snd_endpoint // connected to the port we receive on Endpoint snd_endpoint = peer_manager_->get_peer_endpoints( recv_endpoint.port, slot_list).at(0); MPPClient & client = get_client_(snd_endpoint.instance()); - auto mpp_message = MPPMessage::from_bytes( - client.receive(recv_endpoint.ref())); + auto msg_and_profile = client.receive(recv_endpoint.ref()); + ProfileEvent recv_decode_event( + ProfileEventType::receive_decode, Timestamp(), {}, port, {}, slot, + std::get<0>(msg_and_profile).size()); + + auto mpp_message = MPPMessage::from_bytes(std::get<0>(msg_and_profile)); Settings overlay_settings(mpp_message.settings_overlay.as()); + profiler_.record_event(std::move(recv_decode_event)); + if (mpp_message.port_length.is_set()) if (port.is_resizable()) port.set_length(mpp_message.port_length.get()); @@ -217,7 +224,23 @@ Message Communicator::receive_message( port.set_closed(); } - // TODO stop and finalise profile event + auto profile = std::get<1>(msg_and_profile); + ProfileEvent recv_wait_event( + ProfileEventType::receive_wait, std::get<0>(profile), + std::get<1>(profile), port, mpp_message.port_length, slot); + profiler_.record_event(std::move(recv_wait_event)); + + ProfileEvent recv_xfer_event( + ProfileEventType::receive_transfer, std::get<1>(profile), + std::get<2>(profile), port, mpp_message.port_length, slot, + std::get<0>(msg_and_profile).size(), message.timestamp()); + profiler_.record_event(std::move(recv_xfer_event)); + + receive_event.message_timestamp = message.timestamp(); + if (port.is_vector()) + receive_event.port_length = port.get_length(); + receive_event.message_size = std::get<0>(msg_and_profile).size(); + profiler_.record_event(std::move(receive_event)); int expected_message_number = port.get_num_messages(slot); // 
TODO: handle f_init port counts for STATELESS and WEAKLY_STATEFUL diff --git a/libmuscle/cpp/src/libmuscle/mcp/tcp_transport_client.cpp b/libmuscle/cpp/src/libmuscle/mcp/tcp_transport_client.cpp index 959737d2..39fe2600 100644 --- a/libmuscle/cpp/src/libmuscle/mcp/tcp_transport_client.cpp +++ b/libmuscle/cpp/src/libmuscle/mcp/tcp_transport_client.cpp @@ -1,7 +1,8 @@ #include "libmuscle/mcp/tcp_transport_client.hpp" -#include "libmuscle/data.hpp" -#include "libmuscle/mcp/tcp_util.hpp" +#include +#include +#include #include #include @@ -121,15 +122,19 @@ TcpTransportClient::~TcpTransportClient() { close(); } -DataConstRef TcpTransportClient::call( +std::tuple TcpTransportClient::call( char const * req_buf, std::size_t req_len ) const { + Timestamp start_wait; send_frame(socket_fd_, req_buf, req_len); int64_t length = recv_int64(socket_fd_); + Timestamp start_transfer; auto result = Data::byte_array(length); recv_all(socket_fd_, result.as_byte_array(), result.size()); - return result; + Timestamp stop_transfer; + return std::make_tuple( + result, std::make_tuple(start_wait, start_transfer, stop_transfer)); } void TcpTransportClient::close() { diff --git a/libmuscle/cpp/src/libmuscle/mcp/tcp_transport_client.hpp b/libmuscle/cpp/src/libmuscle/mcp/tcp_transport_client.hpp index 75351da4..e22f689f 100644 --- a/libmuscle/cpp/src/libmuscle/mcp/tcp_transport_client.hpp +++ b/libmuscle/cpp/src/libmuscle/mcp/tcp_transport_client.hpp @@ -7,6 +7,7 @@ #include "libmuscle/mcp/transport_client.hpp" #include "libmuscle/data.hpp" +#include namespace libmuscle { namespace impl { namespace mcp { @@ -45,7 +46,7 @@ class TcpTransportClient : public TransportClient { * @return A DataConstRef containing a byte array with the received * data. */ - virtual DataConstRef call( + virtual std::tuple call( char const * req_buf, std::size_t req_len) const override; /** Closes this client. 
diff --git a/libmuscle/cpp/src/libmuscle/mcp/transport_client.hpp b/libmuscle/cpp/src/libmuscle/mcp/transport_client.hpp index c4574795..45cdeb72 100644 --- a/libmuscle/cpp/src/libmuscle/mcp/transport_client.hpp +++ b/libmuscle/cpp/src/libmuscle/mcp/transport_client.hpp @@ -1,13 +1,20 @@ #pragma once -#include "libmuscle/data.hpp" +#include + +#include #include +#include #include namespace libmuscle { namespace impl { namespace mcp { + +using ProfileData = std::tuple; + + /** A client that connects to an MCP transport server. * * This is a base class for MCP Transport Clients. An MCP Transport Client @@ -50,15 +57,18 @@ class TransportClient { /** Send a request to the server and receive the response. * - * This is a blocking call. + * This is a blocking call. Besides the result, this function + * returns a tuple with three timestamps. These were taken when + * the function was first called, when data became available and + * the transfer started, and when the transfer stopped. * * @param req_buf Pointer to the request to send * @param req_len Length of the request in bytes * * @return DataConstRef containing a byte array with the received - * data. + * data, and the timestamps. */ - virtual DataConstRef call( + virtual std::tuple call( char const * req_buf, std::size_t req_len) const = 0; /** Closes this client. 
diff --git a/libmuscle/cpp/src/libmuscle/mmp_client.cpp b/libmuscle/cpp/src/libmuscle/mmp_client.cpp index 3b036626..262b8dd5 100644 --- a/libmuscle/cpp/src/libmuscle/mmp_client.cpp +++ b/libmuscle/cpp/src/libmuscle/mmp_client.cpp @@ -262,7 +262,8 @@ DataConstRef MMPClient::call_manager_(DataConstRef const & request) { msgpack::sbuffer sbuf; msgpack::pack(sbuf, request); - auto result = transport_client_.call(sbuf.data(), sbuf.size()); + auto res = transport_client_.call(sbuf.data(), sbuf.size()); + auto const & result = std::get<0>(res); auto zone = std::make_shared(); return unpack_data(zone, result.as_byte_array(), result.size()); diff --git a/libmuscle/cpp/src/libmuscle/mpp_client.cpp b/libmuscle/cpp/src/libmuscle/mpp_client.cpp index ea54686d..274cb421 100644 --- a/libmuscle/cpp/src/libmuscle/mpp_client.cpp +++ b/libmuscle/cpp/src/libmuscle/mpp_client.cpp @@ -11,6 +11,7 @@ using libmuscle::impl::Data; using libmuscle::impl::DataConstRef; +using libmuscle::impl::mcp::ProfileData; using libmuscle::impl::mcp::TcpTransportClient; using ymmsl::Reference; @@ -23,7 +24,9 @@ MPPClient::MPPClient(std::vector const & locations) { throw std::runtime_error("Could not connect to peer"); } -DataConstRef MPPClient::receive(Reference const & receiver) { +std::tuple MPPClient::receive( + Reference const & receiver) +{ auto request = Data::list( static_cast(RequestType::get_next_message), std::string(receiver)); diff --git a/libmuscle/cpp/src/libmuscle/mpp_client.hpp b/libmuscle/cpp/src/libmuscle/mpp_client.hpp index 3b2a9350..8e9be99e 100644 --- a/libmuscle/cpp/src/libmuscle/mpp_client.hpp +++ b/libmuscle/cpp/src/libmuscle/mpp_client.hpp @@ -11,6 +11,7 @@ #include #include +#include #include @@ -52,13 +53,14 @@ class MPPClient { /** Receive a message from a port this client connects to. * * This returns a DataConstRef holding a byte array with the received - * data. + * data, and profiling data. * * @param The receiving (local) port. * * @return The received message. 
*/ - DataConstRef receive(::ymmsl::Reference const & receiver); + std::tuple receive( + ::ymmsl::Reference const & receiver); /** Closes this client. * diff --git a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mpp_client.cpp b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mpp_client.cpp index 31507a44..c4c99850 100644 --- a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mpp_client.cpp +++ b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mpp_client.cpp @@ -2,9 +2,11 @@ #include #include +#include #include #include +#include #include @@ -16,10 +18,13 @@ MockMPPClient::MockMPPClient(std::vector const & locations) { MockMPPClient::~MockMPPClient() {} -DataConstRef MockMPPClient::receive(::ymmsl::Reference const & receiver) { +std::tuple MockMPPClient::receive( + ::ymmsl::Reference const & receiver) { last_receiver = receiver; - return next_receive_message.encoded(); + return std::make_tuple( + next_receive_message.encoded(), std::make_tuple( + Timestamp(1.0), Timestamp(2.0), Timestamp(3.0))); } void MockMPPClient::close() {} diff --git a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mpp_client.hpp b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mpp_client.hpp index 64e3f029..793e276e 100644 --- a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mpp_client.hpp +++ b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mpp_client.hpp @@ -1,15 +1,22 @@ #pragma once #include +#include +#include #include #include +#include #include namespace libmuscle { namespace impl { + +using ProfileData = std::tuple; + + class MockMPPClient { public: MockMPPClient(std::vector const & locations); @@ -19,7 +26,8 @@ class MockMPPClient { MockMPPClient & operator=(MockMPPClient && rhs) = delete; ~MockMPPClient(); - DataConstRef receive(::ymmsl::Reference const & receiver); + std::tuple receive( + ::ymmsl::Reference const & receiver); void close(); diff --git a/libmuscle/cpp/src/libmuscle/tests/mpp_client_test.cpp b/libmuscle/cpp/src/libmuscle/tests/mpp_client_test.cpp index bfa77644..4b2d1319 100644 --- 
a/libmuscle/cpp/src/libmuscle/tests/mpp_client_test.cpp +++ b/libmuscle/cpp/src/libmuscle/tests/mpp_client_test.cpp @@ -9,6 +9,7 @@ #include #include +#include #include @@ -32,7 +33,7 @@ int main(int argc, char *argv[]) { // receive a message Reference receiver("test_receiver.test_port2"); - DataConstRef bytes = client.receive(receiver); + DataConstRef bytes = std::get<0>(client.receive(receiver)); MPPMessage message = MPPMessage::from_bytes(bytes); // check message diff --git a/libmuscle/cpp/src/libmuscle/tests/test_tcp_communication.cpp b/libmuscle/cpp/src/libmuscle/tests/test_tcp_communication.cpp index 2d152161..6d70fbc2 100644 --- a/libmuscle/cpp/src/libmuscle/tests/test_tcp_communication.cpp +++ b/libmuscle/cpp/src/libmuscle/tests/test_tcp_communication.cpp @@ -44,7 +44,7 @@ TEST(test_tcp_communication, send_receive) { TcpTransportServer server(post_office); std::vector locations = {server.get_location()}; MPPClient client(locations); - DataConstRef bytes = client.receive(receiver); + DataConstRef bytes = std::get<0>(client.receive(receiver)); MPPMessage m = MPPMessage::from_bytes(bytes); ASSERT_EQ(m.sender, "test_sender.port"); diff --git a/libmuscle/cpp/src/libmuscle/tests/test_tcp_transport.cpp b/libmuscle/cpp/src/libmuscle/tests/test_tcp_transport.cpp index ff2f76f2..4f084a24 100644 --- a/libmuscle/cpp/src/libmuscle/tests/test_tcp_transport.cpp +++ b/libmuscle/cpp/src/libmuscle/tests/test_tcp_transport.cpp @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -99,7 +100,8 @@ TEST(test_tcp_communication, send_receive_direct) { ASSERT_TRUE(TcpTransportClient::can_connect_to(location)); TcpTransportClient client(location); - auto result = client.call("TestRequest", strlen("TestRequest")); + auto res = client.call("TestRequest", strlen("TestRequest")); + auto result = std::get<0>(res); std::string response(result.size(), ' '); std::copy(result.as_byte_array(), result.as_byte_array() + result.size(), response.begin()); @@ -120,7 +122,8 @@ 
TEST(test_tcp_communication, send_receive_delayed) { handler.send_response(); - auto result = client.call("TestRequest", strlen("TestRequest")); + auto res = client.call("TestRequest", strlen("TestRequest")); + auto result = std::get<0>(res); std::string response(result.size(), ' '); std::copy(result.as_byte_array(), result.as_byte_array() + result.size(), response.begin()); From d1ac5b10a538d4da735dfa8a4bdfee753bd28183 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Fri, 2 Dec 2022 16:41:25 +0100 Subject: [PATCH 015/188] Add profiler unit tests --- libmuscle/cpp/src/libmuscle/profiler.hpp | 2 + .../libmuscle/tests/mocks/mock_mmp_client.cpp | 6 + .../libmuscle/tests/mocks/mock_mmp_client.hpp | 4 + .../cpp/src/libmuscle/tests/test_profiler.cpp | 150 ++++++++++++++++++ libmuscle/python/libmuscle/test/conftest.py | 15 ++ .../python/libmuscle/test/test_profiler.py | 47 ++++++ 6 files changed, 224 insertions(+) create mode 100644 libmuscle/cpp/src/libmuscle/tests/test_profiler.cpp create mode 100644 libmuscle/python/libmuscle/test/test_profiler.py diff --git a/libmuscle/cpp/src/libmuscle/profiler.hpp b/libmuscle/cpp/src/libmuscle/profiler.hpp index 8906ef70..1b155fa9 100644 --- a/libmuscle/cpp/src/libmuscle/profiler.hpp +++ b/libmuscle/cpp/src/libmuscle/profiler.hpp @@ -42,6 +42,8 @@ class Profiler { void record_event(ProfileEvent && event); private: + friend class TestProfiler; + MMPClient & manager_; std::vector events_; diff --git a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mmp_client.cpp b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mmp_client.cpp index 895bbff5..e0edfd8f 100644 --- a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mmp_client.cpp +++ b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mmp_client.cpp @@ -23,6 +23,10 @@ void MockMMPClient::submit_log_message(LogMessage const & message) { last_submitted_log_message = message; } +void MockMMPClient::submit_profile_events(std::vector const & event) { + last_submitted_profile_events = event; +} + void 
MockMMPClient::register_instance( std::vector const & locations, std::vector<::ymmsl::Port> const & ports) @@ -84,5 +88,7 @@ std::vector<::ymmsl::Port> MockMMPClient::last_registered_ports({}); LogMessage MockMMPClient::last_submitted_log_message( "", Timestamp(-1.0), LogLevel::DEBUG, ""); +std::vector MockMMPClient::last_submitted_profile_events; + } } diff --git a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mmp_client.hpp b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mmp_client.hpp index 49b6d5ec..4930d8af 100644 --- a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mmp_client.hpp +++ b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mmp_client.hpp @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -22,6 +23,8 @@ class MockMMPClient { void submit_log_message(LogMessage const & message); + void submit_profile_events(std::vector const & event); + ymmsl::Settings get_settings(); void register_instance( @@ -45,6 +48,7 @@ class MockMMPClient { static std::vector last_registered_locations; static std::vector<::ymmsl::Port> last_registered_ports; static LogMessage last_submitted_log_message; + static std::vector last_submitted_profile_events; }; using MMPClient = MockMMPClient; diff --git a/libmuscle/cpp/src/libmuscle/tests/test_profiler.cpp b/libmuscle/cpp/src/libmuscle/tests/test_profiler.cpp new file mode 100644 index 00000000..365c3f56 --- /dev/null +++ b/libmuscle/cpp/src/libmuscle/tests/test_profiler.cpp @@ -0,0 +1,150 @@ +// Inject mocks +#define LIBMUSCLE_MOCK_MMP_CLIENT + +// into the real implementation, +#include +#include +#include + +// then add mock implementations as needed. 
+#include + + +// Test code dependencies +#include +#include + +#include +#include + + +using libmuscle::impl::Profiler; +using libmuscle::impl::ProfileEvent; +using libmuscle::impl::ProfileEventType; +using libmuscle::impl::MockMMPClient; +using ymmsl::Port; + + +int main(int argc, char *argv[]) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + + +namespace libmuscle { namespace impl { + +// Helper for accessing internal state + +struct TestProfiler { + static std::vector & events_(Profiler & profiler) { + return profiler.events_; + } +}; + + +// Helpers for comparison, not needed in the main code so put here. +bool operator==(Timestamp const & lhs, Timestamp const & rhs) { + return lhs.seconds == rhs.seconds; +} + +bool operator==(Port const & lhs, Port const & rhs) { + return lhs.name == rhs.name && lhs.oper == rhs.oper; +} + +bool operator==(ProfileEvent const & lhs, ProfileEvent const & rhs) { + if (lhs.event_type != rhs.event_type) + return false; + + if (lhs.start_time != rhs.start_time) + return false; + + if (lhs.stop_time != rhs.stop_time) + return false; + + if (lhs.port != rhs.port) + return false; + + if (lhs.port_length != rhs.port_length) + return false; + + if (lhs.slot != rhs.slot) + return false; + + if (lhs.message_size != rhs.message_size) + return false; + + if (lhs.message_timestamp != rhs.message_timestamp) + return false; + + return true; +} + +} } + +using libmuscle::impl::TestProfiler; + + +/* Mocks have internal state, which needs to be reset before each test. This + * means that the tests are not reentrant, and cannot be run in parallel. + * It's all fast enough, so that's not a problem. 
+ */ +void reset_mocks() { + MockMMPClient::reset(); +} + + +TEST(libmuscle_profiler, test_recording_events) { + reset_mocks(); + MockMMPClient mock_mmp_client(Reference("test_instance[10]"), ""); + Profiler profiler(mock_mmp_client); + + Timestamp t1, t2; + ProfileEvent e(ProfileEventType::register_, t1, t2); + + profiler.record_event(ProfileEvent(e)); + + ASSERT_EQ(e.start_time, t1); + ASSERT_EQ(e.stop_time, t2); + ASSERT_EQ(TestProfiler::events_(profiler).at(0), e); +} + + +TEST(libmuscle_profiler, test_auto_stop_time) { + reset_mocks(); + MockMMPClient mock_mmp_client(Reference("test_instance[10]"), ""); + Profiler profiler(mock_mmp_client); + + Timestamp t1; + ProfileEvent e(ProfileEventType::send, t1); + + profiler.record_event(std::move(e)); + + auto const & e2 = TestProfiler::events_(profiler).at(0); + ASSERT_EQ(e2.start_time, t1); + ASSERT_TRUE(e2.stop_time.is_set()); + ASSERT_TRUE(e2.start_time.get().seconds < e2.stop_time.get().seconds); +} + +TEST(libmuscle_profiler, test_send_to_mock_mmp_client) { + reset_mocks(); + MockMMPClient mock_mmp_client(Reference("test_instance[10]"), ""); + Profiler profiler(mock_mmp_client); + + ProfileEvent e1(ProfileEventType::receive, Timestamp(), Timestamp()); + profiler.record_event(ProfileEvent(e1)); + + for (int i = 1; i < 99; ++i) { + ProfileEvent e(ProfileEventType::send, Timestamp(), Timestamp()); + profiler.record_event(std::move(e)); + } + + ASSERT_EQ(mock_mmp_client.last_submitted_profile_events.size(), 0u); + + ProfileEvent e2(ProfileEventType::receive_transfer, Timestamp(), Timestamp()); + profiler.record_event(ProfileEvent(e2)); + + ASSERT_EQ(mock_mmp_client.last_submitted_profile_events.size(), 100u); + ASSERT_TRUE(mock_mmp_client.last_submitted_profile_events.at(0) == e1); + ASSERT_TRUE(mock_mmp_client.last_submitted_profile_events.at(99) == e2); +} + diff --git a/libmuscle/python/libmuscle/test/conftest.py b/libmuscle/python/libmuscle/test/conftest.py index f84ea289..6b86bc52 100644 --- 
a/libmuscle/python/libmuscle/test/conftest.py +++ b/libmuscle/python/libmuscle/test/conftest.py @@ -1,3 +1,4 @@ +from copy import copy import pytest from unittest.mock import patch @@ -7,6 +8,7 @@ from libmuscle.communicator import Message from libmuscle.mcp.transport_client import ProfileData from libmuscle.mmp_client import MMPClient +from libmuscle.profiler import Profiler from libmuscle.timestamp import Timestamp @@ -34,3 +36,16 @@ def guard() -> APIGuard: @pytest.fixture def profile_data() -> ProfileData: return Timestamp(0.0), Timestamp(0.0), Timestamp(0.0) + + +@pytest.fixture +def mocked_profiler(): + class MockMMPClient: + def __init__(self): + self.sent_events = None + + def submit_profile_events(self, events): + self.sent_events = copy(events) + + mock_mmp_client = MockMMPClient() + yield Profiler(mock_mmp_client), mock_mmp_client diff --git a/libmuscle/python/libmuscle/test/test_profiler.py b/libmuscle/python/libmuscle/test/test_profiler.py new file mode 100644 index 00000000..d74be45c --- /dev/null +++ b/libmuscle/python/libmuscle/test/test_profiler.py @@ -0,0 +1,47 @@ +from libmuscle.profiling import ProfileEvent, ProfileEventType +from libmuscle.timestamp import Timestamp + + +def test_recording_events(mocked_profiler) -> None: + profiler, _ = mocked_profiler + + t1 = Timestamp() + t2 = Timestamp() + e = ProfileEvent(ProfileEventType.REGISTER, t1, t2) + + profiler.record_event(e) + + assert e.start_time == t1 + assert e.stop_time == t2 + assert e in profiler._events + + +def test_auto_stop_time(mocked_profiler) -> None: + profiler, _ = mocked_profiler + + t1 = Timestamp() + e = ProfileEvent(ProfileEventType.SEND, t1) + + profiler.record_event(e) + + assert e.start_time == t1 + assert e.stop_time is not None + assert e.start_time.seconds < e.stop_time.seconds + + +def test_send_to_manager(mocked_profiler) -> None: + profiler, mock_mmp_client = mocked_profiler + + for i in range(99): + e1 = ProfileEvent(ProfileEventType.RECEIVE, Timestamp()) + 
profiler.record_event(e1) + + assert mock_mmp_client.sent_events is None + + e2 = ProfileEvent(ProfileEventType.RECEIVE, Timestamp()) + profiler.record_event(e2) + + assert mock_mmp_client.sent_events is not None + assert len(mock_mmp_client.sent_events) == 100 + assert e1 in mock_mmp_client.sent_events + assert e2 in mock_mmp_client.sent_events From c79376a9e012934de4c03c6150d65b0f56777d39 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Wed, 7 Dec 2022 07:28:07 +0100 Subject: [PATCH 016/188] Collect more complete records and skip ClosePort messages --- libmuscle/cpp/src/libmuscle/communicator.cpp | 27 +++++++++++++++----- libmuscle/python/libmuscle/communicator.py | 23 ++++++++++++----- 2 files changed, 37 insertions(+), 13 deletions(-) diff --git a/libmuscle/cpp/src/libmuscle/communicator.cpp b/libmuscle/cpp/src/libmuscle/communicator.cpp index dcca535e..94bc6f04 100644 --- a/libmuscle/cpp/src/libmuscle/communicator.cpp +++ b/libmuscle/cpp/src/libmuscle/communicator.cpp @@ -153,7 +153,8 @@ void Communicator::send_message( profile_event.stop(); if (port.is_vector()) profile_event.port_length = port.get_length(); - profiler_.record_event(std::move(profile_event)); + if (!is_close_port(message.data())) + profiler_.record_event(std::move(profile_event)); } Message Communicator::receive_message( @@ -205,7 +206,7 @@ Message Communicator::receive_message( auto mpp_message = MPPMessage::from_bytes(std::get<0>(msg_and_profile)); Settings overlay_settings(mpp_message.settings_overlay.as()); - profiler_.record_event(std::move(recv_decode_event)); + recv_decode_event.stop(); if (mpp_message.port_length.is_set()) if (port.is_resizable()) @@ -227,20 +228,32 @@ Message Communicator::receive_message( auto profile = std::get<1>(msg_and_profile); ProfileEvent recv_wait_event( ProfileEventType::receive_wait, std::get<0>(profile), - std::get<1>(profile), port, mpp_message.port_length, slot); - profiler_.record_event(std::move(recv_wait_event)); + std::get<1>(profile), port, 
mpp_message.port_length, slot, + std::get<0>(msg_and_profile).size(), message.timestamp()); ProfileEvent recv_xfer_event( ProfileEventType::receive_transfer, std::get<1>(profile), std::get<2>(profile), port, mpp_message.port_length, slot, std::get<0>(msg_and_profile).size(), message.timestamp()); - profiler_.record_event(std::move(recv_xfer_event)); + recv_decode_event.message_timestamp = message.timestamp(); receive_event.message_timestamp = message.timestamp(); - if (port.is_vector()) + + if (port.is_vector()) { receive_event.port_length = port.get_length(); + recv_wait_event.port_length = port.get_length(); + recv_xfer_event.port_length = port.get_length(); + recv_decode_event.port_length = port.get_length(); + } + receive_event.message_size = std::get<0>(msg_and_profile).size(); - profiler_.record_event(std::move(receive_event)); + + if (!is_close_port(message.data())) { + profiler_.record_event(std::move(recv_wait_event)); + profiler_.record_event(std::move(recv_xfer_event)); + profiler_.record_event(std::move(recv_decode_event)); + profiler_.record_event(std::move(receive_event)); + } int expected_message_number = port.get_num_messages(slot); // TODO: handle f_init port counts for STATELESS and WEAKLY_STATEFUL diff --git a/libmuscle/python/libmuscle/communicator.py b/libmuscle/python/libmuscle/communicator.py index 9fb6bc64..e800d03e 100644 --- a/libmuscle/python/libmuscle/communicator.py +++ b/libmuscle/python/libmuscle/communicator.py @@ -243,7 +243,8 @@ def send_message( if port.is_vector(): profile_event.port_length = port.get_length() profile_event.message_size = len(encoded_message) - self._profiler.record_event(profile_event) + if not isinstance(message.data, ClosePort): + self._profiler.record_event(profile_event) def receive_message(self, port_name: str, slot: Optional[int] = None, default: Optional[Message] = None @@ -318,7 +319,7 @@ def receive_message(self, port_name: str, slot: Optional[int] = None, ProfileEventType.RECEIVE_DECODE, Timestamp(), 
None, port, None, slot, len(mpp_message_bytes)) mpp_message = MPPMessage.from_bytes(mpp_message_bytes) - self._profiler.record_event(recv_decode_event) + recv_decode_event.stop() if mpp_message.port_length is not None: if port.is_resizable(): @@ -333,20 +334,30 @@ def receive_message(self, port_name: str, slot: Optional[int] = None, recv_wait_event = ProfileEvent( ProfileEventType.RECEIVE_WAIT, profile[0], profile[1], port, - mpp_message.port_length, slot) - self._profiler.record_event(recv_wait_event) + mpp_message.port_length, slot, len(mpp_message_bytes), + message.timestamp) recv_xfer_event = ProfileEvent( ProfileEventType.RECEIVE_TRANSFER, profile[1], profile[2], port, mpp_message.port_length, slot, len(mpp_message_bytes), message.timestamp) - self._profiler.record_event(recv_xfer_event) + recv_decode_event.message_timestamp = message.timestamp receive_event.message_timestamp = message.timestamp + if port.is_vector(): receive_event.port_length = port.get_length() + recv_wait_event.port_length = port.get_length() + recv_xfer_event.port_length = port.get_length() + recv_decode_event.port_length = port.get_length() + receive_event.message_size = len(mpp_message_bytes) - self._profiler.record_event(receive_event) + + if not isinstance(mpp_message.data, ClosePort): + self._profiler.record_event(recv_wait_event) + self._profiler.record_event(recv_xfer_event) + self._profiler.record_event(recv_decode_event) + self._profiler.record_event(receive_event) expected_message_number = port.get_num_messages(slot) if expected_message_number != mpp_message.message_number: From 157d6e3bb57a393a91f3880914795fd6a26f3b0c Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Wed, 7 Dec 2022 07:46:45 +0100 Subject: [PATCH 017/188] Add profile database --- .../libmuscle/manager/profile_database.py | 171 ++++++++++++++++++ .../manager/test/test_profile_database.py | 109 +++++++++++ 2 files changed, 280 insertions(+) create mode 100644 libmuscle/python/libmuscle/manager/profile_database.py 
create mode 100644 libmuscle/python/libmuscle/manager/test/test_profile_database.py diff --git a/libmuscle/python/libmuscle/manager/profile_database.py b/libmuscle/python/libmuscle/manager/profile_database.py new file mode 100644 index 00000000..9f79c57e --- /dev/null +++ b/libmuscle/python/libmuscle/manager/profile_database.py @@ -0,0 +1,171 @@ +from pathlib import Path +import sqlite3 +import threading +from typing import Iterable, Optional, Tuple + +from libmuscle.profiling import ProfileEvent, ProfileEventType +from ymmsl import Operator, Reference + + +class ProfileDatabase: + """Creates and updates a profiling database. + + This class creates the database, and then allow writing to it. It + should be changed in concert with ProfileDatabase to ensure the + data format remains in sync. + """ + def __init__(self, db_file: Path) -> None: + """Create a new profile database. + + Args: + db_file: The file to create and initialise. + """ + if db_file.exists(): + # TODO: maybe allow multiple objects to open the same file? + raise RuntimeError(f'File {db_file} exists, not overwriting it.') + + # Exactly how the sqlite3 Python module's automatic + # transactions work is a bit mysterious, and the documentation + # of sqlite itself isn't perfect either. So we set + # isolation_level to None, which doesn't actually affect the + # effective isolation level (!) but does keep the Python module + # from starting transactions automatically, and then we use + # explicit BEGIN TRANSACTION statements to keep the sqlite + # library from doing anything automatically. That way, it's + # clear what's going on from the code. + # + # Also, connections are expensive and cannot be used in other + # threads. We have a multithreaded TCP server in the manager, + # so we use thread-local storage to get a connection in each + # thread. 
+ self._db_file = db_file + + self._local = threading.local() + self._local.conn = sqlite3.connect(db_file, isolation_level=None) + + cur = self._local.conn.cursor() + cur.execute("BEGIN IMMEDIATE TRANSACTION") + cur.execute( + "CREATE TABLE muscle3_format (" + " major_version INTEGER NOT NULL," + " minor_version INTEGER NOT NULL)") + cur.execute( + "INSERT INTO muscle3_format(major_version, minor_version)" + " VALUES (1, 0)") + + cur.execute( + "CREATE TABLE event_types (" + " oid INTEGER PRIMARY KEY," + " name TEXT UNIQUE)") + event_types = [(t.value, t.name) for t in ProfileEventType] + cur.executemany( + "INSERT INTO event_types (oid, name) VALUES (?, ?)", + event_types) + + cur.execute( + "CREATE TABLE port_operators (" + " oid INTEGER PRIMARY KEY," + " name TEXT UNIQUE)") + port_operators = [(o.value, o.name) for o in Operator] + cur.executemany( + "INSERT INTO port_operators (oid, name) VALUES (?, ?)", + port_operators) + + cur.execute( + "CREATE TABLE instances (" + " oid INTEGER PRIMARY KEY," + " name TEXT UNIQUE)") + + cur.execute( + "CREATE TABLE events (" + " instance INTEGER NOT NULL REFERENCES instances(oid)," + " event_type INTEGER NOT NULL REFERENCES event_types(oid)," + " start_time DOUBLE NOT NULL," + " stop_time DOUBLE NOT NULL," + " port_name TEXT," + " port_operator INTEGER REFERENCES port_operators(oid)," + " port_length INTEGER," + " slot INTEGER," + " message_size INTEGER," + " message_timestamp DOUBLE)") + + cur.execute( + "CREATE VIEW all_events (" + " instance, type, start_time, stop_time, port, operator," + " port_length, slot, message_size, message_timestamp)" + " AS SELECT" + " i.name, et.name, e.start_time, e.stop_time, e.port_name," + " o.name, e.port_length, e.slot, e.message_size," + " e.message_timestamp" + " FROM" + " events e" + " JOIN instances i ON e.instance = i.oid" + " LEFT JOIN event_types et ON e.event_type = et.oid" + " LEFT JOIN port_operators o ON e.port_operator = o.oid") + + cur.execute("COMMIT") + cur.close() + + 
def add_events( + self, instance_id: Reference, events: Iterable[ProfileEvent] + ) -> None: + """Adds profiling events to the database. + + Args: + events: The events to add. + """ + if not hasattr(self._local, 'conn'): + self._local.conn = sqlite3.connect( + self._db_file, isolation_level=None) + cur = self._local.conn.cursor() + cur.execute("BEGIN IMMEDIATE TRANSACTION") + cur.execute( + "SELECT oid FROM instances WHERE name = ?", + (str(instance_id),)) + oids = cur.fetchall() + if oids: + instance_oid = oids[0][0] + else: + cur.execute( + "INSERT INTO instances (name) VALUES (?) RETURNING oid", + (str(instance_id),)) + instance_oid = cur.fetchone()[0] + + Record = Tuple[ + int, int, float, float, Optional[str], Optional[int], + Optional[int], Optional[int], Optional[int], + Optional[float]] + + def to_tuple(e: ProfileEvent) -> Record: + # Tell mypy this shouldn't happen + assert e.start_time is not None + assert e.stop_time is not None + + port_name = None if e.port is None else str(e.port.name) + port_operator = None if e.port is None else e.port.operator.value + + return ( + instance_oid, e.event_type.value, e.start_time.seconds, + e.stop_time.seconds, port_name, port_operator, + e.port_length, e.slot, e.message_size, e.message_timestamp) + + cur.executemany( + "INSERT INTO events" + " (instance, event_type, start_time, stop_time, port_name," + " port_operator, port_length, slot, message_size," + " message_timestamp) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + list(map(to_tuple, events))) + cur.execute("COMMIT") + cur.close() + + def close(self) -> None: + """Close the connection to the database. + + This should be called once by each thread that used this + object before it goes down, so that its connection to the + database can be closed. + """ + # If the thread never served a request, then we don't have a + # connection. 
+ if hasattr(self._local, 'conn'): + self._local.conn.close() diff --git a/libmuscle/python/libmuscle/manager/test/test_profile_database.py b/libmuscle/python/libmuscle/manager/test/test_profile_database.py new file mode 100644 index 00000000..af4ddae8 --- /dev/null +++ b/libmuscle/python/libmuscle/manager/test/test_profile_database.py @@ -0,0 +1,109 @@ +from libmuscle.profiling import ProfileEvent, ProfileEventType +from libmuscle.manager.profile_database import ProfileDatabase +from libmuscle.timestamp import Timestamp +from ymmsl import Operator, Port, Reference + +import sqlite3 + + +def test_create_profile_database(tmp_path): + db_path = tmp_path / 'test.db' + db = ProfileDatabase(db_path) + db.close() + + conn = sqlite3.connect(db_path, isolation_level=None) + cur = conn.cursor() + cur.execute("BEGIN TRANSACTION") + cur.execute("SELECT major_version, minor_version FROM muscle3_format") + major, minor = cur.fetchone() + assert major == 1 + assert minor == 0 + + cur.execute("SELECT oid, name FROM event_types") + etypes = cur.fetchall() + assert len(etypes) == len([e for e in ProfileEventType]) + + cur.execute("SELECT oid, name FROM port_operators") + opers = cur.fetchall() + assert len(opers) == len([o for o in Operator]) + + cur.execute("SELECT oid, name FROM instances") + instances = cur.fetchall() + assert len(instances) == 0 + + cur.execute( + "SELECT instance, event_type, start_time, stop_time, port_name," + " port_operator, port_length, slot, message_size," + " message_timestamp FROM events") + events = cur.fetchall() + assert len(events) == 0 + + cur.execute("COMMIT") + cur.close() + conn.close() + + +def test_add_events(tmp_path): + db_path = tmp_path / 'test.db' + db = ProfileDatabase(db_path) + conn = sqlite3.connect(db_path, isolation_level=None) + cur = conn.cursor() + + events = [ + ProfileEvent( + ProfileEventType.REGISTER, Timestamp(0.0), Timestamp(0.1)), + ProfileEvent( + ProfileEventType.SEND, Timestamp(0.8), Timestamp(0.812), + 
Port('out_port', Operator.O_I), 10, 3, 12345, 13.42), + ProfileEvent( + ProfileEventType.DEREGISTER, Timestamp(1.0), Timestamp(1.1))] + + def check_send_event(instance): + cur.execute("BEGIN TRANSACTION") + cur.execute( + "SELECT *" + " FROM events AS e, instances AS i, event_types AS et," + " port_operators AS o" + " WHERE e.instance = i.oid AND e.event_type = et.oid" + " AND e.port_operator = o.oid AND i.name = 'instance[0]'" + " AND et.name = (?)", (ProfileEventType.SEND.name,)) + events2 = cur.fetchall() + + assert len(events2) == 1 + e = events2[0] + assert e[1:10] == ( + ProfileEventType.SEND.value, 0.8, 0.812, 'out_port', + Operator.O_I.value, 10, 3, 12345, 13.42) + assert e[11] == 'instance[0]' + assert e[13] == 'SEND' + assert e[15] == 'O_I' + + cur.execute("COMMIT") + + db.add_events(Reference('instance[0]'), events) + check_send_event('instance[0]') + + db.add_events(Reference('instance[1]'), events) + check_send_event('instance[1]') + + def check_register_event(typ, start, stop): + cur.execute("BEGIN TRANSACTION") + cur.execute( + "SELECT i.name, e.start_time, e.stop_time" + " FROM events AS e, instances AS i, event_types AS et" + " WHERE e.instance = i.oid AND e.event_type = et.oid" + f" AND et.name = '{typ}'") + + events2 = cur.fetchall() + cur.execute("COMMIT") + + assert len(events2) == 2 + assert set(events2) == { + ('instance[0]', start, stop), ('instance[1]', start, stop)} + + check_register_event('REGISTER', 0.0, 0.1) + check_register_event('DEREGISTER', 1.0, 1.1) + + cur.close() + conn.close() + db.close() From 23bc37bcf6508209327f10ebfe11286ad8dd06af Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Wed, 7 Dec 2022 07:48:41 +0100 Subject: [PATCH 018/188] Add profile store and profiling tests --- integration_test/test_all.py | 38 ++++++++++++++++-- integration_test/test_cpp_macro_micro.py | 31 ++++++++++++++ libmuscle/python/libmuscle/manager/manager.py | 4 +- .../python/libmuscle/manager/mmp_server.py | 34 ++++++++++++++-- 
.../python/libmuscle/manager/profile_store.py | 40 +++++++++++++++++++ .../python/libmuscle/manager/test/conftest.py | 36 ++++++++++------- .../manager/test/test_mmp_request_handler.py | 9 +++-- .../libmuscle/mcp/tcp_transport_server.py | 5 +++ .../python/libmuscle/mcp/transport_server.py | 8 ++++ 9 files changed, 177 insertions(+), 28 deletions(-) create mode 100644 libmuscle/python/libmuscle/manager/profile_store.py diff --git a/integration_test/test_all.py b/integration_test/test_all.py index a9c578e0..9a9cedfe 100644 --- a/integration_test/test_all.py +++ b/integration_test/test_all.py @@ -1,4 +1,5 @@ from collections import OrderedDict +import sqlite3 import numpy as np from ymmsl import (Component, Conduit, Configuration, Model, Operator, @@ -8,6 +9,9 @@ from libmuscle.runner import run_simulation +NUM_MICROS = 10 + + def macro(): """Macro model implementation. """ @@ -21,11 +25,11 @@ def macro(): # o_i assert instance.is_vector_port('out') - for slot in range(10): + for slot in range(NUM_MICROS): instance.send('out', Message(0.0, 10.0, 'testing'), slot) # s/b - for slot in range(10): + for slot in range(NUM_MICROS): msg = instance.receive('in', slot) assert msg.data['string'] == 'testing back' assert msg.data['int'] == 42 @@ -59,12 +63,36 @@ def micro(): instance.send('out', Message(0.1, data=result)) -def test_all(log_file_in_tmpdir): +def check_profile_output(tmp_path): + conn = sqlite3.connect(tmp_path / 'performance.sqlite') + cur = conn.cursor() + + for typ in ('SEND', 'RECEIVE_TRANSFER'): + cur.execute( + "SELECT * FROM all_events" + f" WHERE instance = 'macro' AND type = '{typ}'") + res = cur.fetchall() + assert len(res) == NUM_MICROS + + cur.execute( + "SELECT * FROM all_events" + " WHERE instance = 'micro[5]' AND type = 'RECEIVE'") + res = cur.fetchall() + assert len(res) == 1 + assert res[0][4:8] == ('in', 'F_INIT', None, None) + assert res[0][8] > 0 + assert res[0][9] == 0.0 + + cur.close() + conn.close() + + +def test_all(log_file_in_tmpdir, 
tmp_path): """A positive all-up test of everything. """ elements = [ Component('macro', 'macro_impl'), - Component('micro', 'micro_impl', [10])] + Component('micro', 'micro_impl', [NUM_MICROS])] conduits = [ Conduit('macro.out', 'micro.in'), @@ -83,3 +111,5 @@ def test_all(log_file_in_tmpdir): implementations = {'macro_impl': macro, 'micro_impl': micro} run_simulation(configuration, implementations) + + check_profile_output(tmp_path) diff --git a/integration_test/test_cpp_macro_micro.py b/integration_test/test_cpp_macro_micro.py index fda1a232..df448a5a 100644 --- a/integration_test/test_cpp_macro_micro.py +++ b/integration_test/test_cpp_macro_micro.py @@ -1,4 +1,5 @@ from pathlib import Path +import sqlite3 import numpy as np @@ -36,6 +37,34 @@ def macro(): assert msg.timestamp == i * 10.0 +def check_profile_output(tmp_path): + conn = sqlite3.connect(tmp_path / 'performance.sqlite') + cur = conn.cursor() + + def check(instance: str, typ: str, port: str, operator: str) -> None: + cur.execute( + "SELECT * FROM all_events" + f" WHERE instance = '{instance}' AND type = '{typ}'" + " ORDER BY start_time") + res = cur.fetchall() + assert len(res) == 2 + assert res[0][4:8] == (port, operator, None, None) + assert res[0][8] > 0 + assert res[0][9] == 0.0 + + assert res[1][4:8] == (port, operator, None, None) + assert res[1][8] > 0 + assert res[1][9] == 10.0 + + check('macro', 'SEND', 'out', 'O_I') + check('micro', 'RECEIVE_TRANSFER', 'in', 'F_INIT') + check('micro', 'SEND', 'out', 'O_F') + check('macro', 'RECEIVE_DECODE', 'in', 'S') + + cur.close() + conn.close() + + @skip_if_python_only def test_cpp_macro_micro(mmp_server_config_simple, tmp_path): # create C++ micro model @@ -46,3 +75,5 @@ def test_cpp_macro_micro(mmp_server_config_simple, tmp_path): {'micro': Path('libmuscle') / 'tests' / 'micro_model_test'}, {}, {'macro': macro}) + + check_profile_output(tmp_path) diff --git a/libmuscle/python/libmuscle/manager/manager.py b/libmuscle/python/libmuscle/manager/manager.py 
index 4e0085bf..57d8ceda 100644 --- a/libmuscle/python/libmuscle/manager/manager.py +++ b/libmuscle/python/libmuscle/manager/manager.py @@ -10,6 +10,7 @@ from libmuscle.manager.logger import Logger from libmuscle.manager.mmp_server import MMPServer from libmuscle.manager.instance_manager import InstanceManager +from libmuscle.manager.profile_store import ProfileStore from libmuscle.manager.run_dir import RunDir from libmuscle.manager.snapshot_registry import SnapshotRegistry from libmuscle.manager.topology_store import TopologyStore @@ -41,6 +42,7 @@ def __init__( self._run_dir = run_dir log_dir = self._run_dir.path if self._run_dir else Path.cwd() self._logger = Logger(log_dir, log_level) + self._profile_store = ProfileStore(log_dir) self._topology_store = TopologyStore(configuration) self._instance_registry = InstanceRegistry() if run_dir is not None: @@ -73,7 +75,7 @@ def __init__( self._snapshot_registry.start() self._server = MMPServer( - self._logger, self._configuration, + self._logger, self._profile_store, self._configuration, self._instance_registry, self._topology_store, self._snapshot_registry, run_dir) diff --git a/libmuscle/python/libmuscle/manager/mmp_server.py b/libmuscle/python/libmuscle/manager/mmp_server.py index 43d07430..d5ed9b0e 100644 --- a/libmuscle/python/libmuscle/manager/mmp_server.py +++ b/libmuscle/python/libmuscle/manager/mmp_server.py @@ -19,6 +19,8 @@ from libmuscle.mcp.protocol import RequestType, ResponseType from libmuscle.mcp.tcp_transport_server import TcpTransportServer from libmuscle.mcp.transport_server import RequestHandler +from libmuscle.manager.profile_store import ProfileStore +from libmuscle.profiling import ProfileEvent, ProfileEventType from libmuscle.snapshot import SnapshotMetadata from libmuscle.timestamp import Timestamp @@ -55,6 +57,7 @@ class MMPRequestHandler(RequestHandler): def __init__( self, logger: Logger, + profile_store: ProfileStore, configuration: PartialConfiguration, instance_registry: 
InstanceRegistry, topology_store: TopologyStore, @@ -70,6 +73,7 @@ def __init__( topology_store: Keeps track of how to connect things. """ self._logger = logger + self._profile_store = profile_store self._configuration = configuration self._instance_registry = instance_registry self._topology_store = topology_store @@ -108,6 +112,14 @@ def handle_request(self, request: bytes) -> bytes: return cast(bytes, msgpack.packb(response, use_bin_type=True)) + def close(self) -> None: + """Free per-thread resources. + + On shutdown of the server, this will be called by each server + thread before it shuts down. + """ + self._profile_store.close() + def _register_instance( self, instance_id: str, locations: List[str], ports: List[List[str]], version: str = '') -> Any: @@ -261,16 +273,26 @@ def _submit_log_message( return [ResponseType.SUCCESS.value] def _submit_profile_events( - self, instance: str, events: List[List[Any]]) -> Any: + self, instance_id: str, events: List[List[Any]]) -> Any: """Handle a submit profile events request. - Not implemented yet. 
+ Args: + instance_id: Instance that sent these events + events: Profiling events to store Returns: A list containing the following values on success: status (ResponseType): SUCCESS """ + ev = [ + ProfileEvent( + ProfileEventType(e[0]), Timestamp(e[1]), Timestamp(e[2]), + Port(e[3][0], Operator[e[3][1]]) if e[3] else None, + e[4], e[5], e[6], e[7]) + for e in events] + + self._profile_store.add_events(Reference(instance_id), ev) return [ResponseType.SUCCESS.value] def _submit_snapshot( @@ -330,6 +352,7 @@ class MMPServer: def __init__( self, logger: Logger, + profile_store: ProfileStore, configuration: PartialConfiguration, instance_registry: InstanceRegistry, topology_store: TopologyStore, @@ -345,15 +368,18 @@ def __init__( Args: logger: Logger to send log messages to + profile_store: ProfileStore to store profile data in configuration: Configuration component to get settings, checkpoints and resumes from instance_registry: To register instances with and get peer locations from topology_store: To get peers and conduits from + snapshot_registry: To register snapshots with + run_dir: To save snapshots to """ self._handler = MMPRequestHandler( - logger, configuration, instance_registry, topology_store, - snapshot_registry, run_dir) + logger, profile_store, configuration, instance_registry, + topology_store, snapshot_registry, run_dir) try: self._server = TcpTransportServer(self._handler, 9000) except OSError as e: diff --git a/libmuscle/python/libmuscle/manager/profile_store.py b/libmuscle/python/libmuscle/manager/profile_store.py new file mode 100644 index 00000000..6b576be8 --- /dev/null +++ b/libmuscle/python/libmuscle/manager/profile_store.py @@ -0,0 +1,40 @@ +from pathlib import Path +from typing import Iterable + +from libmuscle.profiling import ProfileEvent +from libmuscle.manager.profile_database import ProfileDatabase +from ymmsl import Reference + + +class ProfileStore: + """Stores profiling information to disk.""" + def __init__(self, log_dir: Path) -> 
None: + """Create a ProfileStore. + + This will save the recorded profiling data to a file named + ``performance.sqlite`` in the main RunDir. + + Args: + log_dir: Directory to store the database in + """ + db_file = log_dir / 'performance.sqlite' + self._db = ProfileDatabase(db_file) + + def add_events( + self, instance_id: Reference, events: Iterable[ProfileEvent] + ) -> None: + """Adds profiling events to the database. + + Args: + events: The events to add. + """ + self._db.add_events(instance_id, events) + + def close(self) -> None: + """Close the store. + + This should be called once by each thread that used this + object before it goes down, so that its connection to the + database can be closed. + """ + self._db.close() diff --git a/libmuscle/python/libmuscle/manager/test/conftest.py b/libmuscle/python/libmuscle/manager/test/conftest.py index 992a3950..78a50487 100644 --- a/libmuscle/python/libmuscle/manager/test/conftest.py +++ b/libmuscle/python/libmuscle/manager/test/conftest.py @@ -1,5 +1,3 @@ -from pathlib import Path - import pytest from ymmsl import Component, Conduit, Configuration, Model, Reference @@ -9,11 +7,12 @@ from libmuscle.manager.run_dir import RunDir from libmuscle.manager.snapshot_registry import SnapshotRegistry from libmuscle.manager.topology_store import TopologyStore +from libmuscle.manager.profile_store import ProfileStore @pytest.fixture -def logger(tmpdir): - test_logger = Logger(Path(str(tmpdir))) +def logger(tmp_path): + test_logger = Logger(tmp_path) yield test_logger test_logger.close() @@ -33,6 +32,13 @@ def mmp_configuration(): ])) +@pytest.fixture +def profile_store(tmp_path): + test_profile_store = ProfileStore(tmp_path) + yield test_profile_store + test_profile_store.close() + + @pytest.fixture def instance_registry(): return InstanceRegistry() @@ -50,11 +56,11 @@ def snapshot_registry(mmp_configuration, topology_store) -> SnapshotRegistry: @pytest.fixture def mmp_request_handler( - logger, mmp_configuration, 
instance_registry, topology_store, - snapshot_registry): + logger, profile_store, mmp_configuration, instance_registry, + topology_store, snapshot_registry): return MMPRequestHandler( - logger, mmp_configuration, instance_registry, topology_store, - snapshot_registry, None) + logger, profile_store, mmp_configuration, instance_registry, + topology_store, snapshot_registry, None) @pytest.fixture @@ -70,11 +76,11 @@ def loaded_instance_registry(instance_registry): @pytest.fixture def registered_mmp_request_handler( - logger, mmp_configuration, loaded_instance_registry, topology_store, - snapshot_registry): + logger, profile_store, mmp_configuration, loaded_instance_registry, + topology_store, snapshot_registry): return MMPRequestHandler( - logger, mmp_configuration, loaded_instance_registry, topology_store, - snapshot_registry, None) + logger, profile_store, mmp_configuration, loaded_instance_registry, + topology_store, snapshot_registry, None) @pytest.fixture @@ -126,9 +132,9 @@ def loaded_instance_registry2(): @pytest.fixture def registered_mmp_request_handler2( - logger, mmp_configuration, loaded_instance_registry2, topology_store2, - snapshot_registry2, tmp_path): + logger, profile_store, mmp_configuration, loaded_instance_registry2, + topology_store2, snapshot_registry2, tmp_path): return MMPRequestHandler( - logger, mmp_configuration, + logger, profile_store, mmp_configuration, loaded_instance_registry2, topology_store2, snapshot_registry2, RunDir(tmp_path)) diff --git a/libmuscle/python/libmuscle/manager/test/test_mmp_request_handler.py b/libmuscle/python/libmuscle/manager/test/test_mmp_request_handler.py index 4b615d55..131eee81 100644 --- a/libmuscle/python/libmuscle/manager/test/test_mmp_request_handler.py +++ b/libmuscle/python/libmuscle/manager/test/test_mmp_request_handler.py @@ -13,11 +13,12 @@ from libmuscle.snapshot import SnapshotMetadata -def test_create_servicer(logger, mmp_configuration, instance_registry, - topology_store, snapshot_registry): +def 
test_create_servicer( + logger, profile_store, mmp_configuration, instance_registry, + topology_store, snapshot_registry): MMPRequestHandler( - logger, mmp_configuration, instance_registry, topology_store, - snapshot_registry, None) + logger, profile_store, mmp_configuration, instance_registry, + topology_store, snapshot_registry, None) def test_log_message(mmp_request_handler, caplog): diff --git a/libmuscle/python/libmuscle/mcp/tcp_transport_server.py b/libmuscle/python/libmuscle/mcp/tcp_transport_server.py index 1866c315..455fde1b 100644 --- a/libmuscle/python/libmuscle/mcp/tcp_transport_server.py +++ b/libmuscle/python/libmuscle/mcp/tcp_transport_server.py @@ -58,6 +58,11 @@ def receive_request(self) -> Optional[bytes]: except SocketClosed: return None + def finish(self) -> None: + """Called when shutting down the thread?""" + server = cast(TcpTransportServerImpl, self.server).transport_server + server._handler.close() + class TcpTransportServer(TransportServer): """A TransportServer that uses TCP to communicate.""" diff --git a/libmuscle/python/libmuscle/mcp/transport_server.py b/libmuscle/python/libmuscle/mcp/transport_server.py index 66af0438..1128ba30 100644 --- a/libmuscle/python/libmuscle/mcp/transport_server.py +++ b/libmuscle/python/libmuscle/mcp/transport_server.py @@ -17,6 +17,14 @@ def handle_request(self, request: bytes) -> bytes: """ raise NotImplementedError() # pragma: no cover + def close(self) -> None: + """Free per-thread resources. + + On shutdown of the server, this will be called by each server + thread before it shuts down. 
+ """ + pass + class ServerNotSupported(RuntimeError): pass From 6bdc815f0621bc931555e2952687a6a6f821d57b Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Fri, 6 Jan 2023 11:27:36 +0100 Subject: [PATCH 019/188] Overwrite profiling database if needed, like manager log --- .../python/libmuscle/manager/profile_database.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/libmuscle/python/libmuscle/manager/profile_database.py b/libmuscle/python/libmuscle/manager/profile_database.py index 9f79c57e..6feaabf2 100644 --- a/libmuscle/python/libmuscle/manager/profile_database.py +++ b/libmuscle/python/libmuscle/manager/profile_database.py @@ -1,3 +1,4 @@ +import logging from pathlib import Path import sqlite3 import threading @@ -7,6 +8,9 @@ from ymmsl import Operator, Reference +_logger = logging.getLogger(__name__) + + class ProfileDatabase: """Creates and updates a profiling database. @@ -21,8 +25,12 @@ def __init__(self, db_file: Path) -> None: db_file: The file to create and initialise. """ if db_file.exists(): - # TODO: maybe allow multiple objects to open the same file? 
- raise RuntimeError(f'File {db_file} exists, not overwriting it.') + _logger.info(f'Overwriting profiling database {db_file}') + try: + # from Python 3.8, we can use missing_ok=True + db_file.unlink() + except FileNotFoundError: + pass # Exactly how the sqlite3 Python module's automatic # transactions work is a bit mysterious, and the documentation From 136aa46c23129a0d85dfece53b8c6569d81724a1 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Fri, 6 Jan 2023 13:02:30 +0100 Subject: [PATCH 020/188] Refactor ProfileStore/ProfileDatabase --- .../libmuscle/manager/profile_database.py | 184 +++--------------- .../python/libmuscle/manager/profile_store.py | 156 +++++++++++++-- ...file_database.py => test_profile_store.py} | 13 +- 3 files changed, 174 insertions(+), 179 deletions(-) rename libmuscle/python/libmuscle/manager/test/{test_profile_database.py => test_profile_store.py} (93%) diff --git a/libmuscle/python/libmuscle/manager/profile_database.py b/libmuscle/python/libmuscle/manager/profile_database.py index 6feaabf2..9fd67869 100644 --- a/libmuscle/python/libmuscle/manager/profile_database.py +++ b/libmuscle/python/libmuscle/manager/profile_database.py @@ -1,170 +1,23 @@ -import logging from pathlib import Path import sqlite3 import threading -from typing import Iterable, Optional, Tuple - -from libmuscle.profiling import ProfileEvent, ProfileEventType -from ymmsl import Operator, Reference - - -_logger = logging.getLogger(__name__) +from typing import cast class ProfileDatabase: - """Creates and updates a profiling database. + """Accesses a profiling database. - This class creates the database, and then allow writing to it. It - should be changed in concert with ProfileDatabase to ensure the - data format remains in sync. + This class accesses a MUSCLE3 profiling database and provides basic + analysis functionality. """ def __init__(self, db_file: Path) -> None: - """Create a new profile database. + """Open a ProfileDatabase. 
- Args: - db_file: The file to create and initialise. + This opens the database file and creates a ProfileDatabase + object that operates on it. """ - if db_file.exists(): - _logger.info(f'Overwriting profiling database {db_file}') - try: - # from Python 3.8, we can use missing_ok=True - db_file.unlink() - except FileNotFoundError: - pass - - # Exactly how the sqlite3 Python module's automatic - # transactions work is a bit mysterious, and the documentation - # of sqlite itself isn't perfect either. So we set - # isolation_level to None, which doesn't actually affect the - # effective isolation level (!) but does keep the Python module - # from starting transactions automatically, and then we use - # explicit BEGIN TRANSACTION statements to keep the sqlite - # library from doing anything automatically. That way, it's - # clear what's going on from the code. - # - # Also, connections are expensive and cannot be used in other - # threads. We have a multithreaded TCP server in the manager, - # so we use thread-local storage to get a connection in each - # thread. 
self._db_file = db_file - self._local = threading.local() - self._local.conn = sqlite3.connect(db_file, isolation_level=None) - - cur = self._local.conn.cursor() - cur.execute("BEGIN IMMEDIATE TRANSACTION") - cur.execute( - "CREATE TABLE muscle3_format (" - " major_version INTEGER NOT NULL," - " minor_version INTEGER NOT NULL)") - cur.execute( - "INSERT INTO muscle3_format(major_version, minor_version)" - " VALUES (1, 0)") - - cur.execute( - "CREATE TABLE event_types (" - " oid INTEGER PRIMARY KEY," - " name TEXT UNIQUE)") - event_types = [(t.value, t.name) for t in ProfileEventType] - cur.executemany( - "INSERT INTO event_types (oid, name) VALUES (?, ?)", - event_types) - - cur.execute( - "CREATE TABLE port_operators (" - " oid INTEGER PRIMARY KEY," - " name TEXT UNIQUE)") - port_operators = [(o.value, o.name) for o in Operator] - cur.executemany( - "INSERT INTO port_operators (oid, name) VALUES (?, ?)", - port_operators) - - cur.execute( - "CREATE TABLE instances (" - " oid INTEGER PRIMARY KEY," - " name TEXT UNIQUE)") - - cur.execute( - "CREATE TABLE events (" - " instance INTEGER NOT NULL REFERENCES instances(oid)," - " event_type INTEGER NOT NULL REFERENCES event_types(oid)," - " start_time DOUBLE NOT NULL," - " stop_time DOUBLE NOT NULL," - " port_name TEXT," - " port_operator INTEGER REFERENCES port_operators(oid)," - " port_length INTEGER," - " slot INTEGER," - " message_size INTEGER," - " message_timestamp DOUBLE)") - - cur.execute( - "CREATE VIEW all_events (" - " instance, type, start_time, stop_time, port, operator," - " port_length, slot, message_size, message_timestamp)" - " AS SELECT" - " i.name, et.name, e.start_time, e.stop_time, e.port_name," - " o.name, e.port_length, e.slot, e.message_size," - " e.message_timestamp" - " FROM" - " events e" - " JOIN instances i ON e.instance = i.oid" - " LEFT JOIN event_types et ON e.event_type = et.oid" - " LEFT JOIN port_operators o ON e.port_operator = o.oid") - - cur.execute("COMMIT") - cur.close() - - def 
add_events( - self, instance_id: Reference, events: Iterable[ProfileEvent] - ) -> None: - """Adds profiling events to the database. - - Args: - events: The events to add. - """ - if not hasattr(self._local, 'conn'): - self._local.conn = sqlite3.connect( - self._db_file, isolation_level=None) - cur = self._local.conn.cursor() - cur.execute("BEGIN IMMEDIATE TRANSACTION") - cur.execute( - "SELECT oid FROM instances WHERE name = ?", - (str(instance_id),)) - oids = cur.fetchall() - if oids: - instance_oid = oids[0][0] - else: - cur.execute( - "INSERT INTO instances (name) VALUES (?) RETURNING oid", - (str(instance_id),)) - instance_oid = cur.fetchone()[0] - - Record = Tuple[ - int, int, float, float, Optional[str], Optional[int], - Optional[int], Optional[int], Optional[int], - Optional[float]] - - def to_tuple(e: ProfileEvent) -> Record: - # Tell mypy this shouldn't happen - assert e.start_time is not None - assert e.stop_time is not None - - port_name = None if e.port is None else str(e.port.name) - port_operator = None if e.port is None else e.port.operator.value - - return ( - instance_oid, e.event_type.value, e.start_time.seconds, - e.stop_time.seconds, port_name, port_operator, - e.port_length, e.slot, e.message_size, e.message_timestamp) - - cur.executemany( - "INSERT INTO events" - " (instance, event_type, start_time, stop_time, port_name," - " port_operator, port_length, slot, message_size," - " message_timestamp) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", - list(map(to_tuple, events))) - cur.execute("COMMIT") - cur.close() def close(self) -> None: """Close the connection to the database. @@ -177,3 +30,26 @@ def close(self) -> None: # connection. if hasattr(self._local, 'conn'): self._local.conn.close() + + def _get_cursor(self) -> sqlite3.Cursor: + """Get a connection to run queries with. + + Exactly how the sqlite3 Python module's automatic + transactions work is a bit mysterious, and the documentation + of sqlite itself isn't perfect either. 
So we set + isolation_level to None, which doesn't actually affect the + effective isolation level (!) but does keep the Python module + from starting transactions automatically, and then we use + explicit BEGIN TRANSACTION statements to keep the sqlite + library from doing anything automatically. That way, it's + clear what's going on from the code. + + Also, connections are expensive and cannot be used in other + threads. We have a multithreaded TCP server in the manager, + so we use thread-local storage to get a connection in each + thread. + """ + if not hasattr(self._local, 'conn'): + self._local.conn = sqlite3.connect( + self._db_file, isolation_level=None) + return cast(sqlite3.Cursor, self._local.conn.cursor()) diff --git a/libmuscle/python/libmuscle/manager/profile_store.py b/libmuscle/python/libmuscle/manager/profile_store.py index 6b576be8..dc122e92 100644 --- a/libmuscle/python/libmuscle/manager/profile_store.py +++ b/libmuscle/python/libmuscle/manager/profile_store.py @@ -1,24 +1,41 @@ +import logging from pathlib import Path -from typing import Iterable +import sqlite3 +from typing import Iterable, Optional, Tuple -from libmuscle.profiling import ProfileEvent +from libmuscle.profiling import ProfileEvent, ProfileEventType from libmuscle.manager.profile_database import ProfileDatabase -from ymmsl import Reference +from ymmsl import Operator, Reference -class ProfileStore: - """Stores profiling information to disk.""" - def __init__(self, log_dir: Path) -> None: - """Create a ProfileStore. +_logger = logging.getLogger(__name__) - This will save the recorded profiling data to a file named - ``performance.sqlite`` in the main RunDir. + +class ProfileStore(ProfileDatabase): + """Creates and fills a profiling database. + + This class creates the database, and then allows writing to it. + It's only used internally, and it's almost a non-const version + of ProfileDatabase. + """ + def __init__(self, db_dir: Path) -> None: + """Create a new profile database. 
Args: - log_dir: Directory to store the database in + db_file: The file to create and initialise. """ - db_file = log_dir / 'performance.sqlite' - self._db = ProfileDatabase(db_file) + db_file = db_dir / 'performance.sqlite' + + if db_file.exists(): + _logger.info(f'Overwriting profiling database {db_file}') + try: + # from Python 3.8, we can use missing_ok=True + db_file.unlink() + except FileNotFoundError: + pass + + super().__init__(db_file) + self._init_database() def add_events( self, instance_id: Reference, events: Iterable[ProfileEvent] @@ -28,13 +45,114 @@ def add_events( Args: events: The events to add. """ - self._db.add_events(instance_id, events) + if not hasattr(self._local, 'conn'): + self._local.conn = sqlite3.connect( + self._db_file, isolation_level=None) + cur = self._local.conn.cursor() + cur.execute("BEGIN IMMEDIATE TRANSACTION") + cur.execute( + "SELECT oid FROM instances WHERE name = ?", + (str(instance_id),)) + oids = cur.fetchall() + if oids: + instance_oid = oids[0][0] + else: + cur.execute( + "INSERT INTO instances (name) VALUES (?) RETURNING oid", + (str(instance_id),)) + instance_oid = cur.fetchone()[0] + + Record = Tuple[ + int, int, float, float, Optional[str], Optional[int], + Optional[int], Optional[int], Optional[int], + Optional[float]] + + def to_tuple(e: ProfileEvent) -> Record: + # Tell mypy this shouldn't happen + assert e.start_time is not None + assert e.stop_time is not None + + port_name = None if e.port is None else str(e.port.name) + port_operator = None if e.port is None else e.port.operator.value + + return ( + instance_oid, e.event_type.value, e.start_time.seconds, + e.stop_time.seconds, port_name, port_operator, + e.port_length, e.slot, e.message_size, e.message_timestamp) - def close(self) -> None: - """Close the store. 
+ cur.executemany( + "INSERT INTO events" + " (instance, event_type, start_time, stop_time, port_name," + " port_operator, port_length, slot, message_size," + " message_timestamp) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + list(map(to_tuple, events))) + cur.execute("COMMIT") + cur.close() - This should be called once by each thread that used this - object before it goes down, so that its connection to the - database can be closed. + def _init_database(self) -> None: + """Initialises the database. + + This creates the SQL tables needed to store the data. """ - self._db.close() + cur = self._get_cursor() + cur.execute("BEGIN IMMEDIATE TRANSACTION") + cur.execute( + "CREATE TABLE muscle3_format (" + " major_version INTEGER NOT NULL," + " minor_version INTEGER NOT NULL)") + cur.execute( + "INSERT INTO muscle3_format(major_version, minor_version)" + " VALUES (1, 0)") + + cur.execute( + "CREATE TABLE event_types (" + " oid INTEGER PRIMARY KEY," + " name TEXT UNIQUE)") + event_types = [(t.value, t.name) for t in ProfileEventType] + cur.executemany( + "INSERT INTO event_types (oid, name) VALUES (?, ?)", + event_types) + + cur.execute( + "CREATE TABLE port_operators (" + " oid INTEGER PRIMARY KEY," + " name TEXT UNIQUE)") + port_operators = [(o.value, o.name) for o in Operator] + cur.executemany( + "INSERT INTO port_operators (oid, name) VALUES (?, ?)", + port_operators) + + cur.execute( + "CREATE TABLE instances (" + " oid INTEGER PRIMARY KEY," + " name TEXT UNIQUE)") + + cur.execute( + "CREATE TABLE events (" + " instance INTEGER NOT NULL REFERENCES instances(oid)," + " event_type INTEGER NOT NULL REFERENCES event_types(oid)," + " start_time DOUBLE NOT NULL," + " stop_time DOUBLE NOT NULL," + " port_name TEXT," + " port_operator INTEGER REFERENCES port_operators(oid)," + " port_length INTEGER," + " slot INTEGER," + " message_size INTEGER," + " message_timestamp DOUBLE)") + + cur.execute( + "CREATE VIEW all_events (" + " instance, type, start_time, stop_time, port, 
operator," + " port_length, slot, message_size, message_timestamp)" + " AS SELECT" + " i.name, et.name, e.start_time, e.stop_time, e.port_name," + " o.name, e.port_length, e.slot, e.message_size," + " e.message_timestamp" + " FROM" + " events e" + " JOIN instances i ON e.instance = i.oid" + " LEFT JOIN event_types et ON e.event_type = et.oid" + " LEFT JOIN port_operators o ON e.port_operator = o.oid") + + cur.execute("COMMIT") + cur.close() diff --git a/libmuscle/python/libmuscle/manager/test/test_profile_database.py b/libmuscle/python/libmuscle/manager/test/test_profile_store.py similarity index 93% rename from libmuscle/python/libmuscle/manager/test/test_profile_database.py rename to libmuscle/python/libmuscle/manager/test/test_profile_store.py index af4ddae8..5bac4a64 100644 --- a/libmuscle/python/libmuscle/manager/test/test_profile_database.py +++ b/libmuscle/python/libmuscle/manager/test/test_profile_store.py @@ -1,16 +1,16 @@ from libmuscle.profiling import ProfileEvent, ProfileEventType -from libmuscle.manager.profile_database import ProfileDatabase +from libmuscle.manager.profile_store import ProfileStore from libmuscle.timestamp import Timestamp from ymmsl import Operator, Port, Reference import sqlite3 -def test_create_profile_database(tmp_path): - db_path = tmp_path / 'test.db' - db = ProfileDatabase(db_path) +def test_create_profile_store(tmp_path): + db = ProfileStore(tmp_path) db.close() + db_path = tmp_path / 'performance.sqlite' conn = sqlite3.connect(db_path, isolation_level=None) cur = conn.cursor() cur.execute("BEGIN TRANSACTION") @@ -44,8 +44,9 @@ def test_create_profile_database(tmp_path): def test_add_events(tmp_path): - db_path = tmp_path / 'test.db' - db = ProfileDatabase(db_path) + db = ProfileStore(tmp_path) + + db_path = tmp_path / 'performance.sqlite' conn = sqlite3.connect(db_path, isolation_level=None) cur = conn.cursor() From edfe4a26cf0e56b3fd0abb27192e39bc79fea194 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Sat, 7 Jan 2023 
22:31:28 +0100 Subject: [PATCH 021/188] Use integer timestamps for profiling --- libmuscle/cpp/src/libmuscle/communicator.cpp | 7 ++- libmuscle/cpp/src/libmuscle/instance.cpp | 6 +-- .../libmuscle/mcp/tcp_transport_client.cpp | 7 ++- .../src/libmuscle/mcp/transport_client.hpp | 5 +- libmuscle/cpp/src/libmuscle/mmp_client.cpp | 6 +-- libmuscle/cpp/src/libmuscle/profiler.cpp | 7 +-- libmuscle/cpp/src/libmuscle/profiling.cpp | 48 +++++++++++++++++-- libmuscle/cpp/src/libmuscle/profiling.hpp | 37 ++++++++++++-- .../libmuscle/tests/mocks/mock_mpp_client.cpp | 5 +- .../libmuscle/tests/mocks/mock_mpp_client.hpp | 5 +- .../cpp/src/libmuscle/tests/test_profiler.cpp | 24 ++++++---- libmuscle/python/libmuscle/communicator.py | 15 +++--- libmuscle/python/libmuscle/instance.py | 12 +++-- .../python/libmuscle/manager/mmp_server.py | 6 ++- .../python/libmuscle/manager/profile_store.py | 8 ++-- .../manager/test/test_profile_store.py | 19 ++++---- .../libmuscle/mcp/tcp_transport_client.py | 9 ++-- .../python/libmuscle/mcp/transport_client.py | 4 +- libmuscle/python/libmuscle/mmp_client.py | 2 +- libmuscle/python/libmuscle/profiler.py | 5 +- libmuscle/python/libmuscle/profiling.py | 29 ++++++++--- .../python/libmuscle/test/test_profiler.py | 16 +++---- libmuscle/python/libmuscle/timestamp.py | 4 ++ 23 files changed, 193 insertions(+), 93 deletions(-) diff --git a/libmuscle/cpp/src/libmuscle/communicator.cpp b/libmuscle/cpp/src/libmuscle/communicator.cpp index 94bc6f04..97b78f94 100644 --- a/libmuscle/cpp/src/libmuscle/communicator.cpp +++ b/libmuscle/cpp/src/libmuscle/communicator.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include @@ -121,7 +120,7 @@ void Communicator::send_message( Port & port = ports_.at(port_name); ProfileEvent profile_event( - ProfileEventType::send, Timestamp(), {}, port, {}, slot, + ProfileEventType::send, ProfileTimestamp(), {}, port, {}, slot, {}, message.timestamp()); auto recv_endpoints = peer_manager_->get_peer_endpoints( @@ -190,7 +189,7 
@@ Message Communicator::receive_message( Port & port = (ports_.count(port_name)) ? (ports_.at(port_name)) : muscle_settings_in_.get(); ProfileEvent receive_event( - ProfileEventType::receive, Timestamp(), {}, port, {}, slot); + ProfileEventType::receive, ProfileTimestamp(), {}, port, {}, slot); // peer_manager already checks that there is at most one snd_endpoint // connected to the port we receive on @@ -200,7 +199,7 @@ Message Communicator::receive_message( auto msg_and_profile = client.receive(recv_endpoint.ref()); ProfileEvent recv_decode_event( - ProfileEventType::receive_decode, Timestamp(), {}, port, {}, slot, + ProfileEventType::receive_decode, ProfileTimestamp(), {}, port, {}, slot, std::get<0>(msg_and_profile).size()); auto mpp_message = MPPMessage::from_bytes(std::get<0>(msg_and_profile)); diff --git a/libmuscle/cpp/src/libmuscle/instance.cpp b/libmuscle/cpp/src/libmuscle/instance.cpp index 1c330ba1..85b498e9 100644 --- a/libmuscle/cpp/src/libmuscle/instance.cpp +++ b/libmuscle/cpp/src/libmuscle/instance.cpp @@ -420,7 +420,7 @@ void Instance::Impl::send( /* Register this instance with the manager. */ void Instance::Impl::register_() { - ProfileEvent register_event(ProfileEventType::register_, Timestamp()); + ProfileEvent register_event(ProfileEventType::register_, ProfileTimestamp()); auto locations = communicator_->get_locations(); auto port_list = list_declared_ports_(); manager_->register_instance(locations, port_list); @@ -431,7 +431,7 @@ void Instance::Impl::register_() { /* Connect this instance to the given peers / conduits. 
*/ void Instance::Impl::connect_() { - ProfileEvent connect_event(ProfileEventType::connect, Timestamp()); + ProfileEvent connect_event(ProfileEventType::connect, ProfileTimestamp()); auto peer_info = manager_->request_peers(); communicator_->connect(std::get<0>(peer_info), std::get<1>(peer_info), std::get<2>(peer_info)); settings_manager_.base = manager_->get_settings(); @@ -442,7 +442,7 @@ void Instance::Impl::connect_() { /* Deregister this instance from the manager. */ void Instance::Impl::deregister_() { - ProfileEvent deregister_event(ProfileEventType::deregister, Timestamp()); + ProfileEvent deregister_event(ProfileEventType::deregister, ProfileTimestamp()); manager_->deregister_instance(); profiler_->record_event(std::move(deregister_event)); // This is the last thing we'll profile, so flush messages diff --git a/libmuscle/cpp/src/libmuscle/mcp/tcp_transport_client.cpp b/libmuscle/cpp/src/libmuscle/mcp/tcp_transport_client.cpp index 39fe2600..746e3a74 100644 --- a/libmuscle/cpp/src/libmuscle/mcp/tcp_transport_client.cpp +++ b/libmuscle/cpp/src/libmuscle/mcp/tcp_transport_client.cpp @@ -2,7 +2,6 @@ #include #include -#include #include #include @@ -125,14 +124,14 @@ TcpTransportClient::~TcpTransportClient() { std::tuple TcpTransportClient::call( char const * req_buf, std::size_t req_len ) const { - Timestamp start_wait; + ProfileTimestamp start_wait; send_frame(socket_fd_, req_buf, req_len); int64_t length = recv_int64(socket_fd_); - Timestamp start_transfer; + ProfileTimestamp start_transfer; auto result = Data::byte_array(length); recv_all(socket_fd_, result.as_byte_array(), result.size()); - Timestamp stop_transfer; + ProfileTimestamp stop_transfer; return std::make_tuple( result, std::make_tuple(start_wait, start_transfer, stop_transfer)); } diff --git a/libmuscle/cpp/src/libmuscle/mcp/transport_client.hpp b/libmuscle/cpp/src/libmuscle/mcp/transport_client.hpp index 45cdeb72..a58633eb 100644 --- a/libmuscle/cpp/src/libmuscle/mcp/transport_client.hpp +++ 
b/libmuscle/cpp/src/libmuscle/mcp/transport_client.hpp @@ -2,7 +2,7 @@ #include -#include +#include #include #include @@ -12,7 +12,8 @@ namespace libmuscle { namespace impl { namespace mcp { -using ProfileData = std::tuple; +using ProfileData = std::tuple< + ProfileTimestamp, ProfileTimestamp, ProfileTimestamp>; /** A client that connects to an MCP transport server. diff --git a/libmuscle/cpp/src/libmuscle/mmp_client.cpp b/libmuscle/cpp/src/libmuscle/mmp_client.cpp index 262b8dd5..d468a21d 100644 --- a/libmuscle/cpp/src/libmuscle/mmp_client.cpp +++ b/libmuscle/cpp/src/libmuscle/mmp_client.cpp @@ -33,7 +33,7 @@ using ymmsl::SettingValue; namespace { const float connection_timeout = 300.0f; - const std::chrono::milliseconds peer_timeout(600000); // milliseconds + const std::chrono::milliseconds peer_timeout(600000); const int peer_interval_min = 5000; // milliseconds const int peer_interval_max = 10000; // milliseconds @@ -87,8 +87,8 @@ namespace { return Data::list( static_cast(event.event_type), - event.start_time.get().seconds, - event.stop_time.get().seconds, + event.start_time.get().nanoseconds, + event.stop_time.get().nanoseconds, encoded_port, encode_optional(event.port_length), encode_optional(event.slot), encode_optional(event.message_size), encode_optional(event.message_timestamp)); diff --git a/libmuscle/cpp/src/libmuscle/profiler.cpp b/libmuscle/cpp/src/libmuscle/profiler.cpp index a99d3bfd..4b264ae7 100644 --- a/libmuscle/cpp/src/libmuscle/profiler.cpp +++ b/libmuscle/cpp/src/libmuscle/profiler.cpp @@ -1,9 +1,6 @@ #include -#include - - -using libmuscle::impl::Timestamp; +#include namespace libmuscle { namespace impl { @@ -19,7 +16,7 @@ void Profiler::shutdown() { void Profiler::record_event(ProfileEvent && event) { if (!event.stop_time.is_set()) - event.stop_time = Timestamp(); + event.stop_time = ProfileTimestamp(); events_.push_back(std::move(event)); if (events_.size() >= 100) flush_(); diff --git a/libmuscle/cpp/src/libmuscle/profiling.cpp 
b/libmuscle/cpp/src/libmuscle/profiling.cpp index 9583cd97..2160d105 100644 --- a/libmuscle/cpp/src/libmuscle/profiling.cpp +++ b/libmuscle/cpp/src/libmuscle/profiling.cpp @@ -1,17 +1,55 @@ #include -#include +#include using ymmsl::Port; +using std::chrono::duration_cast; +using ns = std::chrono::nanoseconds; +using std::chrono::steady_clock; +using std::chrono::system_clock; + + +namespace { + int64_t get_time_ref_() { + auto now_steady = steady_clock::now(); + auto now_wall = system_clock::now(); + + int64_t now_steady_ns = duration_cast( + now_steady.time_since_epoch()).count(); + int64_t now_wall_ns = duration_cast( + now_wall.time_since_epoch()).count(); + return now_wall_ns - now_steady_ns; + } +} namespace libmuscle { namespace impl { +/* Note that this is not inlined, as translation units are compiled + * separately and we don't use LTO. That should keep the compiler from + * moving it with respect to the thing we want to profile while optimising. + */ +ProfileTimestamp::ProfileTimestamp() { + auto now = steady_clock::now().time_since_epoch(); + this->nanoseconds = duration_cast(now).count() + time_ref_; +} + +ProfileTimestamp::ProfileTimestamp(int64_t nanoseconds) + : nanoseconds(nanoseconds) +{} + +int64_t ProfileTimestamp::time_ref_ = get_time_ref_(); + +std::ostream & operator<<(std::ostream & os, ProfileTimestamp ts) { + return os << ts.nanoseconds; +} + + ProfileEvent::ProfileEvent( ProfileEventType event_type, - Optional start_time, - Optional stop_time, + Optional start_time, + Optional stop_time, Optional const & port, Optional port_length, Optional slot, @@ -28,11 +66,11 @@ ProfileEvent::ProfileEvent( {} void ProfileEvent::start() { - start_time = Timestamp(); + start_time = ProfileTimestamp(); } void ProfileEvent::stop() { - stop_time = Timestamp(); + stop_time = ProfileTimestamp(); } } } diff --git a/libmuscle/cpp/src/libmuscle/profiling.hpp b/libmuscle/cpp/src/libmuscle/profiling.hpp index 71b65dec..8f937e81 100644 --- 
a/libmuscle/cpp/src/libmuscle/profiling.hpp +++ b/libmuscle/cpp/src/libmuscle/profiling.hpp @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -27,6 +28,34 @@ enum class ProfileEventType { }; +/** A timestamp for profiling. + * + * This has higher resolution than Timestamp, storing a number of + * nanoseconds since the epoch in an int64_t. The epoch is usually + * close to the UNIX epoch. + */ +class ProfileTimestamp { + public: + /** Create a timestamp. + */ + ProfileTimestamp(); + + /** Create a timestamp for a given time point. + * + * @param nanoseconds: Time to set. If unset, use the current time. + */ + ProfileTimestamp(int64_t nanoseconds); + + /// Number of nanoseconds since the epoch. + int64_t nanoseconds; + + private: + static int64_t time_ref_; +}; + +std::ostream & operator<<(std::ostream & os, ProfileTimestamp ts); + + /** A profile event as used by MUSCLE3. * * This represents a single measurement of the timing of some event that @@ -50,8 +79,8 @@ class ProfileEvent { */ ProfileEvent( ProfileEventType event_type, - Optional start_time = Optional(), - Optional stop_time = Optional(), + Optional start_time = Optional(), + Optional stop_time = Optional(), Optional const & port = Optional(), Optional port_length = Optional(), Optional slot = Optional(), @@ -68,10 +97,10 @@ class ProfileEvent { ProfileEventType event_type; /// When the event started (real-world, not simulation time). - Optional start_time; + Optional start_time; /// When the event ended (real-world, not simulation time). - Optional stop_time; + Optional stop_time; /// Port used for sending or receiving, if applicable. 
Optional port; diff --git a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mpp_client.cpp b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mpp_client.cpp index c4c99850..f1d7ba9a 100644 --- a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mpp_client.cpp +++ b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mpp_client.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include @@ -24,7 +24,8 @@ std::tuple MockMPPClient::receive( return std::make_tuple( next_receive_message.encoded(), std::make_tuple( - Timestamp(1.0), Timestamp(2.0), Timestamp(3.0))); + ProfileTimestamp(1.0), ProfileTimestamp(2.0), + ProfileTimestamp(3.0))); } void MockMPPClient::close() {} diff --git a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mpp_client.hpp b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mpp_client.hpp index 793e276e..f2ab5815 100644 --- a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mpp_client.hpp +++ b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mpp_client.hpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include @@ -14,7 +14,8 @@ namespace libmuscle { namespace impl { -using ProfileData = std::tuple; +using ProfileData = std::tuple< + ProfileTimestamp, ProfileTimestamp, ProfileTimestamp>; class MockMPPClient { diff --git a/libmuscle/cpp/src/libmuscle/tests/test_profiler.cpp b/libmuscle/cpp/src/libmuscle/tests/test_profiler.cpp index 365c3f56..102cb94b 100644 --- a/libmuscle/cpp/src/libmuscle/tests/test_profiler.cpp +++ b/libmuscle/cpp/src/libmuscle/tests/test_profiler.cpp @@ -21,6 +21,7 @@ using libmuscle::impl::Profiler; using libmuscle::impl::ProfileEvent; using libmuscle::impl::ProfileEventType; +using libmuscle::impl::ProfileTimestamp; using libmuscle::impl::MockMMPClient; using ymmsl::Port; @@ -43,8 +44,12 @@ struct TestProfiler { // Helpers for comparison, not needed in the main code so put here. 
-bool operator==(Timestamp const & lhs, Timestamp const & rhs) { - return lhs.seconds == rhs.seconds; +bool operator==(ProfileTimestamp const & lhs, ProfileTimestamp const & rhs) { + return lhs.nanoseconds == rhs.nanoseconds; +} + +bool operator<(ProfileTimestamp const & lhs, ProfileTimestamp const & rhs) { + return lhs.nanoseconds < rhs.nanoseconds; } bool operator==(Port const & lhs, Port const & rhs) { @@ -98,7 +103,7 @@ TEST(libmuscle_profiler, test_recording_events) { MockMMPClient mock_mmp_client(Reference("test_instance[10]"), ""); Profiler profiler(mock_mmp_client); - Timestamp t1, t2; + ProfileTimestamp t1, t2; ProfileEvent e(ProfileEventType::register_, t1, t2); profiler.record_event(ProfileEvent(e)); @@ -114,7 +119,7 @@ TEST(libmuscle_profiler, test_auto_stop_time) { MockMMPClient mock_mmp_client(Reference("test_instance[10]"), ""); Profiler profiler(mock_mmp_client); - Timestamp t1; + ProfileTimestamp t1; ProfileEvent e(ProfileEventType::send, t1); profiler.record_event(std::move(e)); @@ -122,7 +127,7 @@ TEST(libmuscle_profiler, test_auto_stop_time) { auto const & e2 = TestProfiler::events_(profiler).at(0); ASSERT_EQ(e2.start_time, t1); ASSERT_TRUE(e2.stop_time.is_set()); - ASSERT_TRUE(e2.start_time.get().seconds < e2.stop_time.get().seconds); + ASSERT_TRUE(e2.start_time.get() < e2.stop_time.get()); } TEST(libmuscle_profiler, test_send_to_mock_mmp_client) { @@ -130,17 +135,20 @@ TEST(libmuscle_profiler, test_send_to_mock_mmp_client) { MockMMPClient mock_mmp_client(Reference("test_instance[10]"), ""); Profiler profiler(mock_mmp_client); - ProfileEvent e1(ProfileEventType::receive, Timestamp(), Timestamp()); + ProfileEvent e1( + ProfileEventType::receive, ProfileTimestamp(), ProfileTimestamp()); profiler.record_event(ProfileEvent(e1)); for (int i = 1; i < 99; ++i) { - ProfileEvent e(ProfileEventType::send, Timestamp(), Timestamp()); + ProfileEvent e( + ProfileEventType::send, ProfileTimestamp(), ProfileTimestamp()); profiler.record_event(std::move(e)); } 
ASSERT_EQ(mock_mmp_client.last_submitted_profile_events.size(), 0u); - ProfileEvent e2(ProfileEventType::receive_transfer, Timestamp(), Timestamp()); + ProfileEvent e2( + ProfileEventType::receive_transfer, ProfileTimestamp(), ProfileTimestamp()); profiler.record_event(ProfileEvent(e2)); ASSERT_EQ(mock_mmp_client.last_submitted_profile_events.size(), 100u); diff --git a/libmuscle/python/libmuscle/communicator.py b/libmuscle/python/libmuscle/communicator.py index e800d03e..933fee75 100644 --- a/libmuscle/python/libmuscle/communicator.py +++ b/libmuscle/python/libmuscle/communicator.py @@ -11,8 +11,8 @@ from libmuscle.post_office import PostOffice from libmuscle.port import Port from libmuscle.profiler import Profiler -from libmuscle.profiling import ProfileEvent, ProfileEventType -from libmuscle.timestamp import Timestamp +from libmuscle.profiling import ( + ProfileEvent, ProfileEventType, ProfileTimestamp) _logger = logging.getLogger(__name__) @@ -216,8 +216,8 @@ def send_message( port = self._ports[port_name] profile_event = ProfileEvent( - ProfileEventType.SEND, Timestamp(), None, port, None, slot, - None, message.timestamp) + ProfileEventType.SEND, ProfileTimestamp(), None, port, None, + slot, None, message.timestamp) recv_endpoints = self._peer_manager.get_peer_endpoints( snd_endpoint.port, slot_list) @@ -306,7 +306,8 @@ def receive_message(self, port_name: str, slot: Optional[int] = None, port = self._muscle_settings_in receive_event = ProfileEvent( - ProfileEventType.RECEIVE, Timestamp(), None, port, None, slot) + ProfileEventType.RECEIVE, ProfileTimestamp(), None, port, None, + slot) # peer_manager already checks that there is at most one snd_endpoint # connected to the port we receive on @@ -316,8 +317,8 @@ def receive_message(self, port_name: str, slot: Optional[int] = None, mpp_message_bytes, profile = client.receive(recv_endpoint.ref()) recv_decode_event = ProfileEvent( - ProfileEventType.RECEIVE_DECODE, Timestamp(), None, port, None, - slot, 
len(mpp_message_bytes)) + ProfileEventType.RECEIVE_DECODE, ProfileTimestamp(), None, + port, None, slot, len(mpp_message_bytes)) mpp_message = MPPMessage.from_bytes(mpp_message_bytes) recv_decode_event.stop() diff --git a/libmuscle/python/libmuscle/instance.py b/libmuscle/python/libmuscle/instance.py index adcd0777..fd553f6a 100644 --- a/libmuscle/python/libmuscle/instance.py +++ b/libmuscle/python/libmuscle/instance.py @@ -20,9 +20,9 @@ from libmuscle.mpp_message import ClosePort from libmuscle.mmp_client import MMPClient from libmuscle.profiler import Profiler -from libmuscle.profiling import ProfileEvent, ProfileEventType +from libmuscle.profiling import ( + ProfileEvent, ProfileEventType, ProfileTimestamp) from libmuscle.snapshot_manager import SnapshotManager -from libmuscle.timestamp import Timestamp from libmuscle.util import extract_log_file_location @@ -707,7 +707,8 @@ def __f_init_max_timestamp(self) -> Optional[float]: def _register(self) -> None: """Register this instance with the manager. """ - register_event = ProfileEvent(ProfileEventType.REGISTER, Timestamp()) + register_event = ProfileEvent( + ProfileEventType.REGISTER, ProfileTimestamp()) locations = self._communicator.get_locations() port_list = self.__list_declared_ports() self.__manager.register_instance(locations, port_list) @@ -717,7 +718,8 @@ def _register(self) -> None: def _connect(self) -> None: """Connect this instance to the given peers / conduits. """ - connect_event = ProfileEvent(ProfileEventType.CONNECT, Timestamp()) + connect_event = ProfileEvent( + ProfileEventType.CONNECT, ProfileTimestamp()) conduits, peer_dims, peer_locations = self.__manager.request_peers() self._communicator.connect(conduits, peer_dims, peer_locations) self._settings_manager.base = self.__manager.get_settings() @@ -728,7 +730,7 @@ def _deregister(self) -> None: """Deregister this instance from the manager. 
""" deregister_event = ProfileEvent( - ProfileEventType.DEREGISTER, Timestamp()) + ProfileEventType.DEREGISTER, ProfileTimestamp()) self.__manager.deregister_instance() self._profiler.record_event(deregister_event) # this is the last thing we'll profile, so flush messages diff --git a/libmuscle/python/libmuscle/manager/mmp_server.py b/libmuscle/python/libmuscle/manager/mmp_server.py index d5ed9b0e..eb4dba2e 100644 --- a/libmuscle/python/libmuscle/manager/mmp_server.py +++ b/libmuscle/python/libmuscle/manager/mmp_server.py @@ -20,7 +20,8 @@ from libmuscle.mcp.tcp_transport_server import TcpTransportServer from libmuscle.mcp.transport_server import RequestHandler from libmuscle.manager.profile_store import ProfileStore -from libmuscle.profiling import ProfileEvent, ProfileEventType +from libmuscle.profiling import ( + ProfileEvent, ProfileEventType, ProfileTimestamp) from libmuscle.snapshot import SnapshotMetadata from libmuscle.timestamp import Timestamp @@ -287,7 +288,8 @@ def _submit_profile_events( """ ev = [ ProfileEvent( - ProfileEventType(e[0]), Timestamp(e[1]), Timestamp(e[2]), + ProfileEventType(e[0]), ProfileTimestamp(e[1]), + ProfileTimestamp(e[2]), Port(e[3][0], Operator[e[3][1]]) if e[3] else None, e[4], e[5], e[6], e[7]) for e in events] diff --git a/libmuscle/python/libmuscle/manager/profile_store.py b/libmuscle/python/libmuscle/manager/profile_store.py index dc122e92..af7bdf21 100644 --- a/libmuscle/python/libmuscle/manager/profile_store.py +++ b/libmuscle/python/libmuscle/manager/profile_store.py @@ -76,8 +76,8 @@ def to_tuple(e: ProfileEvent) -> Record: port_operator = None if e.port is None else e.port.operator.value return ( - instance_oid, e.event_type.value, e.start_time.seconds, - e.stop_time.seconds, port_name, port_operator, + instance_oid, e.event_type.value, e.start_time.nanoseconds, + e.stop_time.nanoseconds, port_name, port_operator, e.port_length, e.slot, e.message_size, e.message_timestamp) cur.executemany( @@ -131,8 +131,8 @@ def 
_init_database(self) -> None: "CREATE TABLE events (" " instance INTEGER NOT NULL REFERENCES instances(oid)," " event_type INTEGER NOT NULL REFERENCES event_types(oid)," - " start_time DOUBLE NOT NULL," - " stop_time DOUBLE NOT NULL," + " start_time INTEGER NOT NULL," + " stop_time INTEGER NOT NULL," " port_name TEXT," " port_operator INTEGER REFERENCES port_operators(oid)," " port_length INTEGER," diff --git a/libmuscle/python/libmuscle/manager/test/test_profile_store.py b/libmuscle/python/libmuscle/manager/test/test_profile_store.py index 5bac4a64..8e6523ef 100644 --- a/libmuscle/python/libmuscle/manager/test/test_profile_store.py +++ b/libmuscle/python/libmuscle/manager/test/test_profile_store.py @@ -1,6 +1,6 @@ -from libmuscle.profiling import ProfileEvent, ProfileEventType +from libmuscle.profiling import ( + ProfileEvent, ProfileEventType, ProfileTimestamp) from libmuscle.manager.profile_store import ProfileStore -from libmuscle.timestamp import Timestamp from ymmsl import Operator, Port, Reference import sqlite3 @@ -52,12 +52,15 @@ def test_add_events(tmp_path): events = [ ProfileEvent( - ProfileEventType.REGISTER, Timestamp(0.0), Timestamp(0.1)), + ProfileEventType.REGISTER, ProfileTimestamp(0), + ProfileTimestamp(1000)), ProfileEvent( - ProfileEventType.SEND, Timestamp(0.8), Timestamp(0.812), + ProfileEventType.SEND, ProfileTimestamp(800), + ProfileTimestamp(812), Port('out_port', Operator.O_I), 10, 3, 12345, 13.42), ProfileEvent( - ProfileEventType.DEREGISTER, Timestamp(1.0), Timestamp(1.1))] + ProfileEventType.DEREGISTER, ProfileTimestamp(1000000000000), + ProfileTimestamp(1100000000000))] def check_send_event(instance): cur.execute("BEGIN TRANSACTION") @@ -73,7 +76,7 @@ def check_send_event(instance): assert len(events2) == 1 e = events2[0] assert e[1:10] == ( - ProfileEventType.SEND.value, 0.8, 0.812, 'out_port', + ProfileEventType.SEND.value, 800, 812, 'out_port', Operator.O_I.value, 10, 3, 12345, 13.42) assert e[11] == 'instance[0]' assert e[13] == 
'SEND' @@ -102,8 +105,8 @@ def check_register_event(typ, start, stop): assert set(events2) == { ('instance[0]', start, stop), ('instance[1]', start, stop)} - check_register_event('REGISTER', 0.0, 0.1) - check_register_event('DEREGISTER', 1.0, 1.1) + check_register_event('REGISTER', 0, 1000) + check_register_event('DEREGISTER', 1000000000000, 1100000000000) cur.close() conn.close() diff --git a/libmuscle/python/libmuscle/mcp/tcp_transport_client.py b/libmuscle/python/libmuscle/mcp/tcp_transport_client.py index 83852850..1fefe77a 100644 --- a/libmuscle/python/libmuscle/mcp/tcp_transport_client.py +++ b/libmuscle/python/libmuscle/mcp/tcp_transport_client.py @@ -1,10 +1,9 @@ import socket -from time import time from typing import Optional, Tuple from libmuscle.mcp.transport_client import ProfileData, TransportClient from libmuscle.mcp.tcp_util import recv_all, recv_int64, send_int64 -from libmuscle.timestamp import Timestamp +from libmuscle.profiling import ProfileTimestamp class TcpTransportClient(TransportClient): @@ -62,15 +61,15 @@ def call(self, request: bytes) -> Tuple[bytes, ProfileData]: Returns: The received response """ - start_wait = Timestamp(time()) + start_wait = ProfileTimestamp() send_int64(self._socket, len(request)) self._socket.sendall(request) length = recv_int64(self._socket) - start_transfer = Timestamp(time()) + start_transfer = ProfileTimestamp() response = recv_all(self._socket, length) - stop_transfer = Timestamp(time()) + stop_transfer = ProfileTimestamp() return response, (start_wait, start_transfer, stop_transfer) def close(self) -> None: diff --git a/libmuscle/python/libmuscle/mcp/transport_client.py b/libmuscle/python/libmuscle/mcp/transport_client.py index 183cc909..55942dc9 100644 --- a/libmuscle/python/libmuscle/mcp/transport_client.py +++ b/libmuscle/python/libmuscle/mcp/transport_client.py @@ -1,9 +1,9 @@ from typing import Tuple -from libmuscle.timestamp import Timestamp +from libmuscle.profiling import ProfileTimestamp -ProfileData 
= Tuple[Timestamp, Timestamp, Timestamp] +ProfileData = Tuple[ProfileTimestamp, ProfileTimestamp, ProfileTimestamp] class TransportClient: diff --git a/libmuscle/python/libmuscle/mmp_client.py b/libmuscle/python/libmuscle/mmp_client.py index 7c203383..da21b547 100644 --- a/libmuscle/python/libmuscle/mmp_client.py +++ b/libmuscle/python/libmuscle/mmp_client.py @@ -53,7 +53,7 @@ def encode_profile_event(event: ProfileEvent) -> Any: encoded_port = encode_port(event.port) if event.port else None return [ event.event_type.value, - event.start_time.seconds, event.stop_time.seconds, + event.start_time.nanoseconds, event.stop_time.nanoseconds, encoded_port, event.port_length, event.slot, event.message_size, event.message_timestamp] diff --git a/libmuscle/python/libmuscle/profiler.py b/libmuscle/python/libmuscle/profiler.py index ef990a5a..cf7435fd 100644 --- a/libmuscle/python/libmuscle/profiler.py +++ b/libmuscle/python/libmuscle/profiler.py @@ -1,8 +1,7 @@ from typing import List from libmuscle.mmp_client import MMPClient -from libmuscle.profiling import ProfileEvent -from libmuscle.timestamp import Timestamp +from libmuscle.profiling import ProfileEvent, ProfileTimestamp class Profiler: @@ -34,7 +33,7 @@ def record_event(self, event: ProfileEvent) -> None: event: The event to record. 
""" if event.stop_time is None: - event.stop_time = Timestamp() + event.stop_time = ProfileTimestamp() self._events.append(event) if len(self._events) >= 100: self.__flush() diff --git a/libmuscle/python/libmuscle/profiling.py b/libmuscle/python/libmuscle/profiling.py index 6cfc8fc7..23e0790c 100644 --- a/libmuscle/python/libmuscle/profiling.py +++ b/libmuscle/python/libmuscle/profiling.py @@ -1,10 +1,9 @@ from enum import Enum +from time import perf_counter_ns, time_ns from typing import Optional from ymmsl import Port -from libmuscle.timestamp import Timestamp - class ProfileEventType(Enum): """Profiling event types for MUSCLE3.""" @@ -18,6 +17,24 @@ class ProfileEventType(Enum): RECEIVE_DECODE = 7 +class ProfileTimestamp: + """A timestamp for profiling. + + This has higher resolution than Timestamp, storing a number of + nanoseconds since the UNIX epoch in an int. + + Attributes: + nanoseconds: Nanoseconds since the UNIX epoch. + """ + _time_ref = time_ns() - perf_counter_ns() + + def __init__(self, nanoseconds: Optional[int] = None) -> None: + """Create a timestamp representing now.""" + if nanoseconds is None: + nanoseconds = perf_counter_ns() + self._time_ref + self.nanoseconds = nanoseconds + + class ProfileEvent: """A profile event as used by MUSCLE3. @@ -53,8 +70,8 @@ class ProfileEvent: def __init__( self, event_type: ProfileEventType, - start_time: Optional[Timestamp] = None, - stop_time: Optional[Timestamp] = None, + start_time: Optional[ProfileTimestamp] = None, + stop_time: Optional[ProfileTimestamp] = None, port: Optional[Port] = None, port_length: Optional[int] = None, slot: Optional[int] = None, @@ -74,9 +91,9 @@ def __init__( def start(self) -> None: """Sets start_time to the current time. """ - self.start_time = Timestamp() + self.start_time = ProfileTimestamp() def stop(self) -> None: """Sets stop_time to the current time. 
""" - self.stop_time = Timestamp() + self.stop_time = ProfileTimestamp() diff --git a/libmuscle/python/libmuscle/test/test_profiler.py b/libmuscle/python/libmuscle/test/test_profiler.py index d74be45c..54dbe186 100644 --- a/libmuscle/python/libmuscle/test/test_profiler.py +++ b/libmuscle/python/libmuscle/test/test_profiler.py @@ -1,12 +1,12 @@ -from libmuscle.profiling import ProfileEvent, ProfileEventType -from libmuscle.timestamp import Timestamp +from libmuscle.profiling import ( + ProfileEvent, ProfileEventType, ProfileTimestamp) def test_recording_events(mocked_profiler) -> None: profiler, _ = mocked_profiler - t1 = Timestamp() - t2 = Timestamp() + t1 = ProfileTimestamp() + t2 = ProfileTimestamp() e = ProfileEvent(ProfileEventType.REGISTER, t1, t2) profiler.record_event(e) @@ -19,26 +19,26 @@ def test_recording_events(mocked_profiler) -> None: def test_auto_stop_time(mocked_profiler) -> None: profiler, _ = mocked_profiler - t1 = Timestamp() + t1 = ProfileTimestamp() e = ProfileEvent(ProfileEventType.SEND, t1) profiler.record_event(e) assert e.start_time == t1 assert e.stop_time is not None - assert e.start_time.seconds < e.stop_time.seconds + assert e.start_time.nanoseconds < e.stop_time.nanoseconds def test_send_to_manager(mocked_profiler) -> None: profiler, mock_mmp_client = mocked_profiler for i in range(99): - e1 = ProfileEvent(ProfileEventType.RECEIVE, Timestamp()) + e1 = ProfileEvent(ProfileEventType.RECEIVE, ProfileTimestamp()) profiler.record_event(e1) assert mock_mmp_client.sent_events is None - e2 = ProfileEvent(ProfileEventType.RECEIVE, Timestamp()) + e2 = ProfileEvent(ProfileEventType.RECEIVE, ProfileTimestamp()) profiler.record_event(e2) assert mock_mmp_client.sent_events is not None diff --git a/libmuscle/python/libmuscle/timestamp.py b/libmuscle/python/libmuscle/timestamp.py index 01636d9b..1cd2589f 100644 --- a/libmuscle/python/libmuscle/timestamp.py +++ b/libmuscle/python/libmuscle/timestamp.py @@ -10,6 +10,10 @@ class Timestamp: seconds: The 
number of seconds since the start of 1970. """ def __init__(self, seconds: Optional[float] = None) -> None: + """Create a Timestamp representing the given time, or now. + + If seconds is None, the current time is used. + """ if seconds is None: seconds = time.time() self.seconds = seconds From f4d3e5a37c3d09b69e3b114296b7042ff2ec93ed Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Sat, 11 Feb 2023 22:18:30 +0100 Subject: [PATCH 022/188] Make SQL statements look more secure --- integration_test/test_all.py | 2 +- integration_test/test_cpp_macro_micro.py | 4 ++-- libmuscle/python/libmuscle/manager/test/test_profile_store.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/integration_test/test_all.py b/integration_test/test_all.py index 9a9cedfe..c4aada74 100644 --- a/integration_test/test_all.py +++ b/integration_test/test_all.py @@ -70,7 +70,7 @@ def check_profile_output(tmp_path): for typ in ('SEND', 'RECEIVE_TRANSFER'): cur.execute( "SELECT * FROM all_events" - f" WHERE instance = 'macro' AND type = '{typ}'") + " WHERE instance = 'macro' AND type = ?", (typ,)) res = cur.fetchall() assert len(res) == NUM_MICROS diff --git a/integration_test/test_cpp_macro_micro.py b/integration_test/test_cpp_macro_micro.py index df448a5a..10e60828 100644 --- a/integration_test/test_cpp_macro_micro.py +++ b/integration_test/test_cpp_macro_micro.py @@ -44,8 +44,8 @@ def check_profile_output(tmp_path): def check(instance: str, typ: str, port: str, operator: str) -> None: cur.execute( "SELECT * FROM all_events" - f" WHERE instance = '{instance}' AND type = '{typ}'" - " ORDER BY start_time") + " WHERE instance = ? AND type = ?" 
+ " ORDER BY start_time", (instance, typ)) res = cur.fetchall() assert len(res) == 2 assert res[0][4:8] == (port, operator, None, None) diff --git a/libmuscle/python/libmuscle/manager/test/test_profile_store.py b/libmuscle/python/libmuscle/manager/test/test_profile_store.py index 8e6523ef..8a7b1bbf 100644 --- a/libmuscle/python/libmuscle/manager/test/test_profile_store.py +++ b/libmuscle/python/libmuscle/manager/test/test_profile_store.py @@ -96,7 +96,7 @@ def check_register_event(typ, start, stop): "SELECT i.name, e.start_time, e.stop_time" " FROM events AS e, instances AS i, event_types AS et" " WHERE e.instance = i.oid AND e.event_type = et.oid" - f" AND et.name = '{typ}'") + " AND et.name = ?", (typ,)) events2 = cur.fetchall() cur.execute("COMMIT") From 88df0b559145b101db0712f111f15645feacd515 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Sat, 11 Feb 2023 22:44:44 +0100 Subject: [PATCH 023/188] Make code more respectable (extra reference and a tie) --- libmuscle/cpp/src/libmuscle/communicator.cpp | 20 ++++++++++--------- .../src/libmuscle/mcp/transport_client.hpp | 4 ++++ 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/libmuscle/cpp/src/libmuscle/communicator.cpp b/libmuscle/cpp/src/libmuscle/communicator.cpp index 97b78f94..1e5f6ac7 100644 --- a/libmuscle/cpp/src/libmuscle/communicator.cpp +++ b/libmuscle/cpp/src/libmuscle/communicator.cpp @@ -197,12 +197,13 @@ Message Communicator::receive_message( recv_endpoint.port, slot_list).at(0); MPPClient & client = get_client_(snd_endpoint.instance()); auto msg_and_profile = client.receive(recv_endpoint.ref()); + auto & msg = std::get<0>(msg_and_profile); ProfileEvent recv_decode_event( ProfileEventType::receive_decode, ProfileTimestamp(), {}, port, {}, slot, - std::get<0>(msg_and_profile).size()); + msg.size()); - auto mpp_message = MPPMessage::from_bytes(std::get<0>(msg_and_profile)); + auto mpp_message = MPPMessage::from_bytes(msg); Settings overlay_settings(mpp_message.settings_overlay.as()); 
recv_decode_event.stop(); @@ -224,16 +225,17 @@ Message Communicator::receive_message( port.set_closed(); } - auto profile = std::get<1>(msg_and_profile); + ProfileTimestamp start_recv, end_wait, end_transfer; + std::tie(start_recv, end_wait, end_transfer) = std::get<1>(msg_and_profile); ProfileEvent recv_wait_event( - ProfileEventType::receive_wait, std::get<0>(profile), - std::get<1>(profile), port, mpp_message.port_length, slot, - std::get<0>(msg_and_profile).size(), message.timestamp()); + ProfileEventType::receive_wait, start_recv, + end_wait, port, mpp_message.port_length, slot, + msg.size(), message.timestamp()); ProfileEvent recv_xfer_event( - ProfileEventType::receive_transfer, std::get<1>(profile), - std::get<2>(profile), port, mpp_message.port_length, slot, - std::get<0>(msg_and_profile).size(), message.timestamp()); + ProfileEventType::receive_transfer, end_wait, + end_transfer, port, mpp_message.port_length, slot, + msg.size(), message.timestamp()); recv_decode_event.message_timestamp = message.timestamp(); receive_event.message_timestamp = message.timestamp(); diff --git a/libmuscle/cpp/src/libmuscle/mcp/transport_client.hpp b/libmuscle/cpp/src/libmuscle/mcp/transport_client.hpp index a58633eb..171afb50 100644 --- a/libmuscle/cpp/src/libmuscle/mcp/transport_client.hpp +++ b/libmuscle/cpp/src/libmuscle/mcp/transport_client.hpp @@ -12,6 +12,10 @@ namespace libmuscle { namespace impl { namespace mcp { +/** Timeline of a receive. 
+ * + * This is (start, end of wait and beginning of transfer, end) + */ using ProfileData = std::tuple< ProfileTimestamp, ProfileTimestamp, ProfileTimestamp>; From b52dee18bfae886217fb1944c8d268160ba681a4 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Sat, 11 Feb 2023 23:04:02 +0100 Subject: [PATCH 024/188] Use better assertions --- libmuscle/cpp/src/libmuscle/tests/test_profiler.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libmuscle/cpp/src/libmuscle/tests/test_profiler.cpp b/libmuscle/cpp/src/libmuscle/tests/test_profiler.cpp index 102cb94b..c1a1b0a2 100644 --- a/libmuscle/cpp/src/libmuscle/tests/test_profiler.cpp +++ b/libmuscle/cpp/src/libmuscle/tests/test_profiler.cpp @@ -127,7 +127,7 @@ TEST(libmuscle_profiler, test_auto_stop_time) { auto const & e2 = TestProfiler::events_(profiler).at(0); ASSERT_EQ(e2.start_time, t1); ASSERT_TRUE(e2.stop_time.is_set()); - ASSERT_TRUE(e2.start_time.get() < e2.stop_time.get()); + ASSERT_LT(e2.start_time.get(), e2.stop_time.get()); } TEST(libmuscle_profiler, test_send_to_mock_mmp_client) { @@ -152,7 +152,7 @@ TEST(libmuscle_profiler, test_send_to_mock_mmp_client) { profiler.record_event(ProfileEvent(e2)); ASSERT_EQ(mock_mmp_client.last_submitted_profile_events.size(), 100u); - ASSERT_TRUE(mock_mmp_client.last_submitted_profile_events.at(0) == e1); - ASSERT_TRUE(mock_mmp_client.last_submitted_profile_events.at(99) == e2); + ASSERT_EQ(mock_mmp_client.last_submitted_profile_events.at(0), e1); + ASSERT_EQ(mock_mmp_client.last_submitted_profile_events.at(99), e2); } From fd25c8b8efee058510413cf40bfbc80946358dda Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Sat, 11 Feb 2023 23:04:14 +0100 Subject: [PATCH 025/188] Use cleaner syntax for converting time --- libmuscle/cpp/src/libmuscle/timestamp.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libmuscle/cpp/src/libmuscle/timestamp.cpp b/libmuscle/cpp/src/libmuscle/timestamp.cpp index f24a89f6..e78b1671 100644 --- 
a/libmuscle/cpp/src/libmuscle/timestamp.cpp +++ b/libmuscle/cpp/src/libmuscle/timestamp.cpp @@ -12,8 +12,7 @@ namespace libmuscle { namespace impl { Timestamp::Timestamp() { auto since_epoch = wallclock::now().time_since_epoch(); - double cycles = since_epoch.count(); - seconds = cycles * wallclock::period::num / wallclock::period::den; + seconds = std::chrono::duration(since_epoch).count(); } Timestamp::Timestamp(double seconds) From e11a6576412552b53d0162cd2a7ea68d1fb6bd00 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Sat, 11 Feb 2023 23:18:34 +0100 Subject: [PATCH 026/188] Use that handy _get_cursor method --- libmuscle/python/libmuscle/manager/profile_store.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/libmuscle/python/libmuscle/manager/profile_store.py b/libmuscle/python/libmuscle/manager/profile_store.py index af7bdf21..0a1ae573 100644 --- a/libmuscle/python/libmuscle/manager/profile_store.py +++ b/libmuscle/python/libmuscle/manager/profile_store.py @@ -1,6 +1,5 @@ import logging from pathlib import Path -import sqlite3 from typing import Iterable, Optional, Tuple from libmuscle.profiling import ProfileEvent, ProfileEventType @@ -45,10 +44,7 @@ def add_events( Args: events: The events to add. 
""" - if not hasattr(self._local, 'conn'): - self._local.conn = sqlite3.connect( - self._db_file, isolation_level=None) - cur = self._local.conn.cursor() + cur = self._get_cursor() cur.execute("BEGIN IMMEDIATE TRANSACTION") cur.execute( "SELECT oid FROM instances WHERE name = ?", From d6b2cb3af86e9325f34f4f713327f6622ca01b45 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Sat, 11 Feb 2023 23:23:55 +0100 Subject: [PATCH 027/188] Avoid RETURNING, it's too recent, and use lastrowid instead --- libmuscle/python/libmuscle/manager/profile_store.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libmuscle/python/libmuscle/manager/profile_store.py b/libmuscle/python/libmuscle/manager/profile_store.py index 0a1ae573..7b462ca9 100644 --- a/libmuscle/python/libmuscle/manager/profile_store.py +++ b/libmuscle/python/libmuscle/manager/profile_store.py @@ -54,9 +54,9 @@ def add_events( instance_oid = oids[0][0] else: cur.execute( - "INSERT INTO instances (name) VALUES (?) RETURNING oid", + "INSERT INTO instances (name) VALUES (?)", (str(instance_id),)) - instance_oid = cur.fetchone()[0] + instance_oid = cur.lastrowid Record = Tuple[ int, int, float, float, Optional[str], Optional[int], From cac6981604bc627d10a9b5add8bee6085903de9c Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Sat, 11 Feb 2023 23:26:57 +0100 Subject: [PATCH 028/188] Use enum len(), it has one! 
--- libmuscle/python/libmuscle/manager/test/test_profile_store.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libmuscle/python/libmuscle/manager/test/test_profile_store.py b/libmuscle/python/libmuscle/manager/test/test_profile_store.py index 8a7b1bbf..7b6a2512 100644 --- a/libmuscle/python/libmuscle/manager/test/test_profile_store.py +++ b/libmuscle/python/libmuscle/manager/test/test_profile_store.py @@ -21,11 +21,11 @@ def test_create_profile_store(tmp_path): cur.execute("SELECT oid, name FROM event_types") etypes = cur.fetchall() - assert len(etypes) == len([e for e in ProfileEventType]) + assert len(etypes) == len(ProfileEventType) cur.execute("SELECT oid, name FROM port_operators") opers = cur.fetchall() - assert len(opers) == len([o for o in Operator]) + assert len(opers) == len(Operator) cur.execute("SELECT oid, name FROM instances") instances = cur.fetchall() From 2b2b060ebd925a2ecbcec15ab2a89598b7689cd9 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Sat, 11 Feb 2023 23:27:20 +0100 Subject: [PATCH 029/188] Remove superfluous list --- libmuscle/python/libmuscle/manager/profile_store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libmuscle/python/libmuscle/manager/profile_store.py b/libmuscle/python/libmuscle/manager/profile_store.py index 7b462ca9..7fe7929b 100644 --- a/libmuscle/python/libmuscle/manager/profile_store.py +++ b/libmuscle/python/libmuscle/manager/profile_store.py @@ -81,7 +81,7 @@ def to_tuple(e: ProfileEvent) -> Record: " (instance, event_type, start_time, stop_time, port_name," " port_operator, port_length, slot, message_size," " message_timestamp) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", - list(map(to_tuple, events))) + map(to_tuple, events)) cur.execute("COMMIT") cur.close() From 6e78d99e3e522225a583c33eab61bbf400439267 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Wed, 15 Feb 2023 14:30:15 +0100 Subject: [PATCH 030/188] Fortran: simplify LIBMUSCLE_Instance_create Using optional 
arguments --- .../cpp/build/libmuscle/libmuscle.version | 3 +- .../cpp/build/libmuscle/libmuscle_mpi.version | 3 +- .../bindings/libmuscle_fortran_c.cpp | 19 +- .../bindings/libmuscle_mpi_fortran_c.cpp | 26 +- libmuscle/fortran/src/libmuscle/libmuscle.f90 | 59 +-- .../fortran/src/libmuscle/libmuscle_mpi.f90 | 191 ++------- scripts/make_libmuscle_api.py | 402 +++++------------- 7 files changed, 172 insertions(+), 531 deletions(-) diff --git a/libmuscle/cpp/build/libmuscle/libmuscle.version b/libmuscle/cpp/build/libmuscle/libmuscle.version index 6a5400c4..ea4e7325 100644 --- a/libmuscle/cpp/build/libmuscle/libmuscle.version +++ b/libmuscle/cpp/build/libmuscle/libmuscle.version @@ -322,8 +322,7 @@ LIBMUSCLE_Message_get_settings_; LIBMUSCLE_Message_set_settings_; LIBMUSCLE_Message_unset_settings_; - LIBMUSCLE_Instance_create_autoports_; - LIBMUSCLE_Instance_create_with_ports_; + LIBMUSCLE_Instance_create_; LIBMUSCLE_Instance_free_; LIBMUSCLE_Instance_reuse_instance_default_; LIBMUSCLE_Instance_reuse_instance_apply_; diff --git a/libmuscle/cpp/build/libmuscle/libmuscle_mpi.version b/libmuscle/cpp/build/libmuscle/libmuscle_mpi.version index a6cb4915..ea4e7325 100644 --- a/libmuscle/cpp/build/libmuscle/libmuscle_mpi.version +++ b/libmuscle/cpp/build/libmuscle/libmuscle_mpi.version @@ -322,8 +322,7 @@ LIBMUSCLE_Message_get_settings_; LIBMUSCLE_Message_set_settings_; LIBMUSCLE_Message_unset_settings_; - LIBMUSCLE_Instance_create_autoports_cr_; - LIBMUSCLE_Instance_create_with_ports_cr_; + LIBMUSCLE_Instance_create_; LIBMUSCLE_Instance_free_; LIBMUSCLE_Instance_reuse_instance_default_; LIBMUSCLE_Instance_reuse_instance_apply_; diff --git a/libmuscle/cpp/src/libmuscle/bindings/libmuscle_fortran_c.cpp b/libmuscle/cpp/src/libmuscle/bindings/libmuscle_fortran_c.cpp index d20562d5..17a3d9d3 100644 --- a/libmuscle/cpp/src/libmuscle/bindings/libmuscle_fortran_c.cpp +++ b/libmuscle/cpp/src/libmuscle/bindings/libmuscle_fortran_c.cpp @@ -3667,21 +3667,18 @@ void 
LIBMUSCLE_Message_unset_settings_(std::intptr_t self) { return; } -std::intptr_t LIBMUSCLE_Instance_create_autoports_(std::intptr_t cla) { - CmdLineArgs * cla_p = reinterpret_cast(cla); - Instance * result = new Instance(cla_p->argc(), cla_p->argv()); - return reinterpret_cast(result); -} - -std::intptr_t LIBMUSCLE_Instance_create_with_ports_( +std::intptr_t LIBMUSCLE_Instance_create_( std::intptr_t cla, std::intptr_t ports ) { CmdLineArgs * cla_p = reinterpret_cast(cla); - PortsDescription * ports_p = reinterpret_cast( - ports); - Instance * result = new Instance( - cla_p->argc(), cla_p->argv(), *ports_p); + Instance * result; + if (ports == 0) { + result = new Instance(cla_p->argc(), cla_p->argv()); + } else { + PortsDescription * ports_p = reinterpret_cast(ports); + result = new Instance(cla_p->argc(), cla_p->argv(), *ports_p); + } return reinterpret_cast(result); } diff --git a/libmuscle/cpp/src/libmuscle/bindings/libmuscle_mpi_fortran_c.cpp b/libmuscle/cpp/src/libmuscle/bindings/libmuscle_mpi_fortran_c.cpp index fb1ba471..cbe4e5cf 100644 --- a/libmuscle/cpp/src/libmuscle/bindings/libmuscle_mpi_fortran_c.cpp +++ b/libmuscle/cpp/src/libmuscle/bindings/libmuscle_mpi_fortran_c.cpp @@ -3667,28 +3667,22 @@ void LIBMUSCLE_Message_unset_settings_(std::intptr_t self) { return; } -std::intptr_t LIBMUSCLE_Instance_create_autoports_cr_( - std::intptr_t cla, - int communicator, - int root -) { - CmdLineArgs * cla_p = reinterpret_cast(cla); - MPI_Comm communicator_m = MPI_Comm_f2c(communicator); - Instance * result = new Instance(cla_p->argc(), cla_p->argv(), communicator_m, root); - return reinterpret_cast(result); -} - -std::intptr_t LIBMUSCLE_Instance_create_with_ports_cr_( +std::intptr_t LIBMUSCLE_Instance_create_( std::intptr_t cla, std::intptr_t ports, int communicator, int root ) { CmdLineArgs * cla_p = reinterpret_cast(cla); - PortsDescription * ports_p = reinterpret_cast( - ports); MPI_Comm communicator_m = MPI_Comm_f2c(communicator); - Instance * result = new 
Instance( - cla_p->argc(), cla_p->argv(), *ports_p, communicator_m, root); + Instance * result; + if (ports == 0) { + result = new Instance( + cla_p->argc(), cla_p->argv(), communicator_m, root); + } else { + PortsDescription * ports_p = reinterpret_cast(ports); + result = new Instance( + cla_p->argc(), cla_p->argv(), *ports_p, communicator_m, root); + } return reinterpret_cast(result); } diff --git a/libmuscle/fortran/src/libmuscle/libmuscle.f90 b/libmuscle/fortran/src/libmuscle/libmuscle.f90 index 3241020e..22177eb5 100644 --- a/libmuscle/fortran/src/libmuscle/libmuscle.f90 +++ b/libmuscle/fortran/src/libmuscle/libmuscle.f90 @@ -428,8 +428,6 @@ module libmuscle end type LIBMUSCLE_Instance public :: LIBMUSCLE_Instance - public :: LIBMUSCLE_Instance_create_autoports - public :: LIBMUSCLE_Instance_create_with_ports public :: LIBMUSCLE_Instance_create public :: LIBMUSCLE_Instance_free public :: LIBMUSCLE_Instance_reuse_instance_default @@ -2953,22 +2951,15 @@ subroutine LIBMUSCLE_Message_unset_settings_(self) & integer (c_intptr_t), value, intent(in) :: self end subroutine LIBMUSCLE_Message_unset_settings_ - integer (c_intptr_t) function LIBMUSCLE_Instance_create_autoports_(cla) & - bind(C, name="LIBMUSCLE_Instance_create_autoports_") - - use iso_c_binding - integer (c_intptr_t), value, intent(in) :: cla - end function LIBMUSCLE_Instance_create_autoports_ - - integer (c_intptr_t) function LIBMUSCLE_Instance_create_with_ports_( & + integer (c_intptr_t) function LIBMUSCLE_Instance_create_( & cla, & ports) & - bind(C, name="LIBMUSCLE_Instance_create_with_ports_") + bind(C, name="LIBMUSCLE_Instance_create_") use iso_c_binding integer (c_intptr_t), value, intent(in) :: cla integer (c_intptr_t), value, intent(in) :: ports - end function LIBMUSCLE_Instance_create_with_ports_ + end function LIBMUSCLE_Instance_create_ subroutine LIBMUSCLE_Instance_free_(self) & bind(C, name="LIBMUSCLE_Instance_free_") @@ -3819,12 +3810,6 @@ end function 
LIBMUSCLE_Instance_receive_with_settings_psd_ LIBMUSCLE_Message_set_data_dcr end interface - interface LIBMUSCLE_Instance_create - module procedure & - LIBMUSCLE_Instance_create_autoports, & - LIBMUSCLE_Instance_create_with_ports - end interface - interface LIBMUSCLE_Instance_reuse_instance module procedure & LIBMUSCLE_Instance_reuse_instance_default, & @@ -16505,11 +16490,12 @@ subroutine LIBMUSCLE_Message_unset_settings( & self%ptr) end subroutine LIBMUSCLE_Message_unset_settings - type(LIBMUSCLE_Instance) function LIBMUSCLE_Instance_create_autoports() + type(LIBMUSCLE_Instance) function LIBMUSCLE_Instance_create(ports) implicit none + type(LIBMUSCLE_PortsDescription), intent(in), optional :: ports integer :: num_args, i, arg_len - integer (c_intptr_t) :: cla + integer (c_intptr_t) :: cla, ports_ptr character (kind=c_char, len=:), allocatable :: cur_arg num_args = command_argument_count() @@ -16523,33 +16509,14 @@ type(LIBMUSCLE_Instance) function LIBMUSCLE_Instance_create_autoports() cla, i, cur_arg, int(len(cur_arg), c_size_t)) deallocate(cur_arg) end do - LIBMUSCLE_Instance_create_autoports%ptr = & - LIBMUSCLE_Instance_create_autoports_(cla) - call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_free_(cla) - end function LIBMUSCLE_Instance_create_autoports - - type(LIBMUSCLE_Instance) function LIBMUSCLE_Instance_create_with_ports(ports) - implicit none - - type(LIBMUSCLE_PortsDescription) :: ports - integer :: num_args, i, arg_len - integer (c_intptr_t) :: cla - character (kind=c_char, len=:), allocatable :: cur_arg - - num_args = command_argument_count() - cla = LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_create_(num_args + 1) - do i = 0, num_args - call get_command_argument(i, length=arg_len) - allocate (character(arg_len+1) :: cur_arg) - call get_command_argument(i, value=cur_arg) - cur_arg(arg_len+1:arg_len+1) = c_null_char - call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_set_arg_( & - cla, i, cur_arg, int(len(cur_arg), c_size_t)) - deallocate(cur_arg) - end do - 
LIBMUSCLE_Instance_create_with_ports%ptr = LIBMUSCLE_Instance_create_with_ports_(cla, ports%ptr) + if (present(ports)) then + ports_ptr = ports%ptr + else + ports_ptr = 0 + end if + LIBMUSCLE_Instance_create%ptr = LIBMUSCLE_Instance_create_(cla, ports_ptr) call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_free_(cla) - end function LIBMUSCLE_Instance_create_with_ports + end function LIBMUSCLE_Instance_create subroutine LIBMUSCLE_Instance_free( & self) diff --git a/libmuscle/fortran/src/libmuscle/libmuscle_mpi.f90 b/libmuscle/fortran/src/libmuscle/libmuscle_mpi.f90 index 2e587eb4..f0bf41a5 100644 --- a/libmuscle/fortran/src/libmuscle/libmuscle_mpi.f90 +++ b/libmuscle/fortran/src/libmuscle/libmuscle_mpi.f90 @@ -429,12 +429,6 @@ module libmuscle_mpi end type LIBMUSCLE_Instance public :: LIBMUSCLE_Instance - public :: LIBMUSCLE_Instance_create_autoports_cr - public :: LIBMUSCLE_Instance_create_autoports_c - public :: LIBMUSCLE_Instance_create_autoports - public :: LIBMUSCLE_Instance_create_with_ports_cr - public :: LIBMUSCLE_Instance_create_with_ports_c - public :: LIBMUSCLE_Instance_create_with_ports public :: LIBMUSCLE_Instance_create public :: LIBMUSCLE_Instance_free public :: LIBMUSCLE_Instance_reuse_instance_default @@ -2958,31 +2952,19 @@ subroutine LIBMUSCLE_Message_unset_settings_(self) & integer (c_intptr_t), value, intent(in) :: self end subroutine LIBMUSCLE_Message_unset_settings_ - integer (c_intptr_t) function LIBMUSCLE_Instance_create_autoports_cr_( & - cla, & - communicator, & - root) & - bind(C, name="LIBMUSCLE_Instance_create_autoports_cr_") - - use iso_c_binding - integer (c_intptr_t), value, intent(in) :: cla - integer (c_int), value, intent(in) :: communicator - integer (c_int), value, intent(in) :: root - end function LIBMUSCLE_Instance_create_autoports_cr_ - - integer (c_intptr_t) function LIBMUSCLE_Instance_create_with_ports_cr_( & + integer (c_intptr_t) function LIBMUSCLE_Instance_create_( & cla, & ports, & communicator, & root) & - bind(C, 
name="LIBMUSCLE_Instance_create_with_ports_cr_") + bind(C, name="LIBMUSCLE_Instance_create_") use iso_c_binding integer (c_intptr_t), value, intent(in) :: cla integer (c_intptr_t), value, intent(in) :: ports integer (c_int), value, intent(in) :: communicator integer (c_int), value, intent(in) :: root - end function LIBMUSCLE_Instance_create_with_ports_cr_ + end function LIBMUSCLE_Instance_create_ subroutine LIBMUSCLE_Instance_free_(self) & bind(C, name="LIBMUSCLE_Instance_free_") @@ -3833,16 +3815,6 @@ end function LIBMUSCLE_Instance_receive_with_settings_psd_ LIBMUSCLE_Message_set_data_dcr end interface - interface LIBMUSCLE_Instance_create - module procedure & - LIBMUSCLE_Instance_create_autoports_cr, & - LIBMUSCLE_Instance_create_autoports_c, & - LIBMUSCLE_Instance_create_autoports, & - LIBMUSCLE_Instance_create_with_ports_cr, & - LIBMUSCLE_Instance_create_with_ports_c, & - LIBMUSCLE_Instance_create_with_ports - end interface - interface LIBMUSCLE_Instance_reuse_instance module procedure & LIBMUSCLE_Instance_reuse_instance_default, & @@ -16523,114 +16495,15 @@ subroutine LIBMUSCLE_Message_unset_settings( & self%ptr) end subroutine LIBMUSCLE_Message_unset_settings - type(LIBMUSCLE_Instance) function LIBMUSCLE_Instance_create_autoports_cr( & - communicator, root) - implicit none - integer :: communicator, root - - integer :: num_args, i, arg_len - integer (c_intptr_t) :: cla - character (kind=c_char, len=:), allocatable :: cur_arg - - num_args = command_argument_count() - cla = LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_create_(num_args + 1) - do i = 0, num_args - call get_command_argument(i, length=arg_len) - allocate (character(arg_len+1) :: cur_arg) - call get_command_argument(i, value=cur_arg) - cur_arg(arg_len+1:arg_len+1) = c_null_char - call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_set_arg_( & - cla, i, cur_arg, int(len(cur_arg), c_size_t)) - deallocate(cur_arg) - end do - LIBMUSCLE_Instance_create_autoports_cr%ptr = & - LIBMUSCLE_Instance_create_autoports_cr_(cla, 
communicator, root) - call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_free_(cla) - end function LIBMUSCLE_Instance_create_autoports_cr - - type(LIBMUSCLE_Instance) function LIBMUSCLE_Instance_create_autoports_c( & - communicator) - implicit none - integer :: communicator - - integer :: num_args, i, arg_len - integer (c_intptr_t) :: cla - character (kind=c_char, len=:), allocatable :: cur_arg - - num_args = command_argument_count() - cla = LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_create_(num_args + 1) - do i = 0, num_args - call get_command_argument(i, length=arg_len) - allocate (character(arg_len+1) :: cur_arg) - call get_command_argument(i, value=cur_arg) - cur_arg(arg_len+1:arg_len+1) = c_null_char - call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_set_arg_( & - cla, i, cur_arg, int(len(cur_arg), c_size_t)) - deallocate(cur_arg) - end do - LIBMUSCLE_Instance_create_autoports_c%ptr = & - LIBMUSCLE_Instance_create_autoports_cr_(cla, communicator, 0) - call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_free_(cla) - end function LIBMUSCLE_Instance_create_autoports_c - - type(LIBMUSCLE_Instance) function LIBMUSCLE_Instance_create_autoports() - implicit none - - integer :: num_args, i, arg_len - integer (c_intptr_t) :: cla - character (kind=c_char, len=:), allocatable :: cur_arg - - num_args = command_argument_count() - cla = LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_create_(num_args + 1) - do i = 0, num_args - call get_command_argument(i, length=arg_len) - allocate (character(arg_len+1) :: cur_arg) - call get_command_argument(i, value=cur_arg) - cur_arg(arg_len+1:arg_len+1) = c_null_char - call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_set_arg_( & - cla, i, cur_arg, int(len(cur_arg), c_size_t)) - deallocate(cur_arg) - end do - LIBMUSCLE_Instance_create_autoports%ptr = & - LIBMUSCLE_Instance_create_autoports_cr_(cla, MPI_COMM_WORLD, 0) - call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_free_(cla) - end function LIBMUSCLE_Instance_create_autoports - - type(LIBMUSCLE_Instance) function 
LIBMUSCLE_Instance_create_with_ports_cr( & + type(LIBMUSCLE_Instance) function LIBMUSCLE_Instance_create( & ports, communicator, root) implicit none - type(LIBMUSCLE_PortsDescription) :: ports - integer :: communicator, root - integer :: num_args, i, arg_len - integer (c_intptr_t) :: cla - character (kind=c_char, len=:), allocatable :: cur_arg - - num_args = command_argument_count() - cla = LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_create_(num_args + 1) - do i = 0, num_args - call get_command_argument(i, length=arg_len) - allocate (character(arg_len+1) :: cur_arg) - call get_command_argument(i, value=cur_arg) - cur_arg(arg_len+1:arg_len+1) = c_null_char - call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_set_arg_( & - cla, i, cur_arg, int(len(cur_arg), c_size_t)) - deallocate(cur_arg) - end do - LIBMUSCLE_Instance_create_with_ports_cr%ptr = & - LIBMUSCLE_Instance_create_with_ports_cr_( & - cla, ports%ptr, communicator, root) - call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_free_(cla) - end function LIBMUSCLE_Instance_create_with_ports_cr - - type(LIBMUSCLE_Instance) function LIBMUSCLE_Instance_create_with_ports_c( & - ports, communicator) - implicit none - - type(LIBMUSCLE_PortsDescription) :: ports - integer :: communicator + type(LIBMUSCLE_PortsDescription), intent(in), optional :: ports + integer, intent(in), optional :: communicator, root + integer :: acommunicator, aroot integer :: num_args, i, arg_len - integer (c_intptr_t) :: cla + integer (c_intptr_t) :: cla, ports_ptr character (kind=c_char, len=:), allocatable :: cur_arg num_args = command_argument_count() @@ -16644,37 +16517,25 @@ type(LIBMUSCLE_Instance) function LIBMUSCLE_Instance_create_with_ports_c( & cla, i, cur_arg, int(len(cur_arg), c_size_t)) deallocate(cur_arg) end do - LIBMUSCLE_Instance_create_with_ports_c%ptr = & - LIBMUSCLE_Instance_create_with_ports_cr_( & - cla, ports%ptr, communicator, 0) - call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_free_(cla) - end function LIBMUSCLE_Instance_create_with_ports_c - - 
type(LIBMUSCLE_Instance) function LIBMUSCLE_Instance_create_with_ports( & - ports) - implicit none - - type(LIBMUSCLE_PortsDescription) :: ports - integer :: num_args, i, arg_len - integer (c_intptr_t) :: cla - character (kind=c_char, len=:), allocatable :: cur_arg - - num_args = command_argument_count() - cla = LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_create_(num_args + 1) - do i = 0, num_args - call get_command_argument(i, length=arg_len) - allocate (character(arg_len+1) :: cur_arg) - call get_command_argument(i, value=cur_arg) - cur_arg(arg_len+1:arg_len+1) = c_null_char - call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_set_arg_( & - cla, i, cur_arg, int(len(cur_arg), c_size_t)) - deallocate(cur_arg) - end do - LIBMUSCLE_Instance_create_with_ports%ptr = & - LIBMUSCLE_Instance_create_with_ports_cr_( & - cla, ports%ptr, MPI_COMM_WORLD, 0) + if (present(ports)) then + ports_ptr = ports%ptr + else + ports_ptr = 0 + end if + if (present(communicator)) then + acommunicator = communicator + else + acommunicator = MPI_COMM_WORLD + end if + if (present(root)) then + aroot = root + else + aroot = 0 + end if + LIBMUSCLE_Instance_create%ptr = & + LIBMUSCLE_Instance_create_(cla, ports_ptr, acommunicator, aroot) call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_free_(cla) - end function LIBMUSCLE_Instance_create_with_ports + end function LIBMUSCLE_Instance_create subroutine LIBMUSCLE_Instance_free( & self) diff --git a/scripts/make_libmuscle_api.py b/scripts/make_libmuscle_api.py index a096b262..75322fa8 100755 --- a/scripts/make_libmuscle_api.py +++ b/scripts/make_libmuscle_api.py @@ -814,296 +814,120 @@ def __copy__(self) -> 'Elements': ]) -instance_constructors = [ - Constructor([Obj('CmdLineArgs', 'cla')], 'create_autoports', fc_override=( - 'std::intptr_t LIBMUSCLE_Instance_create_autoports_(std::intptr_t cla) {\n' +instance_constructor = Constructor( + [Obj('CmdLineArgs', 'cla'), Obj('PortsDescription', 'ports')], + fc_override=( + 'std::intptr_t LIBMUSCLE_Instance_create_(\n' + ' 
std::intptr_t cla,\n' + ' std::intptr_t ports\n' + ') {\n' ' CmdLineArgs * cla_p = reinterpret_cast(cla);\n' - ' Instance * result = new Instance(cla_p->argc(), cla_p->argv());\n' + ' Instance * result;\n' + ' if (ports == 0) {\n' + ' result = new Instance(cla_p->argc(), cla_p->argv());\n' + ' } else {\n' + ' PortsDescription * ports_p = reinterpret_cast(ports);\n' + ' result = new Instance(cla_p->argc(), cla_p->argv(), *ports_p);\n' + ' }\n' ' return reinterpret_cast(result);\n' '}\n\n'), - f_override=( - 'type(LIBMUSCLE_Instance) function LIBMUSCLE_Instance_create_autoports()\n' - ' implicit none\n' - '\n' - ' integer :: num_args, i, arg_len\n' - ' integer (c_intptr_t) :: cla\n' - ' character (kind=c_char, len=:), allocatable :: cur_arg\n' - '\n' - ' num_args = command_argument_count()\n' - ' cla = LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_create_(num_args + 1)\n' - ' do i = 0, num_args\n' - ' call get_command_argument(i, length=arg_len)\n' - ' allocate (character(arg_len+1) :: cur_arg)\n' - ' call get_command_argument(i, value=cur_arg)\n' - ' cur_arg(arg_len+1:arg_len+1) = c_null_char\n' - ' call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_set_arg_( &\n' - ' cla, i, cur_arg, int(len(cur_arg), c_size_t))\n' - ' deallocate(cur_arg)\n' - ' end do\n' - ' LIBMUSCLE_Instance_create_autoports%ptr = &\n' - ' LIBMUSCLE_Instance_create_autoports_(cla)\n' - ' call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_free_(cla)\n' - 'end function LIBMUSCLE_Instance_create_autoports\n' - '\n')), - Constructor( - [Obj('CmdLineArgs', 'cla'), Obj('PortsDescription', 'ports')], - 'create_with_ports', - fc_override=( - 'std::intptr_t LIBMUSCLE_Instance_create_with_ports_(\n' - ' std::intptr_t cla,\n' - ' std::intptr_t ports\n' - ') {\n' - ' CmdLineArgs * cla_p = reinterpret_cast(cla);\n' - ' PortsDescription * ports_p = reinterpret_cast(\n' - ' ports);\n' - ' Instance * result = new Instance(\n' - ' cla_p->argc(), cla_p->argv(), *ports_p);\n' - ' return reinterpret_cast(result);\n' - '}\n\n'), - f_override=( - 
'type(LIBMUSCLE_Instance) function LIBMUSCLE_Instance_create_with_ports(ports)\n' - ' implicit none\n' - '\n' - ' type(LIBMUSCLE_PortsDescription) :: ports\n' - ' integer :: num_args, i, arg_len\n' - ' integer (c_intptr_t) :: cla\n' - ' character (kind=c_char, len=:), allocatable :: cur_arg\n' - '\n' - ' num_args = command_argument_count()\n' - ' cla = LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_create_(num_args + 1)\n' - ' do i = 0, num_args\n' - ' call get_command_argument(i, length=arg_len)\n' - ' allocate (character(arg_len+1) :: cur_arg)\n' - ' call get_command_argument(i, value=cur_arg)\n' - ' cur_arg(arg_len+1:arg_len+1) = c_null_char\n' - ' call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_set_arg_( &\n' - ' cla, i, cur_arg, int(len(cur_arg), c_size_t))\n' - ' deallocate(cur_arg)\n' - ' end do\n' - ' LIBMUSCLE_Instance_create_with_ports%ptr = LIBMUSCLE_Instance_create_with_ports_(cla, ports%ptr)\n' - ' call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_free_(cla)\n' - 'end function LIBMUSCLE_Instance_create_with_ports\n' - '\n')), - OverloadSet('create', ['create_autoports', 'create_with_ports']), - ] - - -instance_mpi_constructors = [ - Constructor( - [Obj('CmdLineArgs', 'cla'), Int('communicator'), Int('root')], - 'create_autoports_cr', - fc_override=( - 'std::intptr_t LIBMUSCLE_Instance_create_autoports_cr_(\n' - ' std::intptr_t cla,\n' - ' int communicator,\n' - ' int root\n' - ') {\n' - ' CmdLineArgs * cla_p = reinterpret_cast(cla);\n' - ' MPI_Comm communicator_m = MPI_Comm_f2c(communicator);\n' - ' Instance * result = new Instance(cla_p->argc(), cla_p->argv(), communicator_m, root);\n' - ' return reinterpret_cast(result);\n' - '}\n\n'), - f_override=( - 'type(LIBMUSCLE_Instance) function LIBMUSCLE_Instance_create_autoports_cr( &\n' - ' communicator, root)\n' - ' implicit none\n' - ' integer :: communicator, root\n' - '\n' - ' integer :: num_args, i, arg_len\n' - ' integer (c_intptr_t) :: cla\n' - ' character (kind=c_char, len=:), allocatable :: cur_arg\n' - '\n' - ' num_args = 
command_argument_count()\n' - ' cla = LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_create_(num_args + 1)\n' - ' do i = 0, num_args\n' - ' call get_command_argument(i, length=arg_len)\n' - ' allocate (character(arg_len+1) :: cur_arg)\n' - ' call get_command_argument(i, value=cur_arg)\n' - ' cur_arg(arg_len+1:arg_len+1) = c_null_char\n' - ' call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_set_arg_( &\n' - ' cla, i, cur_arg, int(len(cur_arg), c_size_t))\n' - ' deallocate(cur_arg)\n' - ' end do\n' - ' LIBMUSCLE_Instance_create_autoports_cr%ptr = &\n' - ' LIBMUSCLE_Instance_create_autoports_cr_(cla, communicator, root)\n' - ' call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_free_(cla)\n' - 'end function LIBMUSCLE_Instance_create_autoports_cr\n' - '\n')), - Constructor( - [Obj('CmdLineArgs', 'cla'), Int('communicator')], - 'create_autoports_c', - fc_override='', - f_override=( - 'type(LIBMUSCLE_Instance) function LIBMUSCLE_Instance_create_autoports_c( &\n' - ' communicator)\n' - ' implicit none\n' - ' integer :: communicator\n' - '\n' - ' integer :: num_args, i, arg_len\n' - ' integer (c_intptr_t) :: cla\n' - ' character (kind=c_char, len=:), allocatable :: cur_arg\n' - '\n' - ' num_args = command_argument_count()\n' - ' cla = LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_create_(num_args + 1)\n' - ' do i = 0, num_args\n' - ' call get_command_argument(i, length=arg_len)\n' - ' allocate (character(arg_len+1) :: cur_arg)\n' - ' call get_command_argument(i, value=cur_arg)\n' - ' cur_arg(arg_len+1:arg_len+1) = c_null_char\n' - ' call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_set_arg_( &\n' - ' cla, i, cur_arg, int(len(cur_arg), c_size_t))\n' - ' deallocate(cur_arg)\n' - ' end do\n' - ' LIBMUSCLE_Instance_create_autoports_c%ptr = &\n' - ' LIBMUSCLE_Instance_create_autoports_cr_(cla, communicator, 0)\n' - ' call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_free_(cla)\n' - 'end function LIBMUSCLE_Instance_create_autoports_c\n' - '\n')), - Constructor( - [Obj('CmdLineArgs', 'cla')], - 'create_autoports', - fc_override='', - 
f_override=( - 'type(LIBMUSCLE_Instance) function LIBMUSCLE_Instance_create_autoports()\n' - ' implicit none\n' - '\n' - ' integer :: num_args, i, arg_len\n' - ' integer (c_intptr_t) :: cla\n' - ' character (kind=c_char, len=:), allocatable :: cur_arg\n' - '\n' - ' num_args = command_argument_count()\n' - ' cla = LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_create_(num_args + 1)\n' - ' do i = 0, num_args\n' - ' call get_command_argument(i, length=arg_len)\n' - ' allocate (character(arg_len+1) :: cur_arg)\n' - ' call get_command_argument(i, value=cur_arg)\n' - ' cur_arg(arg_len+1:arg_len+1) = c_null_char\n' - ' call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_set_arg_( &\n' - ' cla, i, cur_arg, int(len(cur_arg), c_size_t))\n' - ' deallocate(cur_arg)\n' - ' end do\n' - ' LIBMUSCLE_Instance_create_autoports%ptr = &\n' - ' LIBMUSCLE_Instance_create_autoports_cr_(cla, MPI_COMM_WORLD, 0)\n' - ' call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_free_(cla)\n' - 'end function LIBMUSCLE_Instance_create_autoports\n' - '\n')), - Constructor( - [ - Obj('CmdLineArgs', 'cla'), Obj('PortsDescription', 'ports'), - Int('communicator'), Int('root')], - 'create_with_ports_cr', - fc_override=( - 'std::intptr_t LIBMUSCLE_Instance_create_with_ports_cr_(\n' - ' std::intptr_t cla,\n' - ' std::intptr_t ports,\n' - ' int communicator, int root\n' - ') {\n' - ' CmdLineArgs * cla_p = reinterpret_cast(cla);\n' - ' PortsDescription * ports_p = reinterpret_cast(\n' - ' ports);\n' - ' MPI_Comm communicator_m = MPI_Comm_f2c(communicator);\n' - ' Instance * result = new Instance(\n' - ' cla_p->argc(), cla_p->argv(), *ports_p, communicator_m, root);\n' - ' return reinterpret_cast(result);\n' - '}\n\n'), - f_override=( - 'type(LIBMUSCLE_Instance) function LIBMUSCLE_Instance_create_with_ports_cr( &\n' - ' ports, communicator, root)\n' - ' implicit none\n' - '\n' - ' type(LIBMUSCLE_PortsDescription) :: ports\n' - ' integer :: communicator, root\n' - ' integer :: num_args, i, arg_len\n' - ' integer (c_intptr_t) :: cla\n' - ' 
character (kind=c_char, len=:), allocatable :: cur_arg\n' - '\n' - ' num_args = command_argument_count()\n' - ' cla = LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_create_(num_args + 1)\n' - ' do i = 0, num_args\n' - ' call get_command_argument(i, length=arg_len)\n' - ' allocate (character(arg_len+1) :: cur_arg)\n' - ' call get_command_argument(i, value=cur_arg)\n' - ' cur_arg(arg_len+1:arg_len+1) = c_null_char\n' - ' call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_set_arg_( &\n' - ' cla, i, cur_arg, int(len(cur_arg), c_size_t))\n' - ' deallocate(cur_arg)\n' - ' end do\n' - ' LIBMUSCLE_Instance_create_with_ports_cr%ptr = &\n' - ' LIBMUSCLE_Instance_create_with_ports_cr_( &\n' - ' cla, ports%ptr, communicator, root)\n' - ' call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_free_(cla)\n' - 'end function LIBMUSCLE_Instance_create_with_ports_cr\n' - '\n')), - Constructor( - [ - Obj('CmdLineArgs', 'cla'), Obj('PortsDescription', 'ports'), - Int('communicator')], - 'create_with_ports_c', - fc_override='', - f_override=( - 'type(LIBMUSCLE_Instance) function LIBMUSCLE_Instance_create_with_ports_c( &\n' - ' ports, communicator)\n' - ' implicit none\n' - '\n' - ' type(LIBMUSCLE_PortsDescription) :: ports\n' - ' integer :: communicator\n' - ' integer :: num_args, i, arg_len\n' - ' integer (c_intptr_t) :: cla\n' - ' character (kind=c_char, len=:), allocatable :: cur_arg\n' - '\n' - ' num_args = command_argument_count()\n' - ' cla = LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_create_(num_args + 1)\n' - ' do i = 0, num_args\n' - ' call get_command_argument(i, length=arg_len)\n' - ' allocate (character(arg_len+1) :: cur_arg)\n' - ' call get_command_argument(i, value=cur_arg)\n' - ' cur_arg(arg_len+1:arg_len+1) = c_null_char\n' - ' call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_set_arg_( &\n' - ' cla, i, cur_arg, int(len(cur_arg), c_size_t))\n' - ' deallocate(cur_arg)\n' - ' end do\n' - ' LIBMUSCLE_Instance_create_with_ports_c%ptr = &\n' - ' LIBMUSCLE_Instance_create_with_ports_cr_( &\n' - ' cla, ports%ptr, communicator, 
0)\n' - ' call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_free_(cla)\n' - 'end function LIBMUSCLE_Instance_create_with_ports_c\n' - '\n')), - Constructor( - [ - Obj('CmdLineArgs', 'cla'), Obj('PortsDescription', 'ports')], - 'create_with_ports', - fc_override='', - f_override=( - 'type(LIBMUSCLE_Instance) function LIBMUSCLE_Instance_create_with_ports( &\n' - ' ports)\n' - ' implicit none\n' - '\n' - ' type(LIBMUSCLE_PortsDescription) :: ports\n' - ' integer :: num_args, i, arg_len\n' - ' integer (c_intptr_t) :: cla\n' - ' character (kind=c_char, len=:), allocatable :: cur_arg\n' - '\n' - ' num_args = command_argument_count()\n' - ' cla = LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_create_(num_args + 1)\n' - ' do i = 0, num_args\n' - ' call get_command_argument(i, length=arg_len)\n' - ' allocate (character(arg_len+1) :: cur_arg)\n' - ' call get_command_argument(i, value=cur_arg)\n' - ' cur_arg(arg_len+1:arg_len+1) = c_null_char\n' - ' call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_set_arg_( &\n' - ' cla, i, cur_arg, int(len(cur_arg), c_size_t))\n' - ' deallocate(cur_arg)\n' - ' end do\n' - ' LIBMUSCLE_Instance_create_with_ports%ptr = &\n' - ' LIBMUSCLE_Instance_create_with_ports_cr_( &\n' - ' cla, ports%ptr, MPI_COMM_WORLD, 0)\n' - ' call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_free_(cla)\n' - 'end function LIBMUSCLE_Instance_create_with_ports\n' - '\n')), - OverloadSet('create', [ - 'create_autoports_cr', 'create_autoports_c', 'create_autoports', - 'create_with_ports_cr', 'create_with_ports_c', 'create_with_ports']), - ] + f_override=( + 'type(LIBMUSCLE_Instance) function LIBMUSCLE_Instance_create(ports)\n' + ' implicit none\n' + '\n' + ' type(LIBMUSCLE_PortsDescription), intent(in), optional :: ports\n' + ' integer :: num_args, i, arg_len\n' + ' integer (c_intptr_t) :: cla, ports_ptr\n' + ' character (kind=c_char, len=:), allocatable :: cur_arg\n' + '\n' + ' num_args = command_argument_count()\n' + ' cla = LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_create_(num_args + 1)\n' + ' do i = 0, 
num_args\n' + ' call get_command_argument(i, length=arg_len)\n' + ' allocate (character(arg_len+1) :: cur_arg)\n' + ' call get_command_argument(i, value=cur_arg)\n' + ' cur_arg(arg_len+1:arg_len+1) = c_null_char\n' + ' call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_set_arg_( &\n' + ' cla, i, cur_arg, int(len(cur_arg), c_size_t))\n' + ' deallocate(cur_arg)\n' + ' end do\n' + ' if (present(ports)) then\n' + ' ports_ptr = ports%ptr\n' + ' else\n' + ' ports_ptr = 0\n' + ' end if\n' + ' LIBMUSCLE_Instance_create%ptr = LIBMUSCLE_Instance_create_(cla, ports_ptr)\n' + ' call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_free_(cla)\n' + 'end function LIBMUSCLE_Instance_create\n' + '\n')) + +instance_mpi_constructor = Constructor( + [ + Obj('CmdLineArgs', 'cla'), Obj('PortsDescription', 'ports'), + Int('communicator'), Int('root')], + fc_override=( + 'std::intptr_t LIBMUSCLE_Instance_create_(\n' + ' std::intptr_t cla,\n' + ' std::intptr_t ports,\n' + ' int communicator, int root\n' + ') {\n' + ' CmdLineArgs * cla_p = reinterpret_cast(cla);\n' + ' MPI_Comm communicator_m = MPI_Comm_f2c(communicator);\n' + ' Instance * result;\n' + ' if (ports == 0) {\n' + ' result = new Instance(\n' + ' cla_p->argc(), cla_p->argv(), communicator_m, root);\n' + ' } else {\n' + ' PortsDescription * ports_p = reinterpret_cast(ports);\n' + ' result = new Instance(\n' + ' cla_p->argc(), cla_p->argv(), *ports_p, communicator_m, root);\n' + ' }\n' + ' return reinterpret_cast(result);\n' + '}\n\n'), + f_override=( + 'type(LIBMUSCLE_Instance) function LIBMUSCLE_Instance_create( &\n' + ' ports, communicator, root)\n' + ' implicit none\n' + '\n' + ' type(LIBMUSCLE_PortsDescription), intent(in), optional :: ports\n' + ' integer, intent(in), optional :: communicator, root\n' + ' integer :: acommunicator, aroot\n' + ' integer :: num_args, i, arg_len\n' + ' integer (c_intptr_t) :: cla, ports_ptr\n' + ' character (kind=c_char, len=:), allocatable :: cur_arg\n' + '\n' + ' num_args = command_argument_count()\n' + ' cla = 
LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_create_(num_args + 1)\n' + ' do i = 0, num_args\n' + ' call get_command_argument(i, length=arg_len)\n' + ' allocate (character(arg_len+1) :: cur_arg)\n' + ' call get_command_argument(i, value=cur_arg)\n' + ' cur_arg(arg_len+1:arg_len+1) = c_null_char\n' + ' call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_set_arg_( &\n' + ' cla, i, cur_arg, int(len(cur_arg), c_size_t))\n' + ' deallocate(cur_arg)\n' + ' end do\n' + ' if (present(ports)) then\n' + ' ports_ptr = ports%ptr\n' + ' else\n' + ' ports_ptr = 0\n' + ' end if\n' + ' if (present(communicator)) then\n' + ' acommunicator = communicator\n' + ' else\n' + ' acommunicator = MPI_COMM_WORLD\n' + ' end if\n' + ' if (present(root)) then\n' + ' aroot = root\n' + ' else\n' + ' aroot = 0\n' + ' end if\n' + ' LIBMUSCLE_Instance_create%ptr = &\n' + ' LIBMUSCLE_Instance_create_(cla, ports_ptr, acommunicator, aroot)\n' + ' call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_free_(cla)\n' + 'end function LIBMUSCLE_Instance_create\n' + '\n') + ) instance_members = [ @@ -1199,12 +1023,12 @@ def __copy__(self) -> 'Elements': instance_desc = Class( - 'Instance', None, instance_constructors + [ + 'Instance', None, [instance_constructor] + [ copy(mem) for mem in instance_members]) instance_mpi_desc = Class( - 'Instance', None, instance_mpi_constructors + [ + 'Instance', None, [instance_mpi_constructor] + [ copy(mem) for mem in instance_members]) From 463ba5b0ed426476d3751fb5e0664af956394942 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Wed, 15 Feb 2023 16:15:02 +0100 Subject: [PATCH 031/188] Add InstanceFlags to C++ API --- libmuscle/cpp/src/libmuscle/instance.cpp | 45 ++++++++- libmuscle/cpp/src/libmuscle/instance.hpp | 114 ++++++++++++++++++++++ libmuscle/cpp/src/libmuscle/libmuscle.hpp | 2 + 3 files changed, 157 insertions(+), 4 deletions(-) diff --git a/libmuscle/cpp/src/libmuscle/instance.cpp b/libmuscle/cpp/src/libmuscle/instance.cpp index 85b498e9..986410ba 100644 --- 
a/libmuscle/cpp/src/libmuscle/instance.cpp +++ b/libmuscle/cpp/src/libmuscle/instance.cpp @@ -67,7 +67,8 @@ class Instance::Impl { public: Impl( int argc, char const * const argv[], - PortsDescription const & ports + PortsDescription const & ports, + InstanceFlags flags #ifdef MUSCLE_ENABLE_MPI , MPI_Comm const & communicator , int root @@ -112,6 +113,7 @@ class Instance::Impl { bool first_run_; std::unordered_map<::ymmsl::Reference, Message> f_init_cache_; bool is_shut_down_; + InstanceFlags flags_; void register_(); void connect_(); @@ -146,7 +148,8 @@ class Instance::Impl { Instance::Impl::Impl( int argc, char const * const argv[], - PortsDescription const & ports + PortsDescription const & ports, + InstanceFlags flags #ifdef MUSCLE_ENABLE_MPI , MPI_Comm const & communicator , int root @@ -162,6 +165,7 @@ Instance::Impl::Impl( , first_run_(true) , f_init_cache_() , is_shut_down_(false) + , flags_(flags) { #ifdef MUSCLE_ENABLE_MPI MPI_Comm_dup(communicator, &mpi_comm_); @@ -907,7 +911,7 @@ Instance::Instance( #endif ) : pimpl_(new Impl( - argc, argv, {{}} + argc, argv, {{}}, InstanceFlags::NONE #ifdef MUSCLE_ENABLE_MPI , communicator, root #endif @@ -923,7 +927,40 @@ Instance::Instance( #endif ) : pimpl_(new Impl( - argc, argv, ports + argc, argv, ports, InstanceFlags::NONE +#ifdef MUSCLE_ENABLE_MPI + , communicator, root +#endif + )) +{} + +Instance::Instance( + int argc, char const * const argv[], + InstanceFlags flags +#ifdef MUSCLE_ENABLE_MPI + , MPI_Comm const & communicator + , int root +#endif + ) + : pimpl_(new Impl( + argc, argv, {{}}, flags +#ifdef MUSCLE_ENABLE_MPI + , communicator, root +#endif + )) +{} + +Instance::Instance( + int argc, char const * const argv[], + PortsDescription const & ports, + InstanceFlags flags +#ifdef MUSCLE_ENABLE_MPI + , MPI_Comm const & communicator + , int root +#endif + ) + : pimpl_(new Impl( + argc, argv, ports, flags #ifdef MUSCLE_ENABLE_MPI , communicator, root #endif diff --git 
a/libmuscle/cpp/src/libmuscle/instance.hpp b/libmuscle/cpp/src/libmuscle/instance.hpp index f930293f..eca98dd2 100644 --- a/libmuscle/cpp/src/libmuscle/instance.hpp +++ b/libmuscle/cpp/src/libmuscle/instance.hpp @@ -14,6 +14,67 @@ namespace libmuscle { namespace impl { +/** Enumeration of properties that an instance may have. + * + * You may combine multiple flags using the bitwise OR operator `|`. For + * example: `InstanceFlags::DONT_APPLY_OVERLAY | InstanceFlags::USES_CHECKPOINT_API`. + */ +enum class InstanceFlags : int { + NONE = 0, + + /** + * Do not apply the received settings overlay during prereceive of F_INIT + * messages. If you're going to use Instance.receive_with_settings on + * your F_INIT ports, you need to set this flag when creating an + * Instance. + * + * If you don't know what that means, do not specify this flag and everything + * will be fine. If it turns out that you did need to specify the flag, MUSCLE3 + * will tell you about it in an error message and you can still add it. + */ + DONT_APPLY_OVERLAY = 1, + + /** Indicate that this instance supports checkpointing. + * + * You may not use any checkpointing API calls when this flag is not supplied. + */ + USES_CHECKPOINT_API = 2, + + /** Indicate this instance does not carry state between iterations of the + * reuse loop. + * + * This corresponds to :external:py:attr:`ymmsl.KeepsStateForNextUse.NO`. + * + * If neither KEEPS_NO_STATE_FOR_NEXT_USE nor STATE_NOT_REQUIRED_FOR_NEXT_USE + * are supplied, this corresponds to + * :external:py:attr:`ymmsl.KeepsStateForNextUse.REQUIRED`. + */ + KEEPS_NO_STATE_FOR_NEXT_USE = 4, + + /** Indicate this instance carries state between iterations of the + * reuse loop, however this state is not required for restarting. + * + * This corresponds to :external:py:attr:`ymmsl.KeepsStateForNextUse.HELPFUL`. + * + * If neither KEEPS_NO_STATE_FOR_NEXT_USE nor STATE_NOT_REQUIRED_FOR_NEXT_USE + * are supplied, this corresponds to + * :external:py:attr:`ymmsl.KeepsStateForNextUse.REQUIRED`.
+ */ + STATE_NOT_REQUIRED_FOR_NEXT_USE = 8, +}; + +inline InstanceFlags operator|(InstanceFlags a, InstanceFlags b) { + return static_cast(static_cast(a) | static_cast(b)); +} + +inline InstanceFlags operator&(InstanceFlags a, InstanceFlags b) { + return static_cast(static_cast(a) & static_cast(b)); +} + +inline bool operator!(InstanceFlags a) { + return a == InstanceFlags::NONE; +} + /** Represents a component instance in a MUSCLE3 simulation. * * This class provides a low-level send/receive API for the instance to use. @@ -69,6 +130,59 @@ class Instance { #endif ); + /** Create an Instance. + * + * For MPI-based components, creating an Instance is a + * collective operation, so it must be done in all processes + * simultaneously, with the same communicator and the same root. + * + * @param argc The number of command-line arguments. + * @param argv Command line arguments. + * @param flags InstanceFlags for this instance. + * @param communicator MPI communicator containing all processes in + * this instance (MPI only). + * @param root The designated root process (MPI only). + */ + Instance( + int argc, char const * const argv[], + InstanceFlags flags +#ifdef MUSCLE_ENABLE_MPI + , MPI_Comm const & communicator = MPI_COMM_WORLD + , int root = 0 +#endif + ); + + /** Create an instance. + * + * A PortsDescription can be written like this: + * + * PortsDescription ports({ + * {Operator::F_INIT, {"port1", "port2"}}, + * {Operator::O_F, {"port3[]"}} + * }); + * + * For MPI-based components, creating an Instance is a + * collective operation, so it must be done in all processes + * simultaneously, with the same communicator and the same root. + * + * @param argc The number of command-line arguments. + * @param argv Command line arguments. + * @param ports A description of the ports that this instance has. + * @param flags InstanceFlags for this instance. + * @param communicator MPI communicator containing all processes in + * this instance (MPI only). 
+ * @param root The designated root process (MPI only). + */ + Instance( + int argc, char const * const argv[], + PortsDescription const & ports, + InstanceFlags flags +#ifdef MUSCLE_ENABLE_MPI + , MPI_Comm const & communicator = MPI_COMM_WORLD + , int root = 0 +#endif + ); + ~Instance(); Instance(Instance const &); Instance(Instance &&); diff --git a/libmuscle/cpp/src/libmuscle/libmuscle.hpp b/libmuscle/cpp/src/libmuscle/libmuscle.hpp index c7d74ada..335d4364 100644 --- a/libmuscle/cpp/src/libmuscle/libmuscle.hpp +++ b/libmuscle/cpp/src/libmuscle/libmuscle.hpp @@ -9,6 +9,8 @@ namespace libmuscle { using impl::Data; using impl::DataConstRef; using impl::Instance; + // Note: C++20 allows using enum, which introduces all enum members in this scope + using impl::InstanceFlags; using impl::Message; using impl::PortsDescription; using impl::StorageOrder; From 440ce45bde0a89257fe0625dc73a735582ded403 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Wed, 15 Feb 2023 16:24:02 +0100 Subject: [PATCH 032/188] Add Fortran bindings for InstanceFlags --- .../cpp/build/libmuscle/libmuscle.version | 1 + .../cpp/build/libmuscle/libmuscle_mpi.version | 1 + .../bindings/libmuscle_fortran_c.cpp | 9 +- .../bindings/libmuscle_mpi_fortran_c.cpp | 7 +- libmuscle/fortran/src/libmuscle/libmuscle.f90 | 43 +++++++-- .../fortran/src/libmuscle/libmuscle_mpi.f90 | 54 +++++++---- scripts/api_generator.py | 95 +++++++++++++++++++ scripts/make_libmuscle_api.py | 79 ++++++++------- 8 files changed, 221 insertions(+), 68 deletions(-) diff --git a/libmuscle/cpp/build/libmuscle/libmuscle.version b/libmuscle/cpp/build/libmuscle/libmuscle.version index ea4e7325..2c5f0a07 100644 --- a/libmuscle/cpp/build/libmuscle/libmuscle.version +++ b/libmuscle/cpp/build/libmuscle/libmuscle.version @@ -355,6 +355,7 @@ LIBMUSCLE_Instance_receive_with_settings_pd_; LIBMUSCLE_Instance_receive_with_settings_ps_; LIBMUSCLE_Instance_receive_with_settings_psd_; + LIBMUSCLE_InstanceFlags_to_int_; 
LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_create_; LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_free_; LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_set_arg_; diff --git a/libmuscle/cpp/build/libmuscle/libmuscle_mpi.version b/libmuscle/cpp/build/libmuscle/libmuscle_mpi.version index ea4e7325..2c5f0a07 100644 --- a/libmuscle/cpp/build/libmuscle/libmuscle_mpi.version +++ b/libmuscle/cpp/build/libmuscle/libmuscle_mpi.version @@ -355,6 +355,7 @@ LIBMUSCLE_Instance_receive_with_settings_pd_; LIBMUSCLE_Instance_receive_with_settings_ps_; LIBMUSCLE_Instance_receive_with_settings_psd_; + LIBMUSCLE_InstanceFlags_to_int_; LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_create_; LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_free_; LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_set_arg_; diff --git a/libmuscle/cpp/src/libmuscle/bindings/libmuscle_fortran_c.cpp b/libmuscle/cpp/src/libmuscle/bindings/libmuscle_fortran_c.cpp index 17a3d9d3..c1bcc7ec 100644 --- a/libmuscle/cpp/src/libmuscle/bindings/libmuscle_fortran_c.cpp +++ b/libmuscle/cpp/src/libmuscle/bindings/libmuscle_fortran_c.cpp @@ -14,6 +14,7 @@ using libmuscle::Data; using libmuscle::PortsDescription; using libmuscle::Message; using libmuscle::Instance; +using libmuscle::InstanceFlags; using libmuscle::impl::bindings::CmdLineArgs; using ymmsl::Operator; using ymmsl::Settings; @@ -3669,15 +3670,17 @@ void LIBMUSCLE_Message_unset_settings_(std::intptr_t self) { std::intptr_t LIBMUSCLE_Instance_create_( std::intptr_t cla, - std::intptr_t ports + std::intptr_t ports, + int flags ) { CmdLineArgs * cla_p = reinterpret_cast(cla); + InstanceFlags flags_o = static_cast(flags); Instance * result; if (ports == 0) { - result = new Instance(cla_p->argc(), cla_p->argv()); + result = new Instance(cla_p->argc(), cla_p->argv(), flags_o); } else { PortsDescription * ports_p = reinterpret_cast(ports); - result = new Instance(cla_p->argc(), cla_p->argv(), *ports_p); + result = new Instance(cla_p->argc(), cla_p->argv(), *ports_p, flags_o); } return reinterpret_cast(result); } diff --git 
a/libmuscle/cpp/src/libmuscle/bindings/libmuscle_mpi_fortran_c.cpp b/libmuscle/cpp/src/libmuscle/bindings/libmuscle_mpi_fortran_c.cpp index cbe4e5cf..a8a1d911 100644 --- a/libmuscle/cpp/src/libmuscle/bindings/libmuscle_mpi_fortran_c.cpp +++ b/libmuscle/cpp/src/libmuscle/bindings/libmuscle_mpi_fortran_c.cpp @@ -14,6 +14,7 @@ using libmuscle::Data; using libmuscle::PortsDescription; using libmuscle::Message; using libmuscle::Instance; +using libmuscle::InstanceFlags; using libmuscle::impl::bindings::CmdLineArgs; using ymmsl::Operator; using ymmsl::Settings; @@ -3670,18 +3671,20 @@ void LIBMUSCLE_Message_unset_settings_(std::intptr_t self) { std::intptr_t LIBMUSCLE_Instance_create_( std::intptr_t cla, std::intptr_t ports, + int flags, int communicator, int root ) { CmdLineArgs * cla_p = reinterpret_cast(cla); + InstanceFlags flags_o = static_cast(flags); MPI_Comm communicator_m = MPI_Comm_f2c(communicator); Instance * result; if (ports == 0) { result = new Instance( - cla_p->argc(), cla_p->argv(), communicator_m, root); + cla_p->argc(), cla_p->argv(), flags_o, communicator_m, root); } else { PortsDescription * ports_p = reinterpret_cast(ports); result = new Instance( - cla_p->argc(), cla_p->argv(), *ports_p, communicator_m, root); + cla_p->argc(), cla_p->argv(), *ports_p, flags_o, communicator_m, root); } return reinterpret_cast(result); } diff --git a/libmuscle/fortran/src/libmuscle/libmuscle.f90 b/libmuscle/fortran/src/libmuscle/libmuscle.f90 index 22177eb5..1d3f8db5 100644 --- a/libmuscle/fortran/src/libmuscle/libmuscle.f90 +++ b/libmuscle/fortran/src/libmuscle/libmuscle.f90 @@ -467,6 +467,15 @@ module libmuscle public :: LIBMUSCLE_Instance_receive_with_settings_ps public :: LIBMUSCLE_Instance_receive_with_settings_psd public :: LIBMUSCLE_Instance_receive_with_settings_on_slot + type :: LIBMUSCLE_InstanceFlags + logical :: DONT_APPLY_OVERLAY = .false. + logical :: USES_CHECKPOINT_API = .false. + logical :: KEEPS_NO_STATE_FOR_NEXT_USE = .false. 
+ logical :: STATE_NOT_REQUIRED_FOR_NEXT_USE = .false. + + contains + procedure :: to_int => LIBMUSCLE_InstanceFlags_to_int_ + end type integer, parameter :: LIBMUSCLE_IMPL_BINDINGS_success = 0 integer, parameter :: LIBMUSCLE_IMPL_BINDINGS_domain_error = 1 @@ -2953,12 +2962,14 @@ end subroutine LIBMUSCLE_Message_unset_settings_ integer (c_intptr_t) function LIBMUSCLE_Instance_create_( & cla, & - ports) & + ports, & + flags) & bind(C, name="LIBMUSCLE_Instance_create_") use iso_c_binding integer (c_intptr_t), value, intent(in) :: cla integer (c_intptr_t), value, intent(in) :: ports + integer (c_int), value, intent(in) :: flags end function LIBMUSCLE_Instance_create_ subroutine LIBMUSCLE_Instance_free_(self) & @@ -16490,11 +16501,12 @@ subroutine LIBMUSCLE_Message_unset_settings( & self%ptr) end subroutine LIBMUSCLE_Message_unset_settings - type(LIBMUSCLE_Instance) function LIBMUSCLE_Instance_create(ports) + type(LIBMUSCLE_Instance) function LIBMUSCLE_Instance_create(ports, flags) implicit none type(LIBMUSCLE_PortsDescription), intent(in), optional :: ports - integer :: num_args, i, arg_len + type(LIBMUSCLE_InstanceFlags), intent(in), optional :: flags + integer :: num_args, i, arg_len, iflags integer (c_intptr_t) :: cla, ports_ptr character (kind=c_char, len=:), allocatable :: cur_arg @@ -16509,12 +16521,12 @@ type(LIBMUSCLE_Instance) function LIBMUSCLE_Instance_create(ports) cla, i, cur_arg, int(len(cur_arg), c_size_t)) deallocate(cur_arg) end do - if (present(ports)) then - ports_ptr = ports%ptr - else - ports_ptr = 0 - end if - LIBMUSCLE_Instance_create%ptr = LIBMUSCLE_Instance_create_(cla, ports_ptr) + ports_ptr = 0 + if (present(ports)) ports_ptr = ports%ptr + iflags = 0 + if (present(flags)) iflags = flags%to_int() + LIBMUSCLE_Instance_create%ptr = LIBMUSCLE_Instance_create_( & + cla, ports_ptr, iflags) call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_free_(cla) end function LIBMUSCLE_Instance_create @@ -17883,6 +17895,19 @@ function 
LIBMUSCLE_Instance_receive_with_settings_psd( & LIBMUSCLE_Instance_receive_with_settings_psd%ptr = ret_val end function LIBMUSCLE_Instance_receive_with_settings_psd + integer function LIBMUSCLE_InstanceFlags_to_int_(flags) + implicit none + + class(LIBMUSCLE_InstanceFlags), intent(in) :: flags + integer :: ret_val + + ret_val = 0 + if (flags%DONT_APPLY_OVERLAY) ret_val = ret_val + 1 + if (flags%USES_CHECKPOINT_API) ret_val = ret_val + 2 + if (flags%KEEPS_NO_STATE_FOR_NEXT_USE) ret_val = ret_val + 4 + if (flags%STATE_NOT_REQUIRED_FOR_NEXT_USE) ret_val = ret_val + 8 + LIBMUSCLE_InstanceFlags_to_int_ = ret_val + end function LIBMUSCLE_InstanceFlags_to_int_ function LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_create( & count) diff --git a/libmuscle/fortran/src/libmuscle/libmuscle_mpi.f90 b/libmuscle/fortran/src/libmuscle/libmuscle_mpi.f90 index f0bf41a5..6d683cfa 100644 --- a/libmuscle/fortran/src/libmuscle/libmuscle_mpi.f90 +++ b/libmuscle/fortran/src/libmuscle/libmuscle_mpi.f90 @@ -468,6 +468,15 @@ module libmuscle_mpi public :: LIBMUSCLE_Instance_receive_with_settings_ps public :: LIBMUSCLE_Instance_receive_with_settings_psd public :: LIBMUSCLE_Instance_receive_with_settings_on_slot + type :: LIBMUSCLE_InstanceFlags + logical :: DONT_APPLY_OVERLAY = .false. + logical :: USES_CHECKPOINT_API = .false. + logical :: KEEPS_NO_STATE_FOR_NEXT_USE = .false. + logical :: STATE_NOT_REQUIRED_FOR_NEXT_USE = .false. 
+ + contains + procedure :: to_int => LIBMUSCLE_InstanceFlags_to_int_ + end type integer, parameter :: LIBMUSCLE_IMPL_BINDINGS_success = 0 integer, parameter :: LIBMUSCLE_IMPL_BINDINGS_domain_error = 1 @@ -2955,6 +2964,7 @@ end subroutine LIBMUSCLE_Message_unset_settings_ integer (c_intptr_t) function LIBMUSCLE_Instance_create_( & cla, & ports, & + flags, & communicator, & root) & bind(C, name="LIBMUSCLE_Instance_create_") @@ -2962,6 +2972,7 @@ integer (c_intptr_t) function LIBMUSCLE_Instance_create_( & use iso_c_binding integer (c_intptr_t), value, intent(in) :: cla integer (c_intptr_t), value, intent(in) :: ports + integer (c_int), value, intent(in) :: flags integer (c_int), value, intent(in) :: communicator integer (c_int), value, intent(in) :: root end function LIBMUSCLE_Instance_create_ @@ -16496,12 +16507,13 @@ subroutine LIBMUSCLE_Message_unset_settings( & end subroutine LIBMUSCLE_Message_unset_settings type(LIBMUSCLE_Instance) function LIBMUSCLE_Instance_create( & - ports, communicator, root) + ports, flags, communicator, root) implicit none type(LIBMUSCLE_PortsDescription), intent(in), optional :: ports + type(LIBMUSCLE_InstanceFlags), intent(in), optional :: flags integer, intent(in), optional :: communicator, root - integer :: acommunicator, aroot + integer :: iflags, acommunicator, aroot integer :: num_args, i, arg_len integer (c_intptr_t) :: cla, ports_ptr character (kind=c_char, len=:), allocatable :: cur_arg @@ -16517,23 +16529,16 @@ type(LIBMUSCLE_Instance) function LIBMUSCLE_Instance_create( & cla, i, cur_arg, int(len(cur_arg), c_size_t)) deallocate(cur_arg) end do - if (present(ports)) then - ports_ptr = ports%ptr - else - ports_ptr = 0 - end if - if (present(communicator)) then - acommunicator = communicator - else - acommunicator = MPI_COMM_WORLD - end if - if (present(root)) then - aroot = root - else - aroot = 0 - end if + ports_ptr = 0 + if (present(ports)) ports_ptr = ports%ptr + iflags = 0 + if (present(flags)) iflags = flags%to_int() + 
acommunicator = MPI_COMM_WORLD + if (present(communicator)) acommunicator = communicator + aroot = 0 + if (present(root)) aroot = root LIBMUSCLE_Instance_create%ptr = & - LIBMUSCLE_Instance_create_(cla, ports_ptr, acommunicator, aroot) + LIBMUSCLE_Instance_create_(cla, ports_ptr, iflags, acommunicator, aroot) call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_free_(cla) end function LIBMUSCLE_Instance_create @@ -17902,6 +17907,19 @@ function LIBMUSCLE_Instance_receive_with_settings_psd( & LIBMUSCLE_Instance_receive_with_settings_psd%ptr = ret_val end function LIBMUSCLE_Instance_receive_with_settings_psd + integer function LIBMUSCLE_InstanceFlags_to_int_(flags) + implicit none + + class(LIBMUSCLE_InstanceFlags), intent(in) :: flags + integer :: ret_val + + ret_val = 0 + if (flags%DONT_APPLY_OVERLAY) ret_val = ret_val + 1 + if (flags%USES_CHECKPOINT_API) ret_val = ret_val + 2 + if (flags%KEEPS_NO_STATE_FOR_NEXT_USE) ret_val = ret_val + 4 + if (flags%STATE_NOT_REQUIRED_FOR_NEXT_USE) ret_val = ret_val + 8 + LIBMUSCLE_InstanceFlags_to_int_ = ret_val + end function LIBMUSCLE_InstanceFlags_to_int_ function LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_create( & count) diff --git a/scripts/api_generator.py b/scripts/api_generator.py index f26e38ee..fa554750 100644 --- a/scripts/api_generator.py +++ b/scripts/api_generator.py @@ -2286,6 +2286,101 @@ def fortran_type_definition(self) -> str: return textwrap.indent(result, 4*' ') +class Flags: + def __init__(self, name: str, values: List[str]): + """Create a Flags description. + + Flags are an enum class in C++, but a custom derived type with boolean + attributes in Fortran. + + Args: + name: name of the flags + values: list of option names + """ + self.ns_prefix = None # type: Optional[str] + self.public = None # type: Optional[bool] + self.name = name + self.values = values + + def set_ns_prefix(self, ns_for_name: Dict[str, str]) -> None: + """Sets the namespace prefix correctly for all members. 
+ + Args: + ns_for_name: A map from type names to namespace names. + """ + self.ns_prefix = ns_for_name[self.name] + + def set_public(self, public: bool) -> None: + """Sets whether this enum should be public. + + Public objects are usable by the Fortran program. + + Args: + public: True iff this is public. + """ + self.public = public + + def fortran_type_definition(self) -> str: + """Create a Fortran type definition for this enum. + """ + result = '' + public = '' + if self.public: + public = ', public' + + result += f'type :: {self.ns_prefix}_{self.name}\n' + for value in self.values: + result += f' logical :: {value} = .false.\n' + result += '\n' + result += 'contains\n' + result += f' procedure :: to_int => {self.ns_prefix}_{self.name}_to_int_\n' + result += 'end type\n' + return textwrap.indent(result, 4*' ') + + def fortran_exports(self) -> List[str]: + """Generates a list of linker exports for the Fortran symbols. + """ + fun_name = f'{self.ns_prefix}_{self.name}_to_int_;' + return [fun_name] + + def fortran_c_wrapper(self) -> str: + """Create C functions for the members. + """ + return '' + + def fortran_interface(self) -> str: + """Create a Fortran interface definition for the C ABI. + """ + return '' + + def fortran_public_declarations(self) -> str: + """Creates Fortran declarations making functions public. + """ + return '' + + def fortran_overloads(self) -> str: + """Create Fortran overload declarations for any OverloadSets. + """ + return '' + + def fortran_functions(self) -> str: + """Create Fortran function definitions for this class. 
+ """ + fun_name = f'{self.ns_prefix}_{self.name}_to_int_' + result = f'integer function {fun_name}(flags)\n' + result += ' implicit none\n' + result += '\n' + result += f' class({self.ns_prefix}_{self.name}), intent(in) :: flags\n' + result += ' integer :: ret_val\n' + result += '\n' + result += ' ret_val = 0\n' + for i, value in enumerate(self.values): + result += f" if (flags%{value}) ret_val = ret_val + {1 << i}\n" + result += f' {fun_name} = ret_val\n' + result += f'end function {fun_name}\n' + return textwrap.indent(result, 4*' ') + + class Namespace: def __init__(self, name: str, public: Optional[bool], prefix: str, enums: List[Enum], classes: List[Class]) -> None: diff --git a/scripts/make_libmuscle_api.py b/scripts/make_libmuscle_api.py index 75322fa8..7480ec4b 100755 --- a/scripts/make_libmuscle_api.py +++ b/scripts/make_libmuscle_api.py @@ -7,7 +7,7 @@ from api_generator import ( API, Array, AssignmentOperator, Bool, Bytes, Char, Class, Constructor, - Destructor, Double, Enum, EnumVal, Float, IndexAssignmentOperator, Int, + Destructor, Double, Enum, Flags, EnumVal, Float, IndexAssignmentOperator, Int, Int16t, Int32t, Int64t, Member, MemFun, MemFunTmpl, MultiMemFun, NamedConstructor, Namespace, Obj, OverloadSet, ShiftedIndexAssignmentOperator, Sizet, String, T, VecDbl, Vec2Dbl, @@ -815,28 +815,32 @@ def __copy__(self) -> 'Elements': instance_constructor = Constructor( - [Obj('CmdLineArgs', 'cla'), Obj('PortsDescription', 'ports')], + [Obj('CmdLineArgs', 'cla'), Obj('PortsDescription', 'ports'), + Int('flags')], fc_override=( 'std::intptr_t LIBMUSCLE_Instance_create_(\n' ' std::intptr_t cla,\n' - ' std::intptr_t ports\n' + ' std::intptr_t ports,\n' + ' int flags\n' ') {\n' ' CmdLineArgs * cla_p = reinterpret_cast(cla);\n' + ' InstanceFlags flags_o = static_cast(flags);\n' ' Instance * result;\n' ' if (ports == 0) {\n' - ' result = new Instance(cla_p->argc(), cla_p->argv());\n' + ' result = new Instance(cla_p->argc(), cla_p->argv(), flags_o);\n' ' } else 
{\n' ' PortsDescription * ports_p = reinterpret_cast(ports);\n' - ' result = new Instance(cla_p->argc(), cla_p->argv(), *ports_p);\n' + ' result = new Instance(cla_p->argc(), cla_p->argv(), *ports_p, flags_o);\n' ' }\n' ' return reinterpret_cast(result);\n' '}\n\n'), f_override=( - 'type(LIBMUSCLE_Instance) function LIBMUSCLE_Instance_create(ports)\n' + 'type(LIBMUSCLE_Instance) function LIBMUSCLE_Instance_create(ports, flags)\n' ' implicit none\n' '\n' ' type(LIBMUSCLE_PortsDescription), intent(in), optional :: ports\n' - ' integer :: num_args, i, arg_len\n' + ' type(LIBMUSCLE_InstanceFlags), intent(in), optional :: flags\n' + ' integer :: num_args, i, arg_len, iflags\n' ' integer (c_intptr_t) :: cla, ports_ptr\n' ' character (kind=c_char, len=:), allocatable :: cur_arg\n' '\n' @@ -851,12 +855,12 @@ def __copy__(self) -> 'Elements': ' cla, i, cur_arg, int(len(cur_arg), c_size_t))\n' ' deallocate(cur_arg)\n' ' end do\n' - ' if (present(ports)) then\n' - ' ports_ptr = ports%ptr\n' - ' else\n' - ' ports_ptr = 0\n' - ' end if\n' - ' LIBMUSCLE_Instance_create%ptr = LIBMUSCLE_Instance_create_(cla, ports_ptr)\n' + ' ports_ptr = 0\n' + ' if (present(ports)) ports_ptr = ports%ptr\n' + ' iflags = 0\n' + ' if (present(flags)) iflags = flags%to_int()\n' + ' LIBMUSCLE_Instance_create%ptr = LIBMUSCLE_Instance_create_( &\n' + ' cla, ports_ptr, iflags)\n' ' call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_free_(cla)\n' 'end function LIBMUSCLE_Instance_create\n' '\n')) @@ -864,34 +868,37 @@ def __copy__(self) -> 'Elements': instance_mpi_constructor = Constructor( [ Obj('CmdLineArgs', 'cla'), Obj('PortsDescription', 'ports'), - Int('communicator'), Int('root')], + Int('flags'), Int('communicator'), Int('root')], fc_override=( 'std::intptr_t LIBMUSCLE_Instance_create_(\n' ' std::intptr_t cla,\n' ' std::intptr_t ports,\n' + ' int flags,\n' ' int communicator, int root\n' ') {\n' ' CmdLineArgs * cla_p = reinterpret_cast(cla);\n' + ' InstanceFlags flags_o = static_cast(flags);\n' ' MPI_Comm 
communicator_m = MPI_Comm_f2c(communicator);\n' ' Instance * result;\n' ' if (ports == 0) {\n' ' result = new Instance(\n' - ' cla_p->argc(), cla_p->argv(), communicator_m, root);\n' + ' cla_p->argc(), cla_p->argv(), flags_o, communicator_m, root);\n' ' } else {\n' ' PortsDescription * ports_p = reinterpret_cast(ports);\n' ' result = new Instance(\n' - ' cla_p->argc(), cla_p->argv(), *ports_p, communicator_m, root);\n' + ' cla_p->argc(), cla_p->argv(), *ports_p, flags_o, communicator_m, root);\n' ' }\n' ' return reinterpret_cast(result);\n' '}\n\n'), f_override=( 'type(LIBMUSCLE_Instance) function LIBMUSCLE_Instance_create( &\n' - ' ports, communicator, root)\n' + ' ports, flags, communicator, root)\n' ' implicit none\n' '\n' ' type(LIBMUSCLE_PortsDescription), intent(in), optional :: ports\n' + ' type(LIBMUSCLE_InstanceFlags), intent(in), optional :: flags\n' ' integer, intent(in), optional :: communicator, root\n' - ' integer :: acommunicator, aroot\n' + ' integer :: iflags, acommunicator, aroot\n' ' integer :: num_args, i, arg_len\n' ' integer (c_intptr_t) :: cla, ports_ptr\n' ' character (kind=c_char, len=:), allocatable :: cur_arg\n' @@ -907,23 +914,16 @@ def __copy__(self) -> 'Elements': ' cla, i, cur_arg, int(len(cur_arg), c_size_t))\n' ' deallocate(cur_arg)\n' ' end do\n' - ' if (present(ports)) then\n' - ' ports_ptr = ports%ptr\n' - ' else\n' - ' ports_ptr = 0\n' - ' end if\n' - ' if (present(communicator)) then\n' - ' acommunicator = communicator\n' - ' else\n' - ' acommunicator = MPI_COMM_WORLD\n' - ' end if\n' - ' if (present(root)) then\n' - ' aroot = root\n' - ' else\n' - ' aroot = 0\n' - ' end if\n' + ' ports_ptr = 0\n' + ' if (present(ports)) ports_ptr = ports%ptr\n' + ' iflags = 0\n' + ' if (present(flags)) iflags = flags%to_int()\n' + ' acommunicator = MPI_COMM_WORLD\n' + ' if (present(communicator)) acommunicator = communicator\n' + ' aroot = 0\n' + ' if (present(root)) aroot = root\n' ' LIBMUSCLE_Instance_create%ptr = &\n' - ' 
LIBMUSCLE_Instance_create_(cla, ports_ptr, acommunicator, aroot)\n' + ' LIBMUSCLE_Instance_create_(cla, ports_ptr, iflags, acommunicator, aroot)\n' ' call LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_free_(cla)\n' 'end function LIBMUSCLE_Instance_create\n' '\n') @@ -1022,6 +1022,13 @@ def __copy__(self) -> 'Elements': ] +# These need to kept in sync with the values in the C++ implementation +instanceflags_desc = Flags('InstanceFlags', [ + "DONT_APPLY_OVERLAY", + "USES_CHECKPOINT_API", + "KEEPS_NO_STATE_FOR_NEXT_USE", + "STATE_NOT_REQUIRED_FOR_NEXT_USE"]) + instance_desc = Class( 'Instance', None, [instance_constructor] + [ copy(mem) for mem in instance_members]) @@ -1058,7 +1065,7 @@ def __copy__(self) -> 'Elements': [ Namespace('libmuscle', True, 'LIBMUSCLE', [], [ dataconstref_desc, data_desc, portsdescription_desc, - message_desc, instance_desc]), + message_desc, instance_desc, instanceflags_desc]), Namespace('libmuscle::impl::bindings', False, 'LIBMUSCLE_IMPL_BINDINGS', [], [cmdlineargs_desc]), Namespace('ymmsl', None, 'YMMSL', @@ -1080,7 +1087,7 @@ def __copy__(self) -> 'Elements': [ Namespace('libmuscle', True, 'LIBMUSCLE', [], [ dataconstref_desc, data_desc, portsdescription_desc, - message_desc, instance_mpi_desc]), + message_desc, instance_mpi_desc, instanceflags_desc]), Namespace('libmuscle::impl::bindings', False, 'LIBMUSCLE_IMPL_BINDINGS', [], [cmdlineargs_desc]), Namespace('ymmsl', None, 'YMMSL', From ec39352168f38aa78d25612f3066b96ae1861239 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Wed, 15 Feb 2023 17:24:20 +0100 Subject: [PATCH 033/188] Make LIBMUSCLE_InstanceFlags public --- libmuscle/fortran/src/libmuscle/libmuscle.f90 | 3 ++- libmuscle/fortran/src/libmuscle/libmuscle_mpi.f90 | 3 ++- scripts/api_generator.py | 5 ++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/libmuscle/fortran/src/libmuscle/libmuscle.f90 b/libmuscle/fortran/src/libmuscle/libmuscle.f90 index 1d3f8db5..243cdde8 100644 --- 
a/libmuscle/fortran/src/libmuscle/libmuscle.f90 +++ b/libmuscle/fortran/src/libmuscle/libmuscle.f90 @@ -467,7 +467,8 @@ module libmuscle public :: LIBMUSCLE_Instance_receive_with_settings_ps public :: LIBMUSCLE_Instance_receive_with_settings_psd public :: LIBMUSCLE_Instance_receive_with_settings_on_slot - type :: LIBMUSCLE_InstanceFlags + public :: LIBMUSCLE_InstanceFlags + type LIBMUSCLE_InstanceFlags logical :: DONT_APPLY_OVERLAY = .false. logical :: USES_CHECKPOINT_API = .false. logical :: KEEPS_NO_STATE_FOR_NEXT_USE = .false. diff --git a/libmuscle/fortran/src/libmuscle/libmuscle_mpi.f90 b/libmuscle/fortran/src/libmuscle/libmuscle_mpi.f90 index 6d683cfa..6032888f 100644 --- a/libmuscle/fortran/src/libmuscle/libmuscle_mpi.f90 +++ b/libmuscle/fortran/src/libmuscle/libmuscle_mpi.f90 @@ -468,7 +468,8 @@ module libmuscle_mpi public :: LIBMUSCLE_Instance_receive_with_settings_ps public :: LIBMUSCLE_Instance_receive_with_settings_psd public :: LIBMUSCLE_Instance_receive_with_settings_on_slot - type :: LIBMUSCLE_InstanceFlags + public :: LIBMUSCLE_InstanceFlags + type LIBMUSCLE_InstanceFlags logical :: DONT_APPLY_OVERLAY = .false. logical :: USES_CHECKPOINT_API = .false. logical :: KEEPS_NO_STATE_FOR_NEXT_USE = .false. diff --git a/scripts/api_generator.py b/scripts/api_generator.py index fa554750..9d9f88bc 100644 --- a/scripts/api_generator.py +++ b/scripts/api_generator.py @@ -2324,11 +2324,10 @@ def fortran_type_definition(self) -> str: """Create a Fortran type definition for this enum. 
""" result = '' - public = '' if self.public: - public = ', public' + result += f'public :: {self.ns_prefix}_{self.name}\n' - result += f'type :: {self.ns_prefix}_{self.name}\n' + result += f'type {self.ns_prefix}_{self.name}\n' for value in self.values: result += f' logical :: {value} = .false.\n' result += '\n' From fc8407f8fc31fc764d156aee3bc208cd45928993 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Wed, 15 Feb 2023 17:32:31 +0100 Subject: [PATCH 034/188] Remove `apply_overlay` from `reuse_instance` --- .../cpp/build/libmuscle/libmuscle.version | 3 +- .../cpp/build/libmuscle/libmuscle_mpi.version | 3 +- .../bindings/libmuscle_fortran_c.cpp | 8 +-- .../bindings/libmuscle_mpi_fortran_c.cpp | 8 +-- libmuscle/cpp/src/libmuscle/instance.cpp | 33 ++++++------ libmuscle/cpp/src/libmuscle/instance.hpp | 11 +--- .../cpp/src/libmuscle/tests/test_instance.cpp | 11 ++-- libmuscle/fortran/src/libmuscle/libmuscle.f90 | 51 +++---------------- .../fortran/src/libmuscle/libmuscle_mpi.f90 | 51 +++---------------- libmuscle/python/libmuscle/instance.py | 25 ++++----- scripts/make_libmuscle_api.py | 12 +---- 11 files changed, 54 insertions(+), 162 deletions(-) diff --git a/libmuscle/cpp/build/libmuscle/libmuscle.version b/libmuscle/cpp/build/libmuscle/libmuscle.version index 2c5f0a07..b86357c8 100644 --- a/libmuscle/cpp/build/libmuscle/libmuscle.version +++ b/libmuscle/cpp/build/libmuscle/libmuscle.version @@ -324,8 +324,7 @@ LIBMUSCLE_Message_unset_settings_; LIBMUSCLE_Instance_create_; LIBMUSCLE_Instance_free_; - LIBMUSCLE_Instance_reuse_instance_default_; - LIBMUSCLE_Instance_reuse_instance_apply_; + LIBMUSCLE_Instance_reuse_instance_; LIBMUSCLE_Instance_error_shutdown_; LIBMUSCLE_Instance_is_setting_a_character_; LIBMUSCLE_Instance_is_setting_a_int8_; diff --git a/libmuscle/cpp/build/libmuscle/libmuscle_mpi.version b/libmuscle/cpp/build/libmuscle/libmuscle_mpi.version index 2c5f0a07..b86357c8 100644 --- a/libmuscle/cpp/build/libmuscle/libmuscle_mpi.version +++ 
b/libmuscle/cpp/build/libmuscle/libmuscle_mpi.version @@ -324,8 +324,7 @@ LIBMUSCLE_Message_unset_settings_; LIBMUSCLE_Instance_create_; LIBMUSCLE_Instance_free_; - LIBMUSCLE_Instance_reuse_instance_default_; - LIBMUSCLE_Instance_reuse_instance_apply_; + LIBMUSCLE_Instance_reuse_instance_; LIBMUSCLE_Instance_error_shutdown_; LIBMUSCLE_Instance_is_setting_a_character_; LIBMUSCLE_Instance_is_setting_a_int8_; diff --git a/libmuscle/cpp/src/libmuscle/bindings/libmuscle_fortran_c.cpp b/libmuscle/cpp/src/libmuscle/bindings/libmuscle_fortran_c.cpp index c1bcc7ec..34036c21 100644 --- a/libmuscle/cpp/src/libmuscle/bindings/libmuscle_fortran_c.cpp +++ b/libmuscle/cpp/src/libmuscle/bindings/libmuscle_fortran_c.cpp @@ -3691,18 +3691,12 @@ void LIBMUSCLE_Instance_free_(std::intptr_t self) { return; } -bool LIBMUSCLE_Instance_reuse_instance_default_(std::intptr_t self) { +bool LIBMUSCLE_Instance_reuse_instance_(std::intptr_t self) { Instance * self_p = reinterpret_cast(self); bool result = self_p->reuse_instance(); return result; } -bool LIBMUSCLE_Instance_reuse_instance_apply_(std::intptr_t self, bool apply_overlay) { - Instance * self_p = reinterpret_cast(self); - bool result = self_p->reuse_instance(apply_overlay); - return result; -} - void LIBMUSCLE_Instance_error_shutdown_(std::intptr_t self, char * message, std::size_t message_size) { Instance * self_p = reinterpret_cast(self); std::string message_s(message, message_size); diff --git a/libmuscle/cpp/src/libmuscle/bindings/libmuscle_mpi_fortran_c.cpp b/libmuscle/cpp/src/libmuscle/bindings/libmuscle_mpi_fortran_c.cpp index a8a1d911..b6170f3f 100644 --- a/libmuscle/cpp/src/libmuscle/bindings/libmuscle_mpi_fortran_c.cpp +++ b/libmuscle/cpp/src/libmuscle/bindings/libmuscle_mpi_fortran_c.cpp @@ -3695,18 +3695,12 @@ void LIBMUSCLE_Instance_free_(std::intptr_t self) { return; } -bool LIBMUSCLE_Instance_reuse_instance_default_(std::intptr_t self) { +bool LIBMUSCLE_Instance_reuse_instance_(std::intptr_t self) { Instance * self_p = 
reinterpret_cast(self); bool result = self_p->reuse_instance(); return result; } -bool LIBMUSCLE_Instance_reuse_instance_apply_(std::intptr_t self, bool apply_overlay) { - Instance * self_p = reinterpret_cast(self); - bool result = self_p->reuse_instance(apply_overlay); - return result; -} - void LIBMUSCLE_Instance_error_shutdown_(std::intptr_t self, char * message, std::size_t message_size) { Instance * self_p = reinterpret_cast(self); std::string message_s(message, message_size); diff --git a/libmuscle/cpp/src/libmuscle/instance.cpp b/libmuscle/cpp/src/libmuscle/instance.cpp index 986410ba..dcdb9488 100644 --- a/libmuscle/cpp/src/libmuscle/instance.cpp +++ b/libmuscle/cpp/src/libmuscle/instance.cpp @@ -76,7 +76,7 @@ class Instance::Impl { ); ~Impl(); - bool reuse_instance(bool apply_overlay = true); + bool reuse_instance(); void error_shutdown(std::string const & message); ::ymmsl::SettingValue get_setting(std::string const & name) const; template @@ -126,10 +126,8 @@ class Instance::Impl { std::vector<::ymmsl::Port> list_declared_ports_() const; void check_port_(std::string const & port_name); bool receive_settings_(); - void pre_receive_( - std::string const & port_name, - Optional slot, bool apply_overlay); - void pre_receive_f_init_(bool apply_overlay); + void pre_receive_(std::string const & port_name, Optional slot); + void pre_receive_f_init_(); void set_local_log_level_(); void set_remote_log_level_(); void apply_overlay_(Message const & message); @@ -213,7 +211,7 @@ Instance::Impl::~Impl() { shutdown_(); } -bool Instance::Impl::reuse_instance(bool apply_overlay) { +bool Instance::Impl::reuse_instance() { bool do_reuse; #ifdef MUSCLE_ENABLE_MPI if (mpi_barrier_.is_root()) { @@ -222,7 +220,7 @@ bool Instance::Impl::reuse_instance(bool apply_overlay) { // TODO: f_init_cache_ should be empty here, or the user didn't receive // something that was sent on the last go-around. At least emit a warning. 
- pre_receive_f_init_(apply_overlay); + pre_receive_f_init_(); set_local_log_level_(); set_remote_log_level_(); @@ -479,8 +477,9 @@ Message Instance::Impl::receive_message( if (with_settings && !result.has_settings()) { std::string msg( "If you use receive_with_settings() on an F_INIT" - " port, then you have to pass false to" - " reuse_instance(), otherwise the settings will" + " port, then you have to set the flag" + " 'InstanceFlags::DONT_APPLY_OVERLAY' when constructing" + " the Instance, otherwise the settings will" " already have been applied by MUSCLE."); logger_->critical(msg); shutdown_(); @@ -672,13 +671,13 @@ bool Instance::Impl::receive_settings_() { /* Pre-receive on the given port and slot, if any. */ void Instance::Impl::pre_receive_( - std::string const & port_name, Optional slot, - bool apply_overlay) { + std::string const & port_name, Optional slot) { Reference port_ref(port_name); if (slot.is_set()) port_ref += slot.get(); Message msg = communicator_->receive_message(port_name, slot); + bool apply_overlay = !(flags_ & InstanceFlags::DONT_APPLY_OVERLAY); if (apply_overlay) { apply_overlay_(msg); check_compatibility_(port_name, msg.settings()); @@ -692,7 +691,7 @@ void Instance::Impl::pre_receive_( * This receives all incoming messages on F_INIT and stores them in * f_init_cache_. */ -void Instance::Impl::pre_receive_f_init_(bool apply_overlay) { +void Instance::Impl::pre_receive_f_init_() { f_init_cache_.clear(); auto ports = communicator_->list_ports(); if (ports.count(Operator::F_INIT) == 1) { @@ -702,13 +701,13 @@ void Instance::Impl::pre_receive_f_init_(bool apply_overlay) { if (!port.is_connected()) continue; if (!port.is_vector()) - pre_receive_(port_name, {}, apply_overlay); + pre_receive_(port_name, {}); else { - pre_receive_(port_name, 0, apply_overlay); + pre_receive_(port_name, 0); // The above receives the length, if needed, so now we can get // the rest. 
for (int slot = 1; slot < port.get_length(); ++slot) - pre_receive_(port_name, slot, apply_overlay); + pre_receive_(port_name, slot); } } } @@ -969,8 +968,8 @@ Instance::Instance( Instance::~Instance() = default; -bool Instance::reuse_instance(bool apply_overlay) { - return impl_()->reuse_instance(apply_overlay); +bool Instance::reuse_instance() { + return impl_()->reuse_instance(); } void Instance::error_shutdown(std::string const & message) { diff --git a/libmuscle/cpp/src/libmuscle/instance.hpp b/libmuscle/cpp/src/libmuscle/instance.hpp index eca98dd2..e4a7ae3d 100644 --- a/libmuscle/cpp/src/libmuscle/instance.hpp +++ b/libmuscle/cpp/src/libmuscle/instance.hpp @@ -214,17 +214,8 @@ class Instance { * MPI-based components must execute the reuse loop in each * process in parallel, and call this function at the top of the * reuse loop in each process. - * - * @param apply_overlay Whether to apply the received settings - * overlay or to save it. If you're going to use - * receive_with_settings() on your F_INIT ports, - * set this to false. If you don't know what that means, - * just call reuse_instance() without specifying this - * and everything will be fine. If it turns out that you - * did need to specify false, MUSCLE3 will tell you about - * it in an error message and you can add it. */ - bool reuse_instance(bool apply_overlay = true); + bool reuse_instance(); /** Logs an error and shuts down the Instance. 
* diff --git a/libmuscle/cpp/src/libmuscle/tests/test_instance.cpp b/libmuscle/cpp/src/libmuscle/tests/test_instance.cpp index 48247a57..637732e0 100644 --- a/libmuscle/cpp/src/libmuscle/tests/test_instance.cpp +++ b/libmuscle/cpp/src/libmuscle/tests/test_instance.cpp @@ -36,6 +36,7 @@ using libmuscle::impl::ClosePort; using libmuscle::impl::Instance; +using libmuscle::impl::InstanceFlags; using libmuscle::impl::Message; using libmuscle::impl::MockCommunicator; using libmuscle::impl::MockMMPClient; @@ -289,7 +290,8 @@ TEST(libmuscle_instance, receive_with_settings) { Instance instance(argv.size(), argv.data(), PortsDescription({ {Operator::F_INIT, {"in"}} - })); + }), + InstanceFlags::DONT_APPLY_OVERLAY); MockCommunicator::list_ports_return_value = PortsDescription({ {Operator::F_INIT, {"in"}} @@ -302,7 +304,7 @@ TEST(libmuscle_instance, receive_with_settings) { MockCommunicator::next_received_message["in"] = std::make_unique(1.0, "Testing with settings", recv_settings); - ASSERT_TRUE(instance.reuse_instance(false)); + ASSERT_TRUE(instance.reuse_instance()); Message msg(instance.receive_with_settings("in")); ASSERT_EQ(msg.timestamp(), 1.0); @@ -349,7 +351,8 @@ TEST(libmuscle_instance, receive_with_settings_default) { Instance instance(argv.size(), argv.data(), PortsDescription({ {Operator::F_INIT, {"not_connected"}} - })); + }), + InstanceFlags::DONT_APPLY_OVERLAY); MockCommunicator::list_ports_return_value = PortsDescription({ {Operator::F_INIT, {"not_connected"}} @@ -361,7 +364,7 @@ TEST(libmuscle_instance, receive_with_settings_default) { default_settings["test1"] = 12; Message default_msg(1.0, "Testing with settings", default_settings); - ASSERT_TRUE(instance.reuse_instance(false)); + ASSERT_TRUE(instance.reuse_instance()); Message msg(instance.receive_with_settings("not_connected", default_msg)); ASSERT_EQ(msg.timestamp(), 1.0); diff --git a/libmuscle/fortran/src/libmuscle/libmuscle.f90 b/libmuscle/fortran/src/libmuscle/libmuscle.f90 index 243cdde8..433e816a 
100644 --- a/libmuscle/fortran/src/libmuscle/libmuscle.f90 +++ b/libmuscle/fortran/src/libmuscle/libmuscle.f90 @@ -430,8 +430,6 @@ module libmuscle public :: LIBMUSCLE_Instance_create public :: LIBMUSCLE_Instance_free - public :: LIBMUSCLE_Instance_reuse_instance_default - public :: LIBMUSCLE_Instance_reuse_instance_apply public :: LIBMUSCLE_Instance_reuse_instance public :: LIBMUSCLE_Instance_error_shutdown public :: LIBMUSCLE_Instance_is_setting_a_character @@ -2980,22 +2978,12 @@ subroutine LIBMUSCLE_Instance_free_(self) & integer (c_intptr_t), value, intent(in) :: self end subroutine LIBMUSCLE_Instance_free_ - logical (c_bool) function LIBMUSCLE_Instance_reuse_instance_default_(self) & - bind(C, name="LIBMUSCLE_Instance_reuse_instance_default_") + logical (c_bool) function LIBMUSCLE_Instance_reuse_instance_(self) & + bind(C, name="LIBMUSCLE_Instance_reuse_instance_") use iso_c_binding integer (c_intptr_t), value, intent(in) :: self - end function LIBMUSCLE_Instance_reuse_instance_default_ - - logical (c_bool) function LIBMUSCLE_Instance_reuse_instance_apply_( & - self, & - apply_overlay) & - bind(C, name="LIBMUSCLE_Instance_reuse_instance_apply_") - - use iso_c_binding - integer (c_intptr_t), value, intent(in) :: self - logical (c_bool), value, intent(in) :: apply_overlay - end function LIBMUSCLE_Instance_reuse_instance_apply_ + end function LIBMUSCLE_Instance_reuse_instance_ subroutine LIBMUSCLE_Instance_error_shutdown_( & self, & @@ -3822,12 +3810,6 @@ end function LIBMUSCLE_Instance_receive_with_settings_psd_ LIBMUSCLE_Message_set_data_dcr end interface - interface LIBMUSCLE_Instance_reuse_instance - module procedure & - LIBMUSCLE_Instance_reuse_instance_default, & - LIBMUSCLE_Instance_reuse_instance_apply - end interface - interface LIBMUSCLE_Instance_send module procedure & LIBMUSCLE_Instance_send_pm, & @@ -16540,36 +16522,19 @@ subroutine LIBMUSCLE_Instance_free( & self%ptr) end subroutine LIBMUSCLE_Instance_free - function 
LIBMUSCLE_Instance_reuse_instance_default( & + function LIBMUSCLE_Instance_reuse_instance( & self) implicit none type(LIBMUSCLE_Instance), intent(in) :: self - logical :: LIBMUSCLE_Instance_reuse_instance_default + logical :: LIBMUSCLE_Instance_reuse_instance logical (c_bool) :: ret_val - ret_val = LIBMUSCLE_Instance_reuse_instance_default_( & + ret_val = LIBMUSCLE_Instance_reuse_instance_( & self%ptr) - LIBMUSCLE_Instance_reuse_instance_default = ret_val - end function LIBMUSCLE_Instance_reuse_instance_default - - function LIBMUSCLE_Instance_reuse_instance_apply( & - self, & - apply_overlay) - implicit none - type(LIBMUSCLE_Instance), intent(in) :: self - logical, intent(in) :: apply_overlay - logical :: LIBMUSCLE_Instance_reuse_instance_apply - - logical (c_bool) :: ret_val - - ret_val = LIBMUSCLE_Instance_reuse_instance_apply_( & - self%ptr, & - logical(apply_overlay, c_bool)) - - LIBMUSCLE_Instance_reuse_instance_apply = ret_val - end function LIBMUSCLE_Instance_reuse_instance_apply + LIBMUSCLE_Instance_reuse_instance = ret_val + end function LIBMUSCLE_Instance_reuse_instance subroutine LIBMUSCLE_Instance_error_shutdown( & self, & diff --git a/libmuscle/fortran/src/libmuscle/libmuscle_mpi.f90 b/libmuscle/fortran/src/libmuscle/libmuscle_mpi.f90 index 6032888f..960620a6 100644 --- a/libmuscle/fortran/src/libmuscle/libmuscle_mpi.f90 +++ b/libmuscle/fortran/src/libmuscle/libmuscle_mpi.f90 @@ -431,8 +431,6 @@ module libmuscle_mpi public :: LIBMUSCLE_Instance_create public :: LIBMUSCLE_Instance_free - public :: LIBMUSCLE_Instance_reuse_instance_default - public :: LIBMUSCLE_Instance_reuse_instance_apply public :: LIBMUSCLE_Instance_reuse_instance public :: LIBMUSCLE_Instance_error_shutdown public :: LIBMUSCLE_Instance_is_setting_a_character @@ -2985,22 +2983,12 @@ subroutine LIBMUSCLE_Instance_free_(self) & integer (c_intptr_t), value, intent(in) :: self end subroutine LIBMUSCLE_Instance_free_ - logical (c_bool) function 
LIBMUSCLE_Instance_reuse_instance_default_(self) & - bind(C, name="LIBMUSCLE_Instance_reuse_instance_default_") + logical (c_bool) function LIBMUSCLE_Instance_reuse_instance_(self) & + bind(C, name="LIBMUSCLE_Instance_reuse_instance_") use iso_c_binding integer (c_intptr_t), value, intent(in) :: self - end function LIBMUSCLE_Instance_reuse_instance_default_ - - logical (c_bool) function LIBMUSCLE_Instance_reuse_instance_apply_( & - self, & - apply_overlay) & - bind(C, name="LIBMUSCLE_Instance_reuse_instance_apply_") - - use iso_c_binding - integer (c_intptr_t), value, intent(in) :: self - logical (c_bool), value, intent(in) :: apply_overlay - end function LIBMUSCLE_Instance_reuse_instance_apply_ + end function LIBMUSCLE_Instance_reuse_instance_ subroutine LIBMUSCLE_Instance_error_shutdown_( & self, & @@ -3827,12 +3815,6 @@ end function LIBMUSCLE_Instance_receive_with_settings_psd_ LIBMUSCLE_Message_set_data_dcr end interface - interface LIBMUSCLE_Instance_reuse_instance - module procedure & - LIBMUSCLE_Instance_reuse_instance_default, & - LIBMUSCLE_Instance_reuse_instance_apply - end interface - interface LIBMUSCLE_Instance_send module procedure & LIBMUSCLE_Instance_send_pm, & @@ -16552,36 +16534,19 @@ subroutine LIBMUSCLE_Instance_free( & self%ptr) end subroutine LIBMUSCLE_Instance_free - function LIBMUSCLE_Instance_reuse_instance_default( & + function LIBMUSCLE_Instance_reuse_instance( & self) implicit none type(LIBMUSCLE_Instance), intent(in) :: self - logical :: LIBMUSCLE_Instance_reuse_instance_default + logical :: LIBMUSCLE_Instance_reuse_instance logical (c_bool) :: ret_val - ret_val = LIBMUSCLE_Instance_reuse_instance_default_( & + ret_val = LIBMUSCLE_Instance_reuse_instance_( & self%ptr) - LIBMUSCLE_Instance_reuse_instance_default = ret_val - end function LIBMUSCLE_Instance_reuse_instance_default - - function LIBMUSCLE_Instance_reuse_instance_apply( & - self, & - apply_overlay) - implicit none - type(LIBMUSCLE_Instance), intent(in) :: self - logical, 
intent(in) :: apply_overlay - logical :: LIBMUSCLE_Instance_reuse_instance_apply - - logical (c_bool) :: ret_val - - ret_val = LIBMUSCLE_Instance_reuse_instance_apply_( & - self%ptr, & - logical(apply_overlay, c_bool)) - - LIBMUSCLE_Instance_reuse_instance_apply = ret_val - end function LIBMUSCLE_Instance_reuse_instance_apply + LIBMUSCLE_Instance_reuse_instance = ret_val + end function LIBMUSCLE_Instance_reuse_instance subroutine LIBMUSCLE_Instance_error_shutdown( & self, & diff --git a/libmuscle/python/libmuscle/instance.py b/libmuscle/python/libmuscle/instance.py index fd553f6a..cfde144b 100644 --- a/libmuscle/python/libmuscle/instance.py +++ b/libmuscle/python/libmuscle/instance.py @@ -208,7 +208,7 @@ def __init__( self._set_local_log_level() self._set_remote_log_level() - def reuse_instance(self, apply_overlay: Optional[bool] = None) -> bool: + def reuse_instance(self) -> bool: """Decide whether to run this instance again. In a multiscale simulation, instances get reused all the time. @@ -249,7 +249,7 @@ def reuse_instance(self, apply_overlay: Optional[bool] = None) -> bool: do_reuse = self._do_reuse self._do_reuse = None else: - do_reuse = self._decide_reuse_instance(apply_overlay) + do_reuse = self._decide_reuse_instance() # now _first_run, _do_resume and _do_init are also set correctly @@ -780,8 +780,7 @@ def __set_up_logging(self) -> None: self.__manager) logging.getLogger().addHandler(self._mmp_handler) - def _decide_reuse_instance( - self, apply_overlay: Optional[bool] = None) -> bool: + def _decide_reuse_instance(self) -> bool: """Decide whether and how to reuse the instance. 
This sets self._first_run, self._do_resume and self._do_init, and @@ -805,7 +804,7 @@ def _decide_reuse_instance( # resume from final if self._first_run and self._snapshot_manager.resuming_from_final(): if f_init_connected: - got_f_init_messages = self._pre_receive(apply_overlay) + got_f_init_messages = self._pre_receive() self._do_resume = True self._do_init = True return got_f_init_messages @@ -823,7 +822,7 @@ def _decide_reuse_instance( return self._first_run # not resuming and f_init connected, run while we get messages - got_f_init_messages = self._pre_receive(apply_overlay) + got_f_init_messages = self._pre_receive() self._do_init = got_f_init_messages return got_f_init_messages @@ -981,7 +980,7 @@ def _have_f_init_connections(self) -> bool: for port in ports.get(Operator.F_INIT, [])]) return f_init_connected or self._communicator.settings_in_connected() - def _pre_receive(self, apply_overlay: Optional[bool]) -> bool: + def _pre_receive(self) -> bool: """Pre-receives on all ports. This includes muscle_settings_in and all user-defined ports. @@ -990,7 +989,7 @@ def _pre_receive(self, apply_overlay: Optional[bool]) -> bool: True iff no ClosePort messages were received. """ all_ports_open = self.__receive_settings() - self.__pre_receive_f_init(apply_overlay) + self.__pre_receive_f_init() for message in self._f_init_cache.values(): if isinstance(message.data, ClosePort): all_ports_open = False @@ -1024,19 +1023,13 @@ def __receive_settings(self) -> bool: self._trigger_manager.harmonise_wall_time(saved_until) return True - def __pre_receive_f_init(self, apply_overlay: Optional[bool]) -> None: + def __pre_receive_f_init(self) -> None: """Receives on all ports connected to F_INIT. This receives all incoming messages on F_INIT and stores them in self._f_init_cache. """ - if apply_overlay is not None: - warnings.warn( - 'Explicitly providing apply_overlay in reuse_instance is' - ' deprecated. 
Use InstanceFlags.DONT_APPLY_OVERLAY when' - ' creating the instance instead.', DeprecationWarning) - else: - apply_overlay = InstanceFlags.DONT_APPLY_OVERLAY not in self._flags + apply_overlay = InstanceFlags.DONT_APPLY_OVERLAY not in self._flags def pre_receive(port_name: str, slot: Optional[int]) -> None: msg, saved_until = self._communicator.receive_message( diff --git a/scripts/make_libmuscle_api.py b/scripts/make_libmuscle_api.py index 7480ec4b..933f1330 100755 --- a/scripts/make_libmuscle_api.py +++ b/scripts/make_libmuscle_api.py @@ -932,17 +932,7 @@ def __copy__(self) -> 'Elements': instance_members = [ Destructor(), - MemFun( - Bool(), 'reuse_instance_default', - cpp_chain_call=lambda **kwargs: 'self_p->reuse_instance()'), - MemFun( - Bool(), 'reuse_instance_apply', [Bool('apply_overlay')], - cpp_chain_call=lambda **kwargs: ( - 'self_p->reuse_instance({})'.format(kwargs['cpp_args'])) - ), - OverloadSet( - 'reuse_instance', - ['reuse_instance_default', 'reuse_instance_apply']), + MemFun(Bool(), 'reuse_instance'), MemFun(Void(), 'error_shutdown', [String('message')]), MemFunTmpl( [String(), Int64t(), Double(), Bool(), VecDbl('value'), From b310acf6c3f65c3e1da9a2aeb1d6fd0eb4ab0715 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Wed, 15 Feb 2023 17:32:49 +0100 Subject: [PATCH 035/188] Update examples and changelog for previous commit --- CHANGELOG.rst | 23 +++++++++++++++++++ docs/source/examples/cpp/load_balancer.cpp | 6 +++-- .../source/examples/fortran/load_balancer.f90 | 4 ++-- docs/source/examples/fortran/reaction_mpi.f90 | 2 +- .../examples/python/reaction_diffusion_qmc.py | 7 +++--- 5 files changed, 34 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index b7266209..93ee97fa 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -5,6 +5,29 @@ Change Log All notable changes to this project will be documented in this file. This project adheres to `Semantic Versioning `_. 
+0.7.0 +***** + +Backwards Incompatible changes +------------------------------ + +* `Instance.reuse_instance` no longer accepts the `apply_overlay` argument. Use + `InstanceFlags.DONT_APPLY_OVERLAY` when creating the instance instead. +* The `LIBMUSCLE_Instance_create` signature has changed; this might lead to errors like: + + .. code-block:: text + + 30 | instance = LIBMUSCLE_Instance_create(ports, MPI_COMM_WORLD, root_rank) + | 1 + Error: Type mismatch in argument ‘flags’ at (1); passed INTEGER(4) to TYPE(libmuscle_instanceflags) + + You may provide an explicit `LIBMUSCLE_InstanceFlags()` argument, or use named arguments: + + .. code-block:: fortran + + instance = LIBMUSCLE_Instance_create(ports, LIBMUSCLE_InstanceFlags(), MPI_COMM_WORLD, root_rank) + instance = LIBMUSCLE_Instance_create(ports, communicator=MPI_COMM_WORLD, root=root_rank) + 0.6.0 ***** diff --git a/docs/source/examples/cpp/load_balancer.cpp b/docs/source/examples/cpp/load_balancer.cpp index 5bb5f8e8..569d3fb9 100644 --- a/docs/source/examples/cpp/load_balancer.cpp +++ b/docs/source/examples/cpp/load_balancer.cpp @@ -8,6 +8,7 @@ using libmuscle::Data; using libmuscle::Instance; +using libmuscle::InstanceFlags; using libmuscle::Message; using ymmsl::Operator; using ymmsl::Settings; @@ -27,9 +28,10 @@ void load_balancer(int argc, char * argv[]) { {Operator::F_INIT, {"front_in[]"}}, {Operator::O_I, {"back_out[]"}}, {Operator::S, {"back_in[]"}}, - {Operator::O_F, {"front_out[]"}}}); + {Operator::O_F, {"front_out[]"}}}, + InstanceFlags::DONT_APPLY_OVERLAY); - while (instance.reuse_instance(false)) { + while (instance.reuse_instance()) { // F_INIT int started = 0; int done = 0; diff --git a/docs/source/examples/fortran/load_balancer.f90 b/docs/source/examples/fortran/load_balancer.f90 index d8af6ba4..3f8f03ab 100644 --- a/docs/source/examples/fortran/load_balancer.f90 +++ b/docs/source/examples/fortran/load_balancer.f90 @@ -26,10 +26,10 @@ program load_balancer call LIBMUSCLE_PortsDescription_add(ports,
YMMSL_Operator_O_I, 'back_out[]') call LIBMUSCLE_PortsDescription_add(ports, YMMSL_Operator_S, 'back_in[]') call LIBMUSCLE_PortsDescription_add(ports, YMMSL_Operator_O_F, 'front_out[]') - instance = LIBMUSCLE_Instance_create(ports) + instance = LIBMUSCLE_Instance_create(ports, LIBMUSCLE_InstanceFlags(DONT_APPLY_OVERLAY=.true.)) call LIBMUSCLE_PortsDescription_free(ports) - do while (LIBMUSCLE_Instance_reuse_instance(instance, .false.)) + do while (LIBMUSCLE_Instance_reuse_instance(instance)) ! F_INIT started = 0 done = 0 diff --git a/docs/source/examples/fortran/reaction_mpi.f90 b/docs/source/examples/fortran/reaction_mpi.f90 index 8dcdacdc..8b4b951f 100644 --- a/docs/source/examples/fortran/reaction_mpi.f90 +++ b/docs/source/examples/fortran/reaction_mpi.f90 @@ -27,7 +27,7 @@ program reaction_mpi ports = LIBMUSCLE_PortsDescription_create() call LIBMUSCLE_PortsDescription_add(ports, YMMSL_Operator_F_INIT, 'initial_state') call LIBMUSCLE_PortsDescription_add(ports, YMMSL_Operator_O_F, 'final_state') - instance = LIBMUSCLE_Instance_create(ports, MPI_COMM_WORLD, root_rank) + instance = LIBMUSCLE_Instance_create(ports, communicator=MPI_COMM_WORLD, root=root_rank) call LIBMUSCLE_PortsDescription_free(ports) do while (LIBMUSCLE_Instance_reuse_instance(instance)) diff --git a/docs/source/examples/python/reaction_diffusion_qmc.py b/docs/source/examples/python/reaction_diffusion_qmc.py index 07f8aba1..5b58e2af 100644 --- a/docs/source/examples/python/reaction_diffusion_qmc.py +++ b/docs/source/examples/python/reaction_diffusion_qmc.py @@ -4,7 +4,7 @@ import numpy as np import sobol_seq -from libmuscle import Grid, Instance, Message +from libmuscle import Grid, Instance, Message, DONT_APPLY_OVERLAY from libmuscle.runner import run_simulation from ymmsl import ( Component, Conduit, Configuration, Model, Operator, Ports, Settings) @@ -130,9 +130,10 @@ def load_balancer() -> None: Operator.F_INIT: ['front_in[]'], Operator.O_I: ['back_out[]'], Operator.S: ['back_in[]'], - 
Operator.O_F: ['front_out[]']}) + Operator.O_F: ['front_out[]']}, + DONT_APPLY_OVERLAY) - while instance.reuse_instance(False): + while instance.reuse_instance(): # F_INIT started = 0 # number started and index of next to start done = 0 # number done and index of next to return From 2506cee463d8feb87f6bf536ecebd18037933d4f Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Wed, 15 Feb 2023 18:39:27 +0100 Subject: [PATCH 036/188] Fix: use exit code 1 on failed assert in Fortran --- .../fortran/src/libmuscle/tests/assert.f90 | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/libmuscle/fortran/src/libmuscle/tests/assert.f90 b/libmuscle/fortran/src/libmuscle/tests/assert.f90 index 6831e108..163735c3 100644 --- a/libmuscle/fortran/src/libmuscle/tests/assert.f90 +++ b/libmuscle/fortran/src/libmuscle/tests/assert.f90 @@ -7,7 +7,7 @@ subroutine assert_true(x) if (.not. x) then print *, 'Assertion failed' - stop + stop 1 end if end subroutine assert_true @@ -17,7 +17,7 @@ subroutine assert_false(x) if (x) then print *, 'Assertion failed' - stop + stop 1 end if end subroutine assert_false @@ -28,7 +28,7 @@ subroutine assert_eq_integer(x, y) if (x .ne. y) then print *, 'Assertion failed' - stop + stop 1 end if end subroutine assert_eq_integer @@ -39,7 +39,7 @@ subroutine assert_eq_int1(x, y) if (x .ne. y) then print *, 'Assertion failed' - stop + stop 1 end if end subroutine assert_eq_int1 @@ -50,7 +50,7 @@ subroutine assert_eq_int2(x, y) if (x .ne. y) then print *, 'Assertion failed' - stop + stop 1 end if end subroutine assert_eq_int2 @@ -61,7 +61,7 @@ subroutine assert_eq_int4(x, y) if (x .ne. y) then print *, 'Assertion failed' - stop + stop 1 end if end subroutine assert_eq_int4 @@ -72,7 +72,7 @@ subroutine assert_eq_int8(x, y) if (x .ne. y) then print *, 'Assertion failed' - stop + stop 1 end if end subroutine assert_eq_int8 @@ -83,7 +83,7 @@ subroutine assert_eq_size(x, y) if (x .ne. 
y) then print *, 'Assertion failed' - stop + stop 1 end if end subroutine assert_eq_size @@ -93,7 +93,7 @@ subroutine assert_eq_logical(x, y) if (x .neqv. y) then print *, 'Assertion failed' - stop + stop 1 end if end subroutine assert_eq_logical @@ -103,7 +103,7 @@ subroutine assert_eq_character(x, y) if (x .ne. y) then print *, 'Assertion failed' - stop + stop 1 end if end subroutine assert_eq_character @@ -114,7 +114,7 @@ subroutine assert_eq_real4(x, y) if (x .ne. y) then print *, 'Assertion failed' - stop + stop 1 end if end subroutine assert_eq_real4 @@ -125,7 +125,7 @@ subroutine assert_eq_real8(x, y) if (x .ne. y) then print *, 'Assertion failed' - stop + stop 1 end if end subroutine assert_eq_real8 @@ -137,7 +137,7 @@ subroutine assert_eq_real8array(x, y) if (.not. all(x .eq. y)) then print *, 'Assertion failed' - stop + stop 1 end if end subroutine assert_eq_real8array @@ -149,7 +149,7 @@ subroutine assert_eq_real8array2(x, y) if (.not. all(x .eq. y)) then print *, 'Assertion failed' - stop + stop 1 end if end subroutine assert_eq_real8array2 end module assert From fbdd0498265b511d94239f9359d0950cbe041510 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Wed, 15 Feb 2023 18:40:13 +0100 Subject: [PATCH 037/188] Add fortran tests for instance flags --- .../libmuscle/tests/test_instance_flags.f90 | 129 ++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 libmuscle/fortran/src/libmuscle/tests/test_instance_flags.f90 diff --git a/libmuscle/fortran/src/libmuscle/tests/test_instance_flags.f90 b/libmuscle/fortran/src/libmuscle/tests/test_instance_flags.f90 new file mode 100644 index 00000000..7dd1a919 --- /dev/null +++ b/libmuscle/fortran/src/libmuscle/tests/test_instance_flags.f90 @@ -0,0 +1,129 @@ +module instanceflags_tests + use assert + implicit none +contains + subroutine test_instanceflags_default_create + use libmuscle + + type(LIBMUSCLE_InstanceFlags) :: flags + integer :: i + + print *, '[ RUN ] instanceflags.default_create' + 
flags = LIBMUSCLE_InstanceFlags() + call assert_false(flags%DONT_APPLY_OVERLAY) + call assert_false(flags%USES_CHECKPOINT_API) + call assert_false(flags%KEEPS_NO_STATE_FOR_NEXT_USE) + call assert_false(flags%STATE_NOT_REQUIRED_FOR_NEXT_USE) + i = flags%to_int() + call assert_eq_integer(i, 0) + print *, '[ OK ] instanceflags.default_create' + end subroutine test_instanceflags_default_create + + subroutine test_instanceflags_create_1 + use libmuscle + + type(LIBMUSCLE_InstanceFlags) :: flags + integer :: i + + print *, '[ RUN ] instanceflags.create_1' + flags = LIBMUSCLE_InstanceFlags(DONT_APPLY_OVERLAY=.true.) + call assert_true(flags%DONT_APPLY_OVERLAY) + call assert_false(flags%USES_CHECKPOINT_API) + call assert_false(flags%KEEPS_NO_STATE_FOR_NEXT_USE) + call assert_false(flags%STATE_NOT_REQUIRED_FOR_NEXT_USE) + i = flags%to_int() + call assert_eq_integer(i, 1) + print *, '[ OK ] instanceflags.create_1' + end subroutine test_instanceflags_create_1 + + subroutine test_instanceflags_create_2 + use libmuscle + + type(LIBMUSCLE_InstanceFlags) :: flags + integer :: i + + print *, '[ RUN ] instanceflags.create_2' + flags = LIBMUSCLE_InstanceFlags(USES_CHECKPOINT_API=.true.) + call assert_false(flags%DONT_APPLY_OVERLAY) + call assert_true(flags%USES_CHECKPOINT_API) + call assert_false(flags%KEEPS_NO_STATE_FOR_NEXT_USE) + call assert_false(flags%STATE_NOT_REQUIRED_FOR_NEXT_USE) + i = flags%to_int() + call assert_eq_integer(i, 2) + print *, '[ OK ] instanceflags.create_2' + end subroutine test_instanceflags_create_2 + + subroutine test_instanceflags_create_3 + use libmuscle + + type(LIBMUSCLE_InstanceFlags) :: flags + integer :: i + + print *, '[ RUN ] instanceflags.create_3' + flags = LIBMUSCLE_InstanceFlags(KEEPS_NO_STATE_FOR_NEXT_USE=.true.) 
+ call assert_false(flags%DONT_APPLY_OVERLAY) + call assert_false(flags%USES_CHECKPOINT_API) + call assert_true(flags%KEEPS_NO_STATE_FOR_NEXT_USE) + call assert_false(flags%STATE_NOT_REQUIRED_FOR_NEXT_USE) + i = flags%to_int() + call assert_eq_integer(i, 4) + print *, '[ OK ] instanceflags.create_3' + end subroutine test_instanceflags_create_3 + + subroutine test_instanceflags_create_4 + use libmuscle + + type(LIBMUSCLE_InstanceFlags) :: flags + integer :: i + + print *, '[ RUN ] instanceflags.create_4' + flags = LIBMUSCLE_InstanceFlags(STATE_NOT_REQUIRED_FOR_NEXT_USE=.true.) + call assert_false(flags%DONT_APPLY_OVERLAY) + call assert_false(flags%USES_CHECKPOINT_API) + call assert_false(flags%KEEPS_NO_STATE_FOR_NEXT_USE) + call assert_true(flags%STATE_NOT_REQUIRED_FOR_NEXT_USE) + i = flags%to_int() + call assert_eq_integer(i, 8) + print *, '[ OK ] instanceflags.create_4' + end subroutine test_instanceflags_create_4 + + subroutine test_instanceflags_create_all + use libmuscle + + type(LIBMUSCLE_InstanceFlags) :: flags + integer :: i + + print *, '[ RUN ] instanceflags.create_all' + flags = LIBMUSCLE_InstanceFlags( & + DONT_APPLY_OVERLAY=.true., & + USES_CHECKPOINT_API=.true., & + KEEPS_NO_STATE_FOR_NEXT_USE=.true., & + STATE_NOT_REQUIRED_FOR_NEXT_USE=.true.) 
+ call assert_true(flags%DONT_APPLY_OVERLAY) + call assert_true(flags%USES_CHECKPOINT_API) + call assert_true(flags%KEEPS_NO_STATE_FOR_NEXT_USE) + call assert_true(flags%STATE_NOT_REQUIRED_FOR_NEXT_USE) + i = flags%to_int() + call assert_eq_integer(i, 15) + print *, '[ OK ] instanceflags.create_all' + end subroutine test_instanceflags_create_all +end module instanceflags_tests + +program test_instanceflags + use instanceflags_tests + implicit none + + print *, '' + print *, '[==========] Fortran API InstanceFlags' + + call test_instanceflags_default_create + call test_instanceflags_create_1 + call test_instanceflags_create_2 + call test_instanceflags_create_3 + call test_instanceflags_create_4 + call test_instanceflags_create_all + + print *, '[==========] Fortran API InstanceFlags' + print *, '[ PASSED ] Fortran API InstanceFlags' + print *, '' +end program test_instanceflags From e96aa1adf73493ac86e69f02260ea0d743346cbf Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Thu, 16 Feb 2023 13:15:46 +0100 Subject: [PATCH 038/188] Update documentation --- docs/source/cpp_api.rst | 1 + docs/source/fortran_api.rst | 136 +++++++++------------ docs/source/mpi.rst | 2 +- docs/source/uncertainty_quantification.rst | 19 ++- libmuscle/cpp/src/libmuscle/instance.hpp | 16 ++- libmuscle/python/libmuscle/instance.py | 17 +-- 6 files changed, 89 insertions(+), 102 deletions(-) diff --git a/docs/source/cpp_api.rst b/docs/source/cpp_api.rst index fa954e81..226e5537 100644 --- a/docs/source/cpp_api.rst +++ b/docs/source/cpp_api.rst @@ -14,6 +14,7 @@ Namespace libmuscle .. doxygenclass:: libmuscle::impl::Data .. doxygenclass:: libmuscle::impl::DataConstRef .. doxygenclass:: libmuscle::impl::Instance +.. doxygenenum:: libmuscle::impl::InstanceFlags .. doxygenclass:: libmuscle::impl::Message .. 
doxygentypedef:: libmuscle::impl::PortsDescription diff --git a/docs/source/fortran_api.rst b/docs/source/fortran_api.rst index 895a49bd..d4ce91c2 100644 --- a/docs/source/fortran_api.rst +++ b/docs/source/fortran_api.rst @@ -1488,65 +1488,16 @@ LIBMUSCLE_PortsDescription :rtype port_name: character LIBMUSCLE_Instance -`````````````````````````` +`````````````````` .. f:type:: LIBMUSCLE_Instance The Instance class represents a component instance in a MUSCLE3 simulation. This class provides a low-level send/receive API for the instance to use. -.. f:function:: LIBMUSCLE_Instance_create() - - Create a new Instance object with ports from the configuration. - - For MPI-based components, this will have libmuscle_mpi use a duplicate of - ``MPI_COMM_WORLD`` to communicate, and the designated root process will be - that with rank 0. - - This object must be freed when you're done with it using - :f:func:`LIBMUSCLE_Instance_free`. - - :r instance: The newly created instance object. - :rtype instance: LIBMUSCLE_Instance - -.. f:function:: LIBMUSCLE_Instance_create(ports) - - Create a new Instance object with the given ports. - - For MPI-based components, this will have libmuscle_mpi use a duplicate - of ``MPI_COMM_WORLD`` to communicate, and the designated root process will - be that with rank 0. - - This object must be freed when you're done with it using - :f:func:`LIBMUSCLE_Instance_free`. - - :p LIBMUSCLE_PortsDescription ports: The ports of the new instance. - :r instance: The newly created instance object. - :rtype instance: LIBMUSCLE_Instance - -.. f:function:: LIBMUSCLE_Instance_create(communicator, root) - - Create a new Instance object for MPI with ports from the configuration. - - For MPI-based components, an MPI communicator and a root rank may be - passed. The communicator must contain all processes in this instance, and - ``root`` must be the rank of one of them. MUSCLE will create a duplicate of - this communicator for its own use. 
Creating a :f:type:`LIBMUSCLE_Instance` - for an MPI component is a collective operation, so it must be done in - all processes simultaneously, with the same communicator and the same root. - - This object must be freed when you're done with it using - :f:func:`LIBMUSCLE_Instance_free`. - - :p integer communicator: MPI communicator to use (optional, default - MPI_COMM_WORLD). - :p integer root: Rank of the root process (optional, default 0). - :r instance: The newly created instance object. - :rtype instance: LIBMUSCLE_Instance - -.. f:function:: LIBMUSCLE_Instance_create(ports, communicator, root) +.. f:function:: LIBMUSCLE_Instance_create(ports, flags, communicator, root) - Create a new Instance object for MPI with the given ports. + Create a new Instance object for MPI with the given ports and flags. For MPI-based components, an MPI communicator and a root rank may be passed. The communicator must contain all processes in this instance, and @@ -1558,10 +1509,11 @@ LIBMUSCLE_Instance This object must be freed when you're done with it using :f:func:`LIBMUSCLE_Instance_free`. - :p LIBMUSCLE_PortsDescription ports: The ports of the new instance. - :p integer communicator: MPI communicator to use (optional, default + :p LIBMUSCLE_PortsDescription ports [optional]: The ports of the new instance. + :p LIBMUSCLE_InstanceFlags flags [optional]: The flags to use for the new instance. + :p integer communicator [optional]: MPI communicator to use (optional, default MPI_COMM_WORLD). - :p integer root: Rank of the root process (optional, default 0). + :p integer root [optional]: Rank of the root process (optional, default 0). :r instance: The newly created instance object. :rtype instance: LIBMUSCLE_Instance @@ -1587,27 +1539,6 @@ LIBMUSCLE_Instance :r reuse: Whether to enter the reuse loop another time. :rtype reuse: logical -.. f:function:: LIBMUSCLE_Instance_reuse_instance(self, apply_overlay) - - Checks whether to reuse this instance. 
- - This method must be called at the beginning of the reuse loop, i.e. before - the F_INIT operator, and its return value should decide whether to enter - that loop again. - - This version of this function lets you choose whether to apply the received - settings overlay or to return it with the message. If you're going to use - :f:func:`LIBMUSCLE_Instance_receive_with_settings` on your F_INIT ports, set - this to ``.false.``. If you don't know what that means, just call - ``LIBMUSCLE_Instance_reuse_instance()`` with no arguments and all will be - fine. If it turns out that you did need to specify ``.false.`` here, MUSCLE - 3 will tell you in an error message, and you can add it. - - :p LIBMUSCLE_Instance self: The object to check for reuse. - :p logical apply_overlay: Whether to apply the received settings overlay. - :r reuse: Whether to enter the reuse loop another time. - :rtype reuse: logical - .. f:subroutine:: LIBMUSCLE_Instance_error_shutdown(self, message) Logs an error and shuts down the Instance. @@ -2185,6 +2116,59 @@ LIBMUSCLE_Instance :r message: The received message. :rtype message: LIBMUSCLE_Message +LIBMUSCLE_InstanceFlags +``````````````````````` + +.. f:type:: LIBMUSCLE_InstanceFlags + + The InstanceFlags type represents the flags that can be supplied + when creating a new :f:type:`LIBMUSCLE_Instance`. Multiple flags may be set simultaneously, + for example: + + .. code-block:: fortran + + ports = LIBMUSCLE_PortsDescription_create() + ! Specify ports... + instance = LIBMUSCLE_Instance_create(ports, & + LIBMUSCLE_InstanceFlags(DONT_APPLY_OVERLAY=.true., USES_CHECKPOINT_API=.true.)) + call LIBMUSCLE_PortsDescription_free(ports) + + + :f logical DONT_APPLY_OVERLAY: Set to ``.true.`` to not apply the received settings + overlay during prereceive of F_INIT messages. + + If you're going to use :f:func:`LIBMUSCLE_Instance_receive_with_settings` on your F_INIT ports, + you need to set this flag when creating an Instance.
+ + If you don't know what that means, do not specify this flag and everything + will be fine. If it turns out that you did need to specify the flag, MUSCLE3 + will tell you about it in an error message and you can add it still. + + :f logical USES_CHECKPOINT_API: Set to ``.true.`` to indicate that this instance + supports checkpointing. + + You may not use any checkpointing API calls when this flag is not supplied. + + :f logical KEEPS_NO_STATE_FOR_NEXT_USE: Indicate this instance does not carry state + between iterations of the reuse loop. + + This corresponds to :external:py:attr:`ymmsl.KeepsStateForNextUse.NO`. + + If neither ``KEEPS_NO_STATE_FOR_NEXT_USE`` and + ``STATE_NOT_REQUIRED_FOR_NEXT_USE`` are supplied, this corresponds to + :external:py:attr:`ymmsl.KeepsStateForNextUse.NECESSARY`. + + :f logical STATE_NOT_REQUIRED_FOR_NEXT_USE: Indicate this instance carries state + between iterations of the reuse loop, however this state is not required + for restarting. + + This corresponds to :external:py:attr:`ymmsl.KeepsStateForNextUse.HELPFUL`. + + If neither ``KEEPS_NO_STATE_FOR_NEXT_USE`` and + ``STATE_NOT_REQUIRED_FOR_NEXT_USE`` are supplied, this corresponds to + :external:py:attr:`ymmsl.KeepsStateForNextUse.NECESSARY`. 
+ + Namespace YMMSL --------------- diff --git a/docs/source/mpi.rst b/docs/source/mpi.rst index 0b11b6a7..6e981d13 100644 --- a/docs/source/mpi.rst +++ b/docs/source/mpi.rst @@ -180,7 +180,7 @@ Creating an Instance ports = LIBMUSCLE_PortsDescription_create() call LIBMUSCLE_PortsDescription_add(ports, YMMSL_Operator_F_INIT, 'initial_state') call LIBMUSCLE_PortsDescription_add(ports, YMMSL_Operator_O_F, 'final_state') - instance = LIBMUSCLE_Instance_create(ports, MPI_COMM_WORLD, root_rank) + instance = LIBMUSCLE_Instance_create(ports, communicator=MPI_COMM_WORLD, root=root_rank) call LIBMUSCLE_PortsDescription_free(ports) diff --git a/docs/source/uncertainty_quantification.rst b/docs/source/uncertainty_quantification.rst index 36dff972..8501866d 100644 --- a/docs/source/uncertainty_quantification.rst +++ b/docs/source/uncertainty_quantification.rst @@ -252,7 +252,14 @@ limited number of macro model instances. It has a front side, which connects to .. code-block:: python - while instance.reuse_instance(False): + instance = Instance({ + Operator.F_INIT: ['front_in[]'], + Operator.O_I: ['back_out[]'], + Operator.S: ['back_in[]'], + Operator.O_F: ['front_out[]']}, + DONT_APPLY_OVERLAY) + + while instance.reuse_instance(): # F_INIT started = 0 # number started and index of next to start done = 0 # number done and index of next to return @@ -287,10 +294,12 @@ receiving a result, they'll be queued up and processed in order.) We use :meth:`libmuscle.Instance.receive_with_settings` everywhere, in order to correctly pass on any settings overlays. Since we are using :meth:`libmuscle.Instance.receive_with_settings` on an F_INIT port, we passed -``False`` to :meth:`libmuscle.Instance.reuse_instance`. It is a technical -requirement of MUSCLE3 to do this, and MUSCLE will give an error message if -you call :meth:`libmuscle.Instance.receive_with_settings` without having passed -``False`` to :meth:`libmuscle.Instance.reuse_instance`. 
(There's just no other +:attr:`libmuscle.InstanceFlags.DONT_APPLY_OVERLAY` as a flag when creating the +:class:`libmuscle.Instance`. It is a technical requirement of MUSCLE3 to do +this, and MUSCLE will give an error message if you call +:meth:`libmuscle.Instance.receive_with_settings` without having set the +:attr:`libmuscle.InstanceFlags.DONT_APPLY_OVERLAY` flag when creating the +:class:`libmuscle.Instance`. (There's just no other way to implement this, or rather, all other options can lead to potentially difficult-to-debug situations, while this can be checked and a clear error message shown if it goes wrong. So we chose this as the preferable option.) diff --git a/libmuscle/cpp/src/libmuscle/instance.hpp b/libmuscle/cpp/src/libmuscle/instance.hpp index e4a7ae3d..fdd8320f 100644 --- a/libmuscle/cpp/src/libmuscle/instance.hpp +++ b/libmuscle/cpp/src/libmuscle/instance.hpp @@ -16,8 +16,12 @@ namespace libmuscle { namespace impl { /** Enumeration of properties that an instance may have. * - * You may combine multiple flags using the bitwise OR operator `|`. For - * example: + * You may combine multiple flags using the bitwise OR operator `|`. For example: + * + * \code{cpp} + * auto flags = InstanceFlags::DONT_APPLY_OVERLAY | InstanceFlags::USES_CHECKPOINT_API; + * Instance instance(argc, argv, flags); + * \endcode */ enum class InstanceFlags : int { NONE = 0, @@ -43,22 +47,22 @@ enum class InstanceFlags : int { /** Indicate this instance does not carry state between iterations of the * reuse loop. * - * This corresponds to :external:py:attr:`ymmsl.KeepsStateForNextUse.NO`. + * This corresponds to `ymmsl.KeepsStateForNextUse.NO`. * * If neither KEEPS_NO_STATE_FOR_NEXT_USE and STATE_NOT_REQUIRED_FOR_NEXT_USE * are supplied, this corresponds to - * :external:py:attr:`ymmsl.KeepsStateForNextUse.REQUIRED`. + * `ymmsl.KeepsStateForNextUse.NECESSARY`.
*/ KEEPS_NO_STATE_FOR_NEXT_USE = 4, /** Indicate this instance carries state between iterations of the * reuse loop, however this state is not required for restarting. * - * This corresponds to :external:py:attr:`ymmsl.KeepsStateForNextUse.HELPFUL`. + * This corresponds to `ymmsl.KeepsStateForNextUse.HELPFUL`. * * If neither KEEPS_NO_STATE_FOR_NEXT_USE and STATE_NOT_REQUIRED_FOR_NEXT_USE * are supplied, this corresponds to - * :external:py:attr:`ymmsl.KeepsStateForNextUse.REQUIRED`. + * `ymmsl.KeepsStateForNextUse.NECESSARY`. */ STATE_NOT_REQUIRED_FOR_NEXT_USE = 8, }; diff --git a/libmuscle/python/libmuscle/instance.py b/libmuscle/python/libmuscle/instance.py index cfde144b..905c8832 100644 --- a/libmuscle/python/libmuscle/instance.py +++ b/libmuscle/python/libmuscle/instance.py @@ -73,7 +73,7 @@ class InstanceFlags(Flag): If neither :attr:`KEEPS_NO_STATE_FOR_NEXT_USE` and :attr:`STATE_NOT_REQUIRED_FOR_NEXT_USE` are supplied, this corresponds to - :external:py:attr:`ymmsl.KeepsStateForNextUse.REQUIRED`. + :external:py:attr:`ymmsl.KeepsStateForNextUse.NECESSARY`. """ STATE_NOT_REQUIRED_FOR_NEXT_USE = auto() @@ -84,7 +84,7 @@ class InstanceFlags(Flag): If neither :attr:`KEEPS_NO_STATE_FOR_NEXT_USE` and :attr:`STATE_NOT_REQUIRED_FOR_NEXT_USE` are supplied, this corresponds to - :external:py:attr:`ymmsl.KeepsStateForNextUse.REQUIRED`. + :external:py:attr:`ymmsl.KeepsStateForNextUse.NECESSARY`. """ @@ -643,18 +643,7 @@ def should_save_final_snapshot(self) -> bool: .. note:: This method will block until it can determine whether a final snapshot should be taken. This means it must also determine if this - instance is reused. The optional keyword-only argument - `apply_overlay` has the same meaning as for :meth:`reuse_instance`. - - Args: - apply_overlay: Whether to apply the received settings - overlay or to save it. If you're going to use - :meth:`receive_with_settings` on your F_INIT ports, set this to - False. 
If you don't know what that means, just call - :meth:`should_save_final_snapshot()` without specifying this and - everything will be fine. If it turns out that you did need to - specify False, MUSCLE3 will tell you about it in an error - message and you can add it still. + instance is reused. Returns: True iff a final snapshot should be taken by the submodel according From 480c1ee6fb9f9a0d3275051ac99534ea20de1e29 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 20 Feb 2023 13:16:15 +0100 Subject: [PATCH 039/188] Update documentation of InstanceFlags --- docs/source/fortran_api.rst | 24 ++++++++++++------------ libmuscle/cpp/src/libmuscle/instance.hpp | 24 ++++++++++++------------ libmuscle/python/libmuscle/instance.py | 23 ++++++++++++----------- 3 files changed, 36 insertions(+), 35 deletions(-) diff --git a/docs/source/fortran_api.rst b/docs/source/fortran_api.rst index d4ce91c2..90bb433b 100644 --- a/docs/source/fortran_api.rst +++ b/docs/source/fortran_api.rst @@ -2150,23 +2150,23 @@ LIBMUSCLE_InstanceFlags You may not use any checkpointing API calls when this flag is not supplied. :f logical KEEPS_NO_STATE_FOR_NEXT_USE: Indicate this instance does not carry state - between iterations of the reuse loop. + between iterations of the reuse loop. Specifying this flag is equivalent to + :external:py:attr:`ymmsl.KeepsStateForNextUse.NO`. - This corresponds to :external:py:attr:`ymmsl.KeepsStateForNextUse.NO`. - - If neither ``KEEPS_NO_STATE_FOR_NEXT_USE`` and - ``STATE_NOT_REQUIRED_FOR_NEXT_USE`` are supplied, this corresponds to - :external:py:attr:`ymmsl.KeepsStateForNextUse.NECESSARY`. + By default, (if neither ``KEEPS_NO_STATE_FOR_NEXT_USE`` nor + ``STATE_NOT_REQUIRED_FOR_NEXT_USE`` are provided), the instance is assumed + to keep state between reuses, and to require that state (equivalent to + :external:py:attr:`ymmsl.KeepsStateForNextUse.NECESSARY`). 
:f logical STATE_NOT_REQUIRED_FOR_NEXT_USE: Indicate this instance carries state between iterations of the reuse loop, however this state is not required - for restarting. - - This corresponds to :external:py:attr:`ymmsl.KeepsStateForNextUse.HELPFUL`. + for restarting. Specifying this flag is equivalent to + :external:py:attr:`ymmsl.KeepsStateForNextUse.HELPFUL`. - If neither ``KEEPS_NO_STATE_FOR_NEXT_USE`` and - ``STATE_NOT_REQUIRED_FOR_NEXT_USE`` are supplied, this corresponds to - :external:py:attr:`ymmsl.KeepsStateForNextUse.NECESSARY`. + By default, (if neither ``KEEPS_NO_STATE_FOR_NEXT_USE`` nor + ``STATE_NOT_REQUIRED_FOR_NEXT_USE`` are provided), the instance is assumed + to keep state between reuses, and to require that state (equivalent to + :external:py:attr:`ymmsl.KeepsStateForNextUse.NECESSARY`). Namespace YMMSL diff --git a/libmuscle/cpp/src/libmuscle/instance.hpp b/libmuscle/cpp/src/libmuscle/instance.hpp index fdd8320f..9b651b3d 100644 --- a/libmuscle/cpp/src/libmuscle/instance.hpp +++ b/libmuscle/cpp/src/libmuscle/instance.hpp @@ -45,24 +45,24 @@ enum class InstanceFlags : int { USES_CHECKPOINT_API = 2, /** Indicate this instance does not carry state between iterations of the - * reuse loop. + * reuse loop. Specifying this flag is equivalent to + * `ymmsl.KeepsStateForNextUse.NO`. * - * This corresponds to `ymmsl.KeepsStateForNextUse.NO`. - * - * If neither KEEPS_NO_STATE_FOR_NEXT_USE and STATE_NOT_REQUIRED_FOR_NEXT_USE - * are supplied, this corresponds to - * `ymmsl.KeepsStateForNextUse.NECESSARY`. + * By default, (if neither KEEPS_NO_STATE_FOR_NEXT_USE nor + * STATE_NOT_REQUIRED_FOR_NEXT_USE are provided), the instance is assumed + * to keep state between reuses, and to require that state (equivalent to + * `ymmsl.KeepsStateForNextUse.NECESSARY`). */ KEEPS_NO_STATE_FOR_NEXT_USE = 4, /** Indicate this instance carries state between iterations of the - * reuse loop, however this state is not required for restarting. 
- * - * This corresponds to `ymmsl.KeepsStateForNextUse.HELPFUL`. + * reuse loop, however this state is not required for restarting. + * Specifying this flag is equivalent to `ymmsl.KeepsStateForNextUse.HELPFUL`. * - * If neither KEEPS_NO_STATE_FOR_NEXT_USE and STATE_NOT_REQUIRED_FOR_NEXT_USE - * are supplied, this corresponds to - * `ymmsl.KeepsStateForNextUse.NECESSARY`. + * By default, (if neither KEEPS_NO_STATE_FOR_NEXT_USE nor + * STATE_NOT_REQUIRED_FOR_NEXT_USE are provided), the instance is assumed + * to keep state between reuses, and to require that state (equivalent to + * `ymmsl.KeepsStateForNextUse.NECESSARY`). */ STATE_NOT_REQUIRED_FOR_NEXT_USE = 8, }; diff --git a/libmuscle/python/libmuscle/instance.py b/libmuscle/python/libmuscle/instance.py index 905c8832..84c948bd 100644 --- a/libmuscle/python/libmuscle/instance.py +++ b/libmuscle/python/libmuscle/instance.py @@ -67,24 +67,25 @@ class InstanceFlags(Flag): KEEPS_NO_STATE_FOR_NEXT_USE = auto() """Indicate this instance does not carry state between iterations of the - reuse loop. + reuse loop. Specifying this flag is equivalent to + :external:py:attr:`ymmsl.KeepsStateForNextUse.NO`. - This corresponds to :external:py:attr:`ymmsl.KeepsStateForNextUse.NO`. - - If neither :attr:`KEEPS_NO_STATE_FOR_NEXT_USE` and - :attr:`STATE_NOT_REQUIRED_FOR_NEXT_USE` are supplied, this corresponds to - :external:py:attr:`ymmsl.KeepsStateForNextUse.NECESSARY`. + By default, (if neither :attr:`KEEPS_NO_STATE_FOR_NEXT_USE` nor + :attr:`STATE_NOT_REQUIRED_FOR_NEXT_USE` are provided), the instance is assumed + to keep state between reuses, and to require that state (equivalent to + :external:py:attr:`ymmsl.KeepsStateForNextUse.NECESSARY`). """ STATE_NOT_REQUIRED_FOR_NEXT_USE = auto() """Indicate this instance carries state between iterations of the reuse loop, however this state is not required for restarting. + Specifying this flag is equivalent to + :external:py:attr:`ymmsl.KeepsStateForNextUse.HELPFUL`. 
- This corresponds to :external:py:attr:`ymmsl.KeepsStateForNextUse.HELPFUL`. - - If neither :attr:`KEEPS_NO_STATE_FOR_NEXT_USE` and - :attr:`STATE_NOT_REQUIRED_FOR_NEXT_USE` are supplied, this corresponds to - :external:py:attr:`ymmsl.KeepsStateForNextUse.NECESSARY`. + By default, (if neither :attr:`KEEPS_NO_STATE_FOR_NEXT_USE` nor + :attr:`STATE_NOT_REQUIRED_FOR_NEXT_USE` are provided), the instance is assumed + to keep state between reuses, and to require that state (equivalent to + :external:py:attr:`ymmsl.KeepsStateForNextUse.NECESSARY`). """ From 3b5b5ad83c6dded007d6cc6d3a959aa2a42e4b15 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 20 Feb 2023 13:27:02 +0100 Subject: [PATCH 040/188] Introduce MPI macros in instance.cpp Allows more compact definitions of Instance::Instance constructors. --- libmuscle/cpp/src/libmuscle/instance.cpp | 60 +++++++----------------- 1 file changed, 16 insertions(+), 44 deletions(-) diff --git a/libmuscle/cpp/src/libmuscle/instance.cpp b/libmuscle/cpp/src/libmuscle/instance.cpp index dcdb9488..527713f1 100644 --- a/libmuscle/cpp/src/libmuscle/instance.cpp +++ b/libmuscle/cpp/src/libmuscle/instance.cpp @@ -902,68 +902,40 @@ void Instance::Impl::shutdown_() { * These just forward to the hidden implementations above. 
*/ -Instance::Instance( - int argc, char const * const argv[] -#ifdef MUSCLE_ENABLE_MPI - , MPI_Comm const & communicator - , int root -#endif - ) - : pimpl_(new Impl( - argc, argv, {{}}, InstanceFlags::NONE #ifdef MUSCLE_ENABLE_MPI - , communicator, root +#define MPI_ARGS_DECL , MPI_Comm const & communicator, int root +#define MPI_ARGS_CALL , communicator, root +#else +#define MPI_ARGS_DECL +#define MPI_ARGS_CALL #endif - )) + +Instance::Instance( + int argc, char const * const argv[] + MPI_ARGS_DECL) + : pimpl_(new Impl(argc, argv, {}, InstanceFlags::NONE MPI_ARGS_CALL)) {} Instance::Instance( int argc, char const * const argv[], PortsDescription const & ports -#ifdef MUSCLE_ENABLE_MPI - , MPI_Comm const & communicator - , int root -#endif - ) - : pimpl_(new Impl( - argc, argv, ports, InstanceFlags::NONE -#ifdef MUSCLE_ENABLE_MPI - , communicator, root -#endif - )) + MPI_ARGS_DECL) + : pimpl_(new Impl(argc, argv, ports, InstanceFlags::NONE MPI_ARGS_CALL)) {} Instance::Instance( int argc, char const * const argv[], InstanceFlags flags -#ifdef MUSCLE_ENABLE_MPI - , MPI_Comm const & communicator - , int root -#endif - ) - : pimpl_(new Impl( - argc, argv, {{}}, flags -#ifdef MUSCLE_ENABLE_MPI - , communicator, root -#endif - )) + MPI_ARGS_DECL) + : pimpl_(new Impl(argc, argv, {}, flags MPI_ARGS_CALL)) {} Instance::Instance( int argc, char const * const argv[], PortsDescription const & ports, InstanceFlags flags -#ifdef MUSCLE_ENABLE_MPI - , MPI_Comm const & communicator - , int root -#endif - ) - : pimpl_(new Impl( - argc, argv, ports, flags -#ifdef MUSCLE_ENABLE_MPI - , communicator, root -#endif - )) + MPI_ARGS_DECL) + : pimpl_(new Impl(argc, argv, ports, flags MPI_ARGS_CALL)) {} Instance::~Instance() = default; From 2a9d8e39b73840c40f1fe844f50c318aa128ec11 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Wed, 1 Feb 2023 15:09:54 +0100 Subject: [PATCH 041/188] Port api_guard to C++ Also fix some small things in the Python implementation. 
--- libmuscle/cpp/src/libmuscle/api_guard.cpp | 168 +++++++++++++++ libmuscle/cpp/src/libmuscle/api_guard.hpp | 152 ++++++++++++++ .../src/libmuscle/tests/test_api_guard.cpp | 193 ++++++++++++++++++ libmuscle/python/libmuscle/api_guard.py | 15 +- 4 files changed, 516 insertions(+), 12 deletions(-) create mode 100644 libmuscle/cpp/src/libmuscle/api_guard.cpp create mode 100644 libmuscle/cpp/src/libmuscle/api_guard.hpp create mode 100644 libmuscle/cpp/src/libmuscle/tests/test_api_guard.cpp diff --git a/libmuscle/cpp/src/libmuscle/api_guard.cpp b/libmuscle/cpp/src/libmuscle/api_guard.cpp new file mode 100644 index 00000000..f2d00125 --- /dev/null +++ b/libmuscle/cpp/src/libmuscle/api_guard.cpp @@ -0,0 +1,168 @@ +#include + +#include + +namespace libmuscle { namespace impl { + +APIGuard::APIGuard(bool uses_checkpointing) + : phase_(APIPhase::BEFORE_FIRST_REUSE_INSTANCE), + uses_checkpointing_(uses_checkpointing) +{} + +void APIGuard::verify_reuse_instance() { + if (phase_ != APIPhase::BEFORE_REUSE_INSTANCE + && phase_ != APIPhase::BEFORE_FIRST_REUSE_INSTANCE) { + throw std::runtime_error( + "We reached the end of the reuse loop without checking" + " if a snapshot should be saved. 
Please add at least" + " a should_save_final_snapshot and save_final_snapshot."); + } +} + +void APIGuard::reuse_instance_done(bool reusing) { + if (!reusing) { + phase_ = APIPhase::AFTER_REUSE_LOOP; + } else { + if (uses_checkpointing_) { + phase_ = APIPhase::BEFORE_RESUMING; + } else { + phase_ = APIPhase::BEFORE_REUSE_INSTANCE; + } + } +} + +void APIGuard::verify_resuming() { + if (!uses_checkpointing_) { + throw std::runtime_error( + "Please add the flag" + " InstanceFlag::USES_CHECKPOINT_API to your" + " instance to use the MUSCLE3 checkpointing API."); + } + if (phase_ != APIPhase::BEFORE_RESUMING) { + throw std::runtime_error( + "Please call resuming() only as the first thing in the" + " reuse loop."); + } +} + +void APIGuard::resuming_done(bool resuming) { + if (resuming) { + phase_ = APIPhase::BEFORE_LOAD_SNAPSHOT; + } else { + phase_ = APIPhase::BEFORE_SHOULD_INIT; + } +} + +void APIGuard::verify_load_snapshot() { + if (phase_ != APIPhase::BEFORE_LOAD_SNAPSHOT) { + throw std::runtime_error( + "Please check that we are resuming by calling resuming()" + " before calling load_snapshot()"); + } +} + +void APIGuard::load_snapshot_done() { + phase_ = APIPhase::BEFORE_SHOULD_INIT; +} + +void APIGuard::verify_should_init() { + if (phase_ != APIPhase::BEFORE_SHOULD_INIT) { + throw std::runtime_error( + "Please check whether to run f_init using should_init()" + " after resuming, and before trying to save a snapshot."); + } +} + +void APIGuard::should_init_done() { + phase_ = APIPhase::BEFORE_SHOULD_SAVE_SNAPSHOT; +} + +void APIGuard::verify_should_save_snapshot() { + if (phase_ != APIPhase::BEFORE_SHOULD_SAVE_SNAPSHOT) { + generic_error_messages_("should_save_snapshot"); + throw std::runtime_error("Should be unreachable."); + } +} + +void APIGuard::should_save_snapshot_done(bool should_save) { + if (should_save) { + phase_ = APIPhase::BEFORE_SAVE_SNAPSHOT; + } +} + +void APIGuard::verify_save_snapshot() { + if (phase_ != APIPhase::BEFORE_SAVE_SNAPSHOT) { + 
generic_error_messages_("save_snapshot"); + throw std::runtime_error("Should be unreachable."); + } +} + +void APIGuard::save_snapshot_done() { + phase_ = APIPhase::BEFORE_SHOULD_SAVE_SNAPSHOT; +} + +void APIGuard::verify_should_save_final_snapshot() { + if (phase_ != APIPhase::BEFORE_SHOULD_SAVE_SNAPSHOT) { + generic_error_messages_("should_save_final_snapshot"); + throw std::runtime_error("Should be unreachable."); + } +} + +void APIGuard::should_save_final_snapshot_done(bool should_save) { + if (should_save) { + phase_ = APIPhase::BEFORE_SAVE_FINAL_SNAPSHOT; + } else { + phase_ = APIPhase::BEFORE_REUSE_INSTANCE; + } +} + +void APIGuard::verify_save_final_snapshot() { + if (phase_ != APIPhase::BEFORE_SAVE_FINAL_SNAPSHOT) { + generic_error_messages_("save_final_snapshot"); + throw std::runtime_error("Should be unreachable."); + } +} + +void APIGuard::save_final_snapshot_done() { + phase_ = APIPhase::BEFORE_REUSE_INSTANCE; +} + +void APIGuard::generic_error_messages_(std::string verify_phase) { + std::ostringstream oss; + switch (phase_) { + case APIPhase::BEFORE_FIRST_REUSE_INSTANCE: + case APIPhase::AFTER_REUSE_LOOP: + oss << "Please only call " << verify_phase << " inside the reuse loop."; + break; + case APIPhase::BEFORE_REUSE_INSTANCE: + oss << "Please do not call " << verify_phase << " after"; + oss << " should_save_final_snapshot. 
should_save_final_snapshot"; + oss << " should be at the end of the reuse loop."; + break; + case APIPhase::BEFORE_RESUMING: + oss << "Inside the reuse loop you must call resuming first."; + break; + case APIPhase::BEFORE_LOAD_SNAPSHOT: + oss << "If resuming returns True, then you must call load_snapshot first."; + break; + case APIPhase::BEFORE_SHOULD_INIT: + oss << "After calling resuming, you must call should_init first."; + break; + case APIPhase::BEFORE_SHOULD_SAVE_SNAPSHOT: + oss << "You must call save_snapshot or save_final_snapshot first."; + break; + case APIPhase::BEFORE_SAVE_SNAPSHOT: + oss << "If should_save_snapshot returns True, then you must"; + oss << " call save_snapshot first."; + break; + case APIPhase::BEFORE_SAVE_FINAL_SNAPSHOT: + oss << "If should_save_final_snapshot returns True, then you"; + oss << " must call save_final_snapshot first."; + break; + default: + return; + } + throw std::runtime_error(oss.str()); +} + +} } diff --git a/libmuscle/cpp/src/libmuscle/api_guard.hpp b/libmuscle/cpp/src/libmuscle/api_guard.hpp new file mode 100644 index 00000000..a4517055 --- /dev/null +++ b/libmuscle/cpp/src/libmuscle/api_guard.hpp @@ -0,0 +1,152 @@ +#pragma once + +#include +#include + +namespace libmuscle { namespace impl { + +/** Different phases that the user code traverses. + * + * These values describe different regions that the model code can be + * in for the case where checkpointing is implemented. By tracking + * the phase that the model should be in, we can detect incorrect API + * usage. + * + * This does not match the yMMSL operators, as it is more + * fine-grained and concerns checkpointing, which is not represented + * in the SEL. 
+ */ +enum class APIPhase { + /** Before the first time calling reuse_instance + */ + BEFORE_FIRST_REUSE_INSTANCE, + + /** Before calling reuse_instance + */ + BEFORE_REUSE_INSTANCE, + + /** Between reuse_instance and resuming + */ + BEFORE_RESUMING, + + /** Between resuming and load_snapshot + */ + BEFORE_LOAD_SNAPSHOT, + + /** After resuming, before should_init + */ + BEFORE_SHOULD_INIT, + + /** Between should_init and should_save* + */ + BEFORE_SHOULD_SAVE_SNAPSHOT, + + /** Between should_save_snapshot and save_snapshot + */ + BEFORE_SAVE_SNAPSHOT, + + /** Between should_save_final_snapshot and save_final_snapshot + */ + BEFORE_SAVE_FINAL_SNAPSHOT, + + /** After the final call to reuse_instance() + */ + AFTER_REUSE_LOOP +}; + +/** Keeps track of and checks in which phase the model is. + * + * The verify_* functions are called when the corresponding function + * on Instance is called, to check that we're in the right phase. They + * throw a std::runtime_error if there's a problem. The *_done functions are + * called to signal that the corresponding function finished + * successfully, and that we are moving on to the next phase. + */ +class APIGuard { + public: + /** Create an APIGuard + * + * This starts the tracker in the phase BEFORE_FIRST_REUSE_INSTANCE. + */ + explicit APIGuard(bool uses_checkpointing); + + /** Check reuse_instance() + */ + void verify_reuse_instance(); + + /** Update phase on successful reuse_instance(). + * + * @param reusing Whether we are reusing or not. + */ + void reuse_instance_done(bool reusing); + + /** Check resuming() + */ + void verify_resuming(); + + /** Update phase on successful resuming(). + * + * @param resuming Whether we're resuming or not.
+ */ + void resuming_done(bool resuming); + + /** Check load_snapshot() + */ + void verify_load_snapshot(); + + /** Update phase on successful load_snapshot() + */ + void load_snapshot_done(); + + /** Check should_init() + */ + void verify_should_init(); + + /** Update phase on successful should_init() + */ + void should_init_done(); + + /** Check should_save_snapshot() + */ + void verify_should_save_snapshot(); + + /** Update phase on successful should_save_snapshot(). + * + * @param should_save Whether we should save or not. + */ + void should_save_snapshot_done(bool should_save); + + /** Check save_snapshot() + */ + void verify_save_snapshot(); + + /** Update phase on successful save_snapshot() + */ + void save_snapshot_done(); + + /** Check should_save_final_snapshot(). + */ + void verify_should_save_final_snapshot(); + + /** Update phase on successful should_save_final_snapshot(). + * + * @param should_save Whether we should save or not. + */ + void should_save_final_snapshot_done(bool should_save); + + /** Check save_final_snapshot() + */ + void verify_save_final_snapshot(); + + /** Updates state on successful save_final_snapshot() + */ + void save_final_snapshot_done(); + + private: + APIPhase phase_; + bool uses_checkpointing_; + + void generic_error_messages_(std::string verify_phase); +}; + +} } diff --git a/libmuscle/cpp/src/libmuscle/tests/test_api_guard.cpp b/libmuscle/cpp/src/libmuscle/tests/test_api_guard.cpp new file mode 100644 index 00000000..d41302ef --- /dev/null +++ b/libmuscle/cpp/src/libmuscle/tests/test_api_guard.cpp @@ -0,0 +1,193 @@ +#include + +#include + +#include +#include + +using libmuscle::impl::APIGuard; +using libmuscle::impl::APIPhase; + +int main(int argc, char *argv[]) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + +TEST(libmuscle_api_guard, test_no_checkpointing_support) { + auto guard = APIGuard(false); + for (int i=0; i<3; ++i) { + guard.verify_reuse_instance(); + guard.reuse_instance_done(true);
+ } + + guard.verify_reuse_instance(); + guard.reuse_instance_done(false); +} + +TEST(libmuscle_api_guard, test_final_snapshot_only) { + auto guard = APIGuard(true); + for (int i=0; i<4; ++i) { + guard.verify_reuse_instance(); + guard.reuse_instance_done(true); + + guard.verify_resuming(); + if (i == 0) { + guard.resuming_done(true); + + guard.verify_load_snapshot(); + guard.load_snapshot_done(); + } else { + guard.resuming_done(false); + } + + guard.verify_should_init(); + guard.should_init_done(); + + guard.verify_should_save_final_snapshot(); + if (i == 2) { + guard.should_save_final_snapshot_done(true); + + guard.verify_save_final_snapshot(); + guard.save_final_snapshot_done(); + } else { + guard.should_save_final_snapshot_done(false); + } + } + + guard.verify_reuse_instance(); + guard.reuse_instance_done(false); +} + + +TEST(libmuscle_api_guard, test_full_checkpointing) { + auto guard = APIGuard(true); + for (int i=0; i<4; ++i) { + guard.verify_reuse_instance(); + guard.reuse_instance_done(true); + + guard.verify_resuming(); + if (i == 0) { + guard.resuming_done(true); + + guard.verify_load_snapshot(); + guard.load_snapshot_done(); + } else { + guard.resuming_done(false); + } + + guard.verify_should_init(); + guard.should_init_done(); + + for (int j=0; j<3; ++j) { + guard.verify_should_save_snapshot(); + if (j != 2) { + guard.should_save_snapshot_done(true); + + guard.verify_save_snapshot(); + guard.save_snapshot_done(); + } else { + guard.should_save_snapshot_done(false); + } + } + + guard.verify_should_save_final_snapshot(); + if (i == 2) { + guard.should_save_final_snapshot_done(true); + + guard.verify_save_final_snapshot(); + guard.save_final_snapshot_done(); + } else { + guard.should_save_final_snapshot_done(false); + } + } + + guard.verify_reuse_instance(); + guard.reuse_instance_done(false); +} + +static std::vector< std::function > api_guard_funs_({ + [](APIGuard & guard){ guard.verify_reuse_instance(); }, // 0 + [](APIGuard & guard){ 
guard.reuse_instance_done(true); }, // 1 + [](APIGuard & guard){ guard.verify_resuming(); }, // 2 + [](APIGuard & guard){ guard.resuming_done(true); }, // 3 + [](APIGuard & guard){ guard.verify_load_snapshot(); }, // 4 + [](APIGuard & guard){ guard.load_snapshot_done(); }, // 5 + [](APIGuard & guard){ guard.verify_should_init(); }, // 6 + [](APIGuard & guard){ guard.should_init_done(); }, // 7 + [](APIGuard & guard){ guard.verify_should_save_snapshot(); }, // 8 + [](APIGuard & guard){ guard.should_save_snapshot_done(true); }, // 9 + [](APIGuard & guard){ guard.verify_save_snapshot(); }, // 10 + [](APIGuard & guard){ guard.save_snapshot_done(); }, // 11 + [](APIGuard & guard){ guard.verify_should_save_final_snapshot(); }, // 12 + [](APIGuard & guard){ guard.should_save_final_snapshot_done(true); }, // 13 + [](APIGuard & guard){ guard.verify_save_final_snapshot(); } // 14 +}); + +void run_until_before(APIGuard & guard, int fun) { + for (int i=0; i excluded) { + for (uint i=0; i None: - """Create an APIPhaseTracker. + """Create an APIGuard. - This starts the tracker in BEFORE_FIRST_REUSE_INSTANCE. + This starts the tracker in the phase BEFORE_FIRST_REUSE_INSTANCE. """ self._phase = APIPhase.BEFORE_FIRST_REUSE_INSTANCE self._uses_checkpointing = uses_checkpointing @@ -73,7 +65,7 @@ def _generic_error_messages(self, verify_phase: str) -> None: msg = f'Please only call {verify_phase} inside the reuse loop.' elif self._phase == APIPhase.BEFORE_REUSE_INSTANCE: msg = ( - 'Please do not call {verify_phase} after' + f'Please do not call {verify_phase} after' ' should_save_final_snapshot. should_save_final_snapshot' ' should be at the end of the reuse loop.') elif self._phase == APIPhase.BEFORE_RESUMING: @@ -140,7 +132,6 @@ def resuming_done(self, resuming: bool) -> None: Args: resuming: Whether we're resuming or not. 
""" - self._uses_checkpointing = True if resuming: self._phase = APIPhase.BEFORE_LOAD_SNAPSHOT else: From f8923a89e3b6a5c536b02cf1a49485f83d0c1831 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Thu, 2 Feb 2023 14:03:51 +0100 Subject: [PATCH 042/188] Port checkpoint_triggers to C++ Also fix some small things in the Python implementation. --- .../cpp/src/libmuscle/checkpoint_triggers.cpp | 268 ++++++++++++++++++ .../cpp/src/libmuscle/checkpoint_triggers.hpp | 165 +++++++++++ .../tests/test_checkpoint_triggers.cpp | 218 ++++++++++++++ .../python/libmuscle/checkpoint_triggers.py | 5 +- .../test/test_checkpoint_triggers.py | 2 +- 5 files changed, 653 insertions(+), 5 deletions(-) create mode 100644 libmuscle/cpp/src/libmuscle/checkpoint_triggers.cpp create mode 100644 libmuscle/cpp/src/libmuscle/checkpoint_triggers.hpp create mode 100644 libmuscle/cpp/src/libmuscle/tests/test_checkpoint_triggers.cpp diff --git a/libmuscle/cpp/src/libmuscle/checkpoint_triggers.cpp b/libmuscle/cpp/src/libmuscle/checkpoint_triggers.cpp new file mode 100644 index 00000000..699f1a0f --- /dev/null +++ b/libmuscle/cpp/src/libmuscle/checkpoint_triggers.cpp @@ -0,0 +1,268 @@ +#include + +#include +#include + +namespace libmuscle { namespace impl { + +AtCheckpointTrigger::AtCheckpointTrigger( + std::vector & at) : at_(at) { + std::sort(at_.begin(), at_.end()); +} + +Optional AtCheckpointTrigger::next_checkpoint(double cur_time) { + if (cur_time >= at_.back()) + return {}; // no future checkpoint left + auto iter = std::upper_bound(at_.begin(), at_.end(), cur_time); + return *iter; +} + +Optional AtCheckpointTrigger::previous_checkpoint(double cur_time) { + if (cur_time < at_.front()) + return {}; // no future checkpoint left + auto iter = std::upper_bound(at_.begin(), at_.end(), cur_time); + return *(iter - 1); +} + +namespace { + +Optional parse_optional_double( + DataConstRef const & map, std::string const & key) { + auto value = map[key]; + if (value.is_nil()) + return {}; + if 
(value.is_a()) + return value.as(); + if (value.is_a()) + return value.as(); + // python code should only send doubles or integers, so not checking for floats + throw std::runtime_error( + "Invalid type of item " + key + ", was expecting integer or double."); +} + +} + +RangeCheckpointTrigger::RangeCheckpointTrigger( + DataConstRef const & encoded_range_rule) { + start_ = parse_optional_double(encoded_range_rule, "start"); + stop_ = parse_optional_double(encoded_range_rule, "stop"); + auto every = parse_optional_double(encoded_range_rule, "every"); + if (!every.is_set()) + throw std::runtime_error("Received a nil value for every."); + every_ = every.get(); + + if (stop_.is_set()) { + double start = start_.is_set() ? start_.get() : 0; + double diff = stop_.get() - start; + last_ = start + std::floor(diff / every_) * every_; + } else { + last_ = {}; + } +} + +Optional RangeCheckpointTrigger::next_checkpoint(double cur_time) { + if (start_.is_set() && cur_time < start_.get()) { + return start_; + } + if (last_.is_set() && cur_time >= last_.get()) { + return {}; + } + double start = start_.is_set() ? start_.get() : 0; + double diff = cur_time - start; + return start + std::floor(diff / every_ + 1) * every_; +} + +Optional RangeCheckpointTrigger::previous_checkpoint(double cur_time) { + if (start_.is_set() && cur_time < start_.get()) { + return {}; + } + if (last_.is_set() && cur_time >= last_.get()) { + return last_; + } + double start = start_.is_set() ? 
start_.get() : 0; + double diff = cur_time - start; + return start + std::floor(diff / every_) * every_; +} + +CombinedCheckpointTriggers::CombinedCheckpointTriggers( + DataConstRef const & encoded_checkpoint_rules) + : triggers_() { + std::vector at; + for (std::size_t i=0; i()) + at.push_back(value.as()); + else + at.push_back(value.as()); + } + } else { + triggers_.push_back( + std::make_unique(rule)); + } + } + if (!at.empty()) { + triggers_.push_back(std::make_unique(at)); + } +} + +Optional CombinedCheckpointTriggers::next_checkpoint(double cur_time) { + Optional retval; + for (auto & trigger : triggers_) { + auto checkpoint = trigger->next_checkpoint(cur_time); + if (checkpoint.is_set()) { + if (!retval.is_set() || retval.get() > checkpoint.get()) { + retval = checkpoint; + } + } + } + return retval; +} + +Optional CombinedCheckpointTriggers::previous_checkpoint(double cur_time) { + Optional retval; + for (auto & trigger : triggers_) { + auto checkpoint = trigger->previous_checkpoint(cur_time); + if (checkpoint.is_set()) { + if (!retval.is_set() || retval.get() < checkpoint.get()) { + retval = checkpoint; + } + } + } + return retval; +} + +bool CombinedCheckpointTriggers::has_rules() const { + return !triggers_.empty(); +} + +TriggerManager::TriggerManager() + : has_checkpoints_(false) + , last_triggers_() + , cpts_considered_until_(-INFINITY) + , simulation_epoch_() + , checkpoint_at_end_(false) + , wall_() + , prevwall_(0) + , nextwall_(0) + , sim_() + , prevsim_() + , nextsim_() +{} + +void TriggerManager::set_checkpoint_info( + double elapsed, DataConstRef const & encoded_checkpoints) { + auto elapsed_as_duration = \ + std::chrono::duration_cast( + std::chrono::duration(elapsed)); + simulation_epoch_ = std::chrono::steady_clock::now() - elapsed_as_duration; + + checkpoint_at_end_ = encoded_checkpoints["at_end"].as(); + + wall_ = std::make_unique( + encoded_checkpoints["wallclock_time"]); + prevwall_ = 0.0; + nextwall_ = wall_->next_checkpoint(0.0); + + 
sim_ = std::make_unique( + encoded_checkpoints["simulation_time"]); + prevsim_ = {}; + nextsim_ = {}; + + has_checkpoints_ = checkpoint_at_end_ || wall_->has_rules() || sim_->has_rules(); +} + +double TriggerManager::elapsed_walltime() { + auto duration = std::chrono::steady_clock::now() - simulation_epoch_; + return std::chrono::duration(duration).count(); +} + +double TriggerManager::checkpoints_considered_until() { + return cpts_considered_until_; +} + +void TriggerManager::harmonise_wall_time(double at_least) { + double cur = elapsed_walltime(); + if (cur < at_least) { + auto duration = std::chrono::duration_cast( + std::chrono::duration(at_least - cur)); + simulation_epoch_ -= duration; + } +} + +bool TriggerManager::should_save_snapshot(double timestamp) { + if (!has_checkpoints_) return false; + + return should_save_(timestamp); +} + +bool TriggerManager::should_save_final_snapshot( + bool do_reuse, Optional f_init_max_timestamp) { + if (!has_checkpoints_) return false; + + bool value = false; + if (!do_reuse) { + if (checkpoint_at_end_) { + value = true; + last_triggers_.push_back("at_end"); + } + } else if (!f_init_max_timestamp.is_set()) { + // No F_INIT messages received: reuse triggered on muscle_settings_in + // message. + // _logger.debug('Reuse triggered by muscle_settings_in.' 
+ // ' Not creating a snapshot.') + } else { + value = should_save_(f_init_max_timestamp.get()); + } + + return value; +} + +void TriggerManager::update_checkpoints(double timestamp) { + prevwall_ = elapsed_walltime(); + nextwall_ = wall_->next_checkpoint(prevwall_); + + prevsim_ = timestamp; + nextsim_ = sim_->next_checkpoint(timestamp); +} + +std::vector TriggerManager::get_triggers() { + auto triggers = last_triggers_; + last_triggers_ = std::vector(); + return triggers; +} + +bool TriggerManager::should_save_(double simulation_time) { + if (!nextsim_.is_set() && !prevsim_.is_set()) { + // we cannot make assumptions about the start time of a simulation, + // a t=-1000 could make sense if t represents years since CE + // and we should not disallow checkpointing for negative t + auto previous = sim_->previous_checkpoint(simulation_time); + if (previous.is_set()) { + // there is a checkpoint rule before the current moment, assume + // we should have taken a snapshot back then + nextsim_ = previous; + } else { + nextsim_ = sim_->next_checkpoint(simulation_time); + } + } + + double walltime = elapsed_walltime(); + cpts_considered_until_ = walltime; + + last_triggers_.clear(); + if (nextwall_.is_set() && walltime >= nextwall_.get()) { + last_triggers_.push_back(std::string("wallclock_time >= ") + + std::to_string(nextwall_.get())); + } + if (nextsim_.is_set() && simulation_time >= nextsim_.get()) { + last_triggers_.push_back(std::string("simulation_time >= ") + + std::to_string(nextsim_.get())); + } + return !last_triggers_.empty(); +} + +} } diff --git a/libmuscle/cpp/src/libmuscle/checkpoint_triggers.hpp b/libmuscle/cpp/src/libmuscle/checkpoint_triggers.hpp new file mode 100644 index 00000000..bc8d2fc1 --- /dev/null +++ b/libmuscle/cpp/src/libmuscle/checkpoint_triggers.hpp @@ -0,0 +1,165 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include + +namespace libmuscle { namespace impl { + +/** Represents a trigger for creating snapshots + */ 
+class CheckpointTrigger { + public: + virtual ~CheckpointTrigger() = default; + + /** Calculate the next checkpoint time + * + * @param cur_time The current time. + * @return Optional The time when a next checkpoint should be taken, or + * nil if this trigger has no checkpoint after cur_time. + */ + virtual Optional next_checkpoint(double cur_time) = 0; + + /** Calculate the previous checkpoint time + * + * @param cur_time The current time. + * @return Optional The time when a previous checkpoint should have been + * taken, or nil if this trigger has no checkpoint at or before cur_time. + */ + virtual Optional previous_checkpoint(double cur_time) = 0; +}; + +/** Represents a trigger based on an "at" checkpoint rule + * + * This triggers at the specified times. + */ +class AtCheckpointTrigger : public CheckpointTrigger { + public: + /** Create an "at" checkpoint trigger + * + * @param at List of checkpoint moments + */ + AtCheckpointTrigger(std::vector & at); + + Optional next_checkpoint(double cur_time) override; + Optional previous_checkpoint(double cur_time) override; + + private: + std::vector at_; +}; + +/** Represents a trigger based on a "ranges" checkpoint rule + * + * This triggers at a range of checkpoint moments. + * + * Equivalent to an "at" rule ``[start, start + step, start + 2*step, ...]`` for + * as long as ``start + i*step <= stop``. + * + * Stop may be omitted, in which case the range is infinite. + * + * Start may be omitted, in which case the range is equivalent to an "at" rule + * ``[..., -n*step, ..., -step, 0, step, 2*step, ...]`` for as long as + * ``i*step <= stop``. + */ +class RangeCheckpointTrigger : public CheckpointTrigger { + public: + /** Create a "range" checkpoint trigger + * + * @param encoded_range_rule YMMSL CheckpointRangeRule encoded in a + * DataConstRef.
+ */ + RangeCheckpointTrigger(DataConstRef const & encoded_range_rule); + + Optional next_checkpoint(double cur_time) override; + Optional previous_checkpoint(double cur_time) override; + + private: + Optional start_; + Optional stop_; + double every_; + Optional last_; +}; + +/** Checkpoint trigger based on a combination of "at" and "ranges" + */ +class CombinedCheckpointTriggers : public CheckpointTrigger { + public: + /** Create a combined checkpoint trigger + * + * @param encoded_checkpoint_rules List of YMMSL CheckpointRule encoded in a + * DataConstRef. + */ + explicit CombinedCheckpointTriggers(DataConstRef const & encoded_checkpoint_rules); + + Optional next_checkpoint(double cur_time) override; + Optional previous_checkpoint(double cur_time) override; + + bool has_rules() const; + private: + std::vector> triggers_; +}; + +/** Manages all checkpoint triggers and checks if a snapshot must be saved. + */ +class TriggerManager { + public: + TriggerManager(); + + /** Register checkpoint info received from the muscle manager. + */ + void set_checkpoint_info( + double elapsed, DataConstRef const & encoded_checkpoints); + + /** Returns elapsed wallclock_time in seconds. + */ + double elapsed_walltime(); + + /** Return elapsed time of last should_save* + */ + double checkpoints_considered_until(); + + /** Ensure our elapsed time is at least the given value + */ + void harmonise_wall_time(double at_least); + + /** Handles instance.should_save_snapshot + */ + bool should_save_snapshot(double timestamp); + + /** Handles instance.should_save_final_snapshot + */ + bool should_save_final_snapshot( + bool do_reuse, Optional f_init_max_timestamp); + + /** Update last and next checkpoint times when a snapshot is made. + * + * @param timestamp timestamp as reported by the instance (or from incoming + * F_INIT messages for save_final_snapshot). + */ + void update_checkpoints(double timestamp); + + /** Get trigger description(s) for the current reason for checkpointing.
+ */ + std::vector get_triggers(); + + private: + bool has_checkpoints_; + std::vector last_triggers_; + double cpts_considered_until_; + std::chrono::time_point simulation_epoch_; + bool checkpoint_at_end_; + std::unique_ptr wall_; + double prevwall_; + Optional nextwall_; + std::unique_ptr sim_; + Optional prevsim_; + Optional nextsim_; + + bool should_save_(double simulation_time); +}; + +} } diff --git a/libmuscle/cpp/src/libmuscle/tests/test_checkpoint_triggers.cpp b/libmuscle/cpp/src/libmuscle/tests/test_checkpoint_triggers.cpp new file mode 100644 index 00000000..2a53f315 --- /dev/null +++ b/libmuscle/cpp/src/libmuscle/tests/test_checkpoint_triggers.cpp @@ -0,0 +1,218 @@ +#include + +#include + +using libmuscle::impl::TriggerManager; +using libmuscle::impl::AtCheckpointTrigger; +using libmuscle::impl::RangeCheckpointTrigger; +using libmuscle::impl::CombinedCheckpointTriggers; +using libmuscle::impl::Data; + +int main(int argc, char *argv[]) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + +TEST(libmuscle_checkpoint_triggers, test_at_checkpoint_trigger) { + std::vector at = {1, 3, 4, 4.5, 9}; + AtCheckpointTrigger trigger(at); + + ASSERT_DOUBLE_EQ(trigger.next_checkpoint(0).get(), 1); + ASSERT_FALSE(trigger.previous_checkpoint(0).is_set()); + + ASSERT_DOUBLE_EQ(trigger.next_checkpoint(1).get(), 3); + ASSERT_DOUBLE_EQ(trigger.previous_checkpoint(1).get(), 1); + + double eps = 1e-16; + ASSERT_DOUBLE_EQ(trigger.next_checkpoint(1 - eps).get(), 1); + ASSERT_FALSE(trigger.previous_checkpoint(1 - eps).is_set()); + + ASSERT_DOUBLE_EQ(trigger.next_checkpoint(3.9).get(), 4); + ASSERT_DOUBLE_EQ(trigger.previous_checkpoint(3.9).get(), 3); + + ASSERT_DOUBLE_EQ(trigger.next_checkpoint(4.1).get(), 4.5); + ASSERT_DOUBLE_EQ(trigger.previous_checkpoint(4.1).get(), 4); + + ASSERT_DOUBLE_EQ(trigger.next_checkpoint(5).get(), 9); + ASSERT_DOUBLE_EQ(trigger.previous_checkpoint(5).get(), 4.5); + + ASSERT_FALSE(trigger.next_checkpoint(9).is_set()); + 
ASSERT_DOUBLE_EQ(trigger.previous_checkpoint(9).get(), 9); + + ASSERT_FALSE(trigger.next_checkpoint(11).is_set()); + ASSERT_DOUBLE_EQ(trigger.previous_checkpoint(11).get(), 9); +} + +TEST(libmuscle_checkpoint_triggers, test_range_checkpoint_trigger) { + auto range = Data::dict("start", 0, "stop", 20, "every", 1.2); + RangeCheckpointTrigger trigger(range); + + ASSERT_DOUBLE_EQ(trigger.next_checkpoint(-1).get(), 0); + ASSERT_FALSE(trigger.previous_checkpoint(-1).is_set()); + + ASSERT_DOUBLE_EQ(trigger.next_checkpoint(0).get(), 1.2); + ASSERT_DOUBLE_EQ(trigger.previous_checkpoint(0).get(), 0); + + ASSERT_DOUBLE_EQ(trigger.next_checkpoint(8).get(), 8.4); + ASSERT_DOUBLE_EQ(trigger.previous_checkpoint(8).get(), 7.2); + + ASSERT_DOUBLE_EQ(trigger.next_checkpoint(18.2).get(), 19.2); + ASSERT_DOUBLE_EQ(trigger.previous_checkpoint(18.2).get(), 18); + + ASSERT_FALSE(trigger.next_checkpoint(20).is_set()); + ASSERT_DOUBLE_EQ(trigger.previous_checkpoint(20).get(), 19.2); +} + +TEST(libmuscle_checkpoint_triggers, test_range_checkpoint_trigger_default_stop) { + auto range = Data::dict("start", 1, "stop", Data(), "every", 1.2); + RangeCheckpointTrigger trigger(range); + + ASSERT_DOUBLE_EQ(trigger.next_checkpoint(-1.).get(), 1); + ASSERT_FALSE(trigger.previous_checkpoint(-1.).is_set()); + + ASSERT_DOUBLE_EQ(trigger.next_checkpoint(148148.).get(), 148148.2); + ASSERT_DOUBLE_EQ(trigger.previous_checkpoint(148148.).get(), 148147); + + ASSERT_DOUBLE_EQ(trigger.next_checkpoint(148148148.).get(), 148148149); + ASSERT_DOUBLE_EQ(trigger.previous_checkpoint(148148148.).get(), 148148147.8); +} + +TEST(libmuscle_checkpoint_triggers, test_range_checkpoint_trigger_default_start) { + auto range = Data::dict("start", Data(), "stop", 10, "every", 1.2); + RangeCheckpointTrigger trigger(range); + + ASSERT_FALSE(trigger.next_checkpoint(10).is_set()); + ASSERT_DOUBLE_EQ(trigger.previous_checkpoint(10).get(), 9.6); + + ASSERT_DOUBLE_EQ(trigger.next_checkpoint(0.0).get(), 1.2); + 
ASSERT_DOUBLE_EQ(trigger.previous_checkpoint(0.0).get(), 0.0); + + ASSERT_DOUBLE_EQ(trigger.next_checkpoint(-148148.).get(), -148147.2); + ASSERT_DOUBLE_EQ(trigger.previous_checkpoint(-148148.).get(), -148148.4); +} + +TEST(libmuscle_checkpoint_triggers, test_combined_checkpoint_trigger_every_at) { + auto rules = Data::nils(2); + rules[0] = Data::dict("start", Data(), "stop", Data(), "every", 10); + rules[1] = Data::dict("at", Data::list(3, 7, 13, 17)); + CombinedCheckpointTriggers trigger(rules); + + ASSERT_DOUBLE_EQ(trigger.next_checkpoint(-11.).get(), -10); + ASSERT_DOUBLE_EQ(trigger.previous_checkpoint(-11).get(), -20); + + ASSERT_DOUBLE_EQ(trigger.next_checkpoint(0.).get(), 3); + ASSERT_DOUBLE_EQ(trigger.previous_checkpoint(0.).get(), 0); + + ASSERT_DOUBLE_EQ(trigger.next_checkpoint(8.3).get(), 10); + ASSERT_DOUBLE_EQ(trigger.previous_checkpoint(8.3).get(), 7); + + ASSERT_DOUBLE_EQ(trigger.next_checkpoint(14.2).get(), 17); + ASSERT_DOUBLE_EQ(trigger.previous_checkpoint(14.2).get(), 13); + + ASSERT_DOUBLE_EQ(trigger.next_checkpoint(25.2).get(), 30); + ASSERT_DOUBLE_EQ(trigger.previous_checkpoint(25.2).get(), 20); +} + +TEST(libmuscle_checkpoint_triggers, test_combined_checkpoint_trigger_at_ranges) { + auto rules = Data::nils(3); + rules[0] = Data::dict("at", Data::list(3, 7, 13, 17)); + rules[1] = Data::dict("start", 0, "stop", 20, "every", 5); + rules[2] = Data::dict("start", 20, "stop", 100, "every", 20); + CombinedCheckpointTriggers trigger(rules); + + ASSERT_DOUBLE_EQ(trigger.next_checkpoint(-11.).get(), 0); + ASSERT_FALSE(trigger.previous_checkpoint(-11).is_set()); + + ASSERT_DOUBLE_EQ(trigger.next_checkpoint(0.).get(), 3); + ASSERT_DOUBLE_EQ(trigger.previous_checkpoint(0.).get(), 0); + + ASSERT_DOUBLE_EQ(trigger.next_checkpoint(8.3).get(), 10); + ASSERT_DOUBLE_EQ(trigger.previous_checkpoint(8.3).get(), 7); + + ASSERT_DOUBLE_EQ(trigger.next_checkpoint(14.2).get(), 15); + ASSERT_DOUBLE_EQ(trigger.previous_checkpoint(14.2).get(), 13); + + 
ASSERT_DOUBLE_EQ(trigger.next_checkpoint(19.3).get(), 20); + ASSERT_DOUBLE_EQ(trigger.previous_checkpoint(19.3).get(), 17); + + ASSERT_DOUBLE_EQ(trigger.next_checkpoint(25.2).get(), 40); + ASSERT_DOUBLE_EQ(trigger.previous_checkpoint(25.2).get(), 20); + + ASSERT_DOUBLE_EQ(trigger.next_checkpoint(95.2).get(), 100); + ASSERT_DOUBLE_EQ(trigger.previous_checkpoint(95.2).get(), 80); + + ASSERT_FALSE(trigger.next_checkpoint(125.2).is_set()); + ASSERT_DOUBLE_EQ(trigger.previous_checkpoint(125.2).get(), 100); +} + +TEST(libmuscle_checkpoint_triggers, test_trigger_manager_reference_time) { + auto encoded_checkpoints = Data::dict( + "at_end", true, + "wallclock_time", Data::list(), + "simulation_time", Data::list() + ); + + auto start = std::chrono::steady_clock::now(); + double ref_elapsed = 15.0; + + TriggerManager trigger_manager; + trigger_manager.set_checkpoint_info(ref_elapsed, encoded_checkpoints); + double elapsed_walltime = trigger_manager.elapsed_walltime(); + auto duration = std::chrono::steady_clock::now() - start; + double duration_d = std::chrono::duration(duration).count(); + + ASSERT_LT(ref_elapsed, elapsed_walltime); + ASSERT_LT(elapsed_walltime, ref_elapsed + duration_d); +} + +TEST(libmuscle_checkpoint_triggers, test_trigger_manager) { + double ref_elapsed = 0.0; + TriggerManager trigger_manager; + + auto wallclock_time_list = Data::nils(1); + wallclock_time_list[0] = Data::dict("at", Data::list(1e-12)); + auto simulation_time_list = Data::nils(1); + simulation_time_list[0] = Data::dict("at", Data::list(1, 3, 5)); + auto encoded_checkpoints = Data::dict( + "at_end", true, + "wallclock_time", wallclock_time_list, + "simulation_time", simulation_time_list + ); + trigger_manager.set_checkpoint_info(ref_elapsed, encoded_checkpoints); + + ASSERT_TRUE(trigger_manager.should_save_snapshot(0.1)); + auto triggers = trigger_manager.get_triggers(); + ASSERT_EQ(triggers.size(), 1); + ASSERT_NE(triggers[0].find("wallclock_time"), std::string::npos); + 
trigger_manager.update_checkpoints(0.1); + + ASSERT_FALSE(trigger_manager.should_save_snapshot(0.99)); + + ASSERT_TRUE(trigger_manager.should_save_snapshot(3.2)); + triggers = trigger_manager.get_triggers(); + ASSERT_EQ(triggers.size(), 1); + ASSERT_NE(triggers[0].find("simulation_time"), std::string::npos); + trigger_manager.update_checkpoints(3.2); + + ASSERT_TRUE(trigger_manager.should_save_final_snapshot(true, 7.0)); + ASSERT_GT(trigger_manager.get_triggers().size(), 0); + trigger_manager.update_checkpoints(7.0); + + ASSERT_FALSE(trigger_manager.should_save_snapshot(7.1)); + + ASSERT_TRUE(trigger_manager.should_save_final_snapshot(false, {})); + trigger_manager.update_checkpoints(7.1); +} + +TEST(libmuscle_checkpoint_triggers, test_no_checkpointing) { + TriggerManager trigger_manager; + auto encoded_checkpoints = Data::dict( + "at_end", false, + "wallclock_time", Data::list(), + "simulation_time", Data::list() + ); + trigger_manager.set_checkpoint_info(0.0, encoded_checkpoints); + ASSERT_FALSE(trigger_manager.should_save_snapshot(1)); + ASSERT_FALSE(trigger_manager.should_save_snapshot(5000)); + ASSERT_FALSE(trigger_manager.should_save_final_snapshot(false, {})); +} diff --git a/libmuscle/python/libmuscle/checkpoint_triggers.py b/libmuscle/python/libmuscle/checkpoint_triggers.py index 5d47c0d3..887a4d54 100644 --- a/libmuscle/python/libmuscle/checkpoint_triggers.py +++ b/libmuscle/python/libmuscle/checkpoint_triggers.py @@ -78,9 +78,6 @@ class RangeCheckpointTrigger(CheckpointTrigger): Start may be omitted, in which case the range is equivalent to an "at" rule ``[..., -n*step, ..., -step, 0, step, 2*step, ...]`` for as long as ``i*step <= stop``. 
- - Note: the "every" rule is a special case of a range with start and stop - omitted, and is handled by this class as well """ def __init__(self, range: CheckpointRangeRule) -> None: @@ -118,7 +115,7 @@ def previous_checkpoint(self, cur_time: float) -> Optional[float]: class CombinedCheckpointTriggers(CheckpointTrigger): - """Checkpoint trigger based on a combination of "every", "at" and "ranges" + """Checkpoint trigger based on a combination of "at" and "ranges" """ def __init__(self, checkpoint_rules: List[CheckpointRule]) -> None: diff --git a/libmuscle/python/libmuscle/test/test_checkpoint_triggers.py b/libmuscle/python/libmuscle/test/test_checkpoint_triggers.py index e111a758..c4df4d56 100644 --- a/libmuscle/python/libmuscle/test/test_checkpoint_triggers.py +++ b/libmuscle/python/libmuscle/test/test_checkpoint_triggers.py @@ -142,7 +142,7 @@ def test_trigger_manager_reference_time(): trigger_manager.set_checkpoint_info(ref_elapsed, Checkpoints(at_end=True)) elapsed_walltime = trigger_manager.elapsed_walltime() duration = time.monotonic() - monotonic_start - assert ref_elapsed < elapsed_walltime <= (ref_elapsed + duration) + assert ref_elapsed < elapsed_walltime < (ref_elapsed + duration) def test_trigger_manager(): From 22c2d85502ebfd58b6ac8706d64a0e90107361a2 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Thu, 2 Feb 2023 17:11:51 +0100 Subject: [PATCH 043/188] Port Snapshot to C++ --- libmuscle/cpp/src/libmuscle/snapshot.cpp | 124 ++++++++++++++++++ libmuscle/cpp/src/libmuscle/snapshot.hpp | 49 +++++++ .../cpp/src/libmuscle/tests/test_snapshot.cpp | 59 +++++++++ 3 files changed, 232 insertions(+) create mode 100644 libmuscle/cpp/src/libmuscle/snapshot.cpp create mode 100644 libmuscle/cpp/src/libmuscle/snapshot.hpp create mode 100644 libmuscle/cpp/src/libmuscle/tests/test_snapshot.cpp diff --git a/libmuscle/cpp/src/libmuscle/snapshot.cpp b/libmuscle/cpp/src/libmuscle/snapshot.cpp new file mode 100644 index 00000000..a2e7fa65 --- /dev/null +++ 
b/libmuscle/cpp/src/libmuscle/snapshot.cpp @@ -0,0 +1,124 @@ +#include + +#include + +#include +#include + + +namespace libmuscle { namespace impl { + +Snapshot::Snapshot( + std::vector triggers, + double wallclock_time, + std::unordered_map> port_message_counts, + bool is_final_snapshot, + Optional message, + ::ymmsl::Settings settings_overlay + ) + : triggers_(triggers) + , wallclock_time_(wallclock_time) + , port_message_counts_(port_message_counts) + , is_final_snapshot_(is_final_snapshot) + , message_(message) + , settings_overlay_(settings_overlay) + {} + +Snapshot Snapshot::from_bytes(DataConstRef const & data) { + // decode + auto zone = std::make_shared(); + DataConstRef dict = mcp::unpack_data(zone, data.as_byte_array(), data.size()); + + // convert lists/dicts to vectors/unordered_maps + std::vector triggers; + auto data_triggers = dict["triggers"]; + for (std::size_t i=0; i()); + } + + std::unordered_map> port_message_counts; + auto data_pmc = dict["port_message_counts"]; + for (std::size_t i=0; i counts; + for (std::size_t j=0; j()); + } + port_message_counts[data_pmc.key(i)] = counts; + } + + Optional message; + if (!dict["message"].is_nil()) { + auto mpp_message = MPPMessage::from_bytes(dict["message"]); + message = Message(mpp_message.timestamp, mpp_message.data); + if (mpp_message.next_timestamp.is_set()) { + message.get().set_next_timestamp(mpp_message.next_timestamp.get()); + } + if (!mpp_message.settings_overlay.is_nil()) { + message.get().set_settings( + mpp_message.settings_overlay.as<::ymmsl::Settings>()); + } + } + + return Snapshot( + triggers, + dict["wallclock_time"].as(), + port_message_counts, + dict["is_final_snapshot"].as(), + message, + dict["settings_overlay"].as<::ymmsl::Settings>() + ); +} + +DataConstRef Snapshot::to_bytes() const { + Data triggers = Data::nils(triggers_.size()); + for (std::size_t i=0; i(), + msg.has_settings() ? 
msg.settings() : Data(), + 0, + -1.0, + msg.data()); + auto encoded = mpp_msg.encoded(); + // unfortunately need to create a copy of the byte array here... + message = Data::byte_array(encoded.size()); + memcpy(message.as_byte_array(), encoded.as_byte_array(), encoded.size()); + } + + Data dict = Data::dict( + "triggers", triggers, + "wallclock_time", wallclock_time_, + "port_message_counts", port_message_counts, + "is_final_snapshot", is_final_snapshot_, + "message", message, + "settings_overlay", Data(settings_overlay_) + ); + + msgpack::sbuffer sbuf; + msgpack::pack(sbuf, dict); + + auto bytes = Data::byte_array(sbuf.size()); + memcpy(bytes.as_byte_array(), sbuf.data(), sbuf.size()); + + return bytes; +} + +} } diff --git a/libmuscle/cpp/src/libmuscle/snapshot.hpp b/libmuscle/cpp/src/libmuscle/snapshot.hpp new file mode 100644 index 00000000..97273d92 --- /dev/null +++ b/libmuscle/cpp/src/libmuscle/snapshot.hpp @@ -0,0 +1,49 @@ +#pragma once + +#include +#include +#include + +#include +#include +#include + +#include + +namespace libmuscle { namespace impl { + +/** Snapshot data structure. + */ +class Snapshot { + public: + Snapshot( + std::vector triggers, + double wallclock_time, + std::unordered_map> port_message_counts, + bool is_final_snapshot, + Optional message, + ::ymmsl::Settings settings_overlay); + + /** Create a snapshot object from binary data. + * + * @param data binary data representing the snapshot. Note that this must + * exclude the versioning byte. + */ + static Snapshot from_bytes(DataConstRef const & data); + + /** Convert the snapshot object to binary data. + * + * @return DataConstRef Binary data representing the snapshot. Note that this + * excludes the versioning byte. 
+ */ + DataConstRef to_bytes() const; + + std::vector triggers_; + double wallclock_time_; + std::unordered_map> port_message_counts_; + bool is_final_snapshot_; + Optional message_; + ::ymmsl::Settings settings_overlay_; +}; + +} } diff --git a/libmuscle/cpp/src/libmuscle/tests/test_snapshot.cpp b/libmuscle/cpp/src/libmuscle/tests/test_snapshot.cpp new file mode 100644 index 00000000..227e7df5 --- /dev/null +++ b/libmuscle/cpp/src/libmuscle/tests/test_snapshot.cpp @@ -0,0 +1,59 @@ +#include + +#include + +using libmuscle::impl::Data; +using libmuscle::impl::Message; +using libmuscle::impl::Snapshot; + +int main(int argc, char *argv[]) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + +Snapshot create_snapshot() { + ::ymmsl::Settings settings; + settings["test"] = 1; + return Snapshot( + {"test triggers"}, + 15.3, + {{"in", {1}}, {"out", {4}}, {"muscle_settings_in", {0}}}, + true, + Message(1.2, "test_data"), + settings + ); +} + +TEST(libmuscle_snapshot, test_snapshot) { + auto snapshot = create_snapshot(); + + ASSERT_EQ(snapshot.triggers_.size(), 1); + ASSERT_STREQ(snapshot.triggers_[0].c_str(), "test triggers"); + ASSERT_DOUBLE_EQ(snapshot.wallclock_time_, 15.3); + ASSERT_EQ(snapshot.port_message_counts_.size(), 3); + ASSERT_EQ(snapshot.port_message_counts_.at("in"), std::vector({1})); + ASSERT_EQ(snapshot.port_message_counts_.at("out"), std::vector({4})); + ASSERT_EQ(snapshot.port_message_counts_.at("muscle_settings_in"), + std::vector({0})); + ASSERT_TRUE(snapshot.is_final_snapshot_); + ASSERT_TRUE(snapshot.message_.is_set()); + ASSERT_DOUBLE_EQ(snapshot.message_.get().timestamp(), 1.2); + ASSERT_FALSE(snapshot.message_.get().has_next_timestamp()); + ASSERT_FALSE(snapshot.message_.get().has_settings()); + ASSERT_STREQ(snapshot.message_.get().data().as().c_str(), + "test_data"); + ASSERT_EQ(snapshot.settings_overlay_["test"], 1); + + auto binary_snapshot = snapshot.to_bytes(); + Snapshot snapshot2 = 
Snapshot::from_bytes(binary_snapshot); + + ASSERT_EQ(snapshot.triggers_, snapshot2.triggers_); + ASSERT_EQ(snapshot.wallclock_time_, snapshot2.wallclock_time_); + ASSERT_EQ(snapshot.port_message_counts_, snapshot2.port_message_counts_); + ASSERT_EQ(snapshot.is_final_snapshot_, snapshot2.is_final_snapshot_); + ASSERT_EQ(snapshot.message_.get().timestamp(), + snapshot2.message_.get().timestamp()); + ASSERT_EQ(snapshot.message_.get().data().as(), + snapshot2.message_.get().data().as()); + ASSERT_EQ(snapshot.settings_overlay_, snapshot2.settings_overlay_); +} From dc31aaa982e993f517d6ee4eac5ef5e66fab6114 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Fri, 3 Feb 2023 15:56:44 +0100 Subject: [PATCH 044/188] Complete porting of snapshot.py to C++ --- libmuscle/cpp/src/libmuscle/snapshot.cpp | 39 ++++++++++++++++++ libmuscle/cpp/src/libmuscle/snapshot.hpp | 27 ++++++++++++ .../cpp/src/libmuscle/tests/test_snapshot.cpp | 41 +++++++++++++++++++ 3 files changed, 107 insertions(+) diff --git a/libmuscle/cpp/src/libmuscle/snapshot.cpp b/libmuscle/cpp/src/libmuscle/snapshot.cpp index a2e7fa65..f1c3a233 100644 --- a/libmuscle/cpp/src/libmuscle/snapshot.cpp +++ b/libmuscle/cpp/src/libmuscle/snapshot.cpp @@ -1,5 +1,7 @@ #include +#include + #include #include @@ -121,4 +123,41 @@ DataConstRef Snapshot::to_bytes() const { return bytes; } +SnapshotMetadata::SnapshotMetadata( + std::vector triggers, + double wallclock_time, + double timestamp, + Optional next_timestamp, + std::unordered_map> port_message_counts, + bool is_final_snapshot, + std::string snapshot_filename) + : triggers_(triggers) + , wallclock_time_(wallclock_time) + , timestamp_(timestamp) + , next_timestamp_(next_timestamp) + , port_message_counts_(port_message_counts) + , is_final_snapshot_(is_final_snapshot) + , snapshot_filename_(snapshot_filename) + {} + +SnapshotMetadata SnapshotMetadata::from_snapshot( + Snapshot const & snapshot, std::string snapshot_filename) { + double timestamp = NAN; + Optional 
next_timestamp; + if (snapshot.message_.is_set()) { + timestamp = snapshot.message_.get().timestamp(); + if (snapshot.message_.get().has_next_timestamp()) { + next_timestamp = snapshot.message_.get().next_timestamp(); + } + } + return SnapshotMetadata( + snapshot.triggers_, + snapshot.wallclock_time_, + timestamp, + next_timestamp, + snapshot.port_message_counts_, + snapshot.is_final_snapshot_, + snapshot_filename); +} + } } diff --git a/libmuscle/cpp/src/libmuscle/snapshot.hpp b/libmuscle/cpp/src/libmuscle/snapshot.hpp index 97273d92..b934435c 100644 --- a/libmuscle/cpp/src/libmuscle/snapshot.hpp +++ b/libmuscle/cpp/src/libmuscle/snapshot.hpp @@ -46,4 +46,31 @@ class Snapshot { ::ymmsl::Settings settings_overlay_; }; +/** Metadata of a snapshot for sending to the muscle_manager. + */ +class SnapshotMetadata { + public: + SnapshotMetadata( + std::vector triggers, + double wallclock_time, + double timestamp, + Optional next_timestamp, + std::unordered_map> port_message_counts, + bool is_final_snapshot, + std::string snapshot_filename); + + /** Create snapshot metadata from the given snapshot and filename + */ + static SnapshotMetadata from_snapshot( + Snapshot const & snapshot, std::string snapshot_filename); + + std::vector triggers_; + double wallclock_time_; + double timestamp_; + Optional next_timestamp_; + std::unordered_map> port_message_counts_; + bool is_final_snapshot_; + std::string snapshot_filename_; +}; + } } diff --git a/libmuscle/cpp/src/libmuscle/tests/test_snapshot.cpp b/libmuscle/cpp/src/libmuscle/tests/test_snapshot.cpp index 227e7df5..67692c29 100644 --- a/libmuscle/cpp/src/libmuscle/tests/test_snapshot.cpp +++ b/libmuscle/cpp/src/libmuscle/tests/test_snapshot.cpp @@ -4,7 +4,9 @@ using libmuscle::impl::Data; using libmuscle::impl::Message; +using libmuscle::impl::Optional; using libmuscle::impl::Snapshot; +using libmuscle::impl::SnapshotMetadata; int main(int argc, char *argv[]) { ::testing::InitGoogleTest(&argc, argv); @@ -57,3 +59,42 @@ 
TEST(libmuscle_snapshot, test_snapshot) { snapshot2.message_.get().data().as()); ASSERT_EQ(snapshot.settings_overlay_, snapshot2.settings_overlay_); } + +TEST(libmuscle_snapshot, test_snapshot_metadata) { + auto snapshot = create_snapshot(); + + auto metadata = SnapshotMetadata::from_snapshot(snapshot, "test"); + ASSERT_EQ(metadata.triggers_, snapshot.triggers_); + ASSERT_EQ(metadata.wallclock_time_, snapshot.wallclock_time_); + ASSERT_EQ(metadata.port_message_counts_, snapshot.port_message_counts_); + ASSERT_EQ(metadata.is_final_snapshot_, snapshot.is_final_snapshot_); + ASSERT_EQ(metadata.timestamp_, snapshot.message_.get().timestamp()); + ASSERT_EQ(metadata.next_timestamp_.is_set(), + snapshot.message_.get().has_next_timestamp()); + ASSERT_EQ(metadata.snapshot_filename_, "test"); +} + +TEST(libmuscle_snapshot, test_message_with_settings) { + ::ymmsl::Settings settings; + settings["settings"] = true; + Message message(1.0, 2.0, "test_data", settings); + Snapshot snapshot ({}, 0, {}, false, message, {}); + ASSERT_TRUE(snapshot.message_.get().settings().at("settings").as()); + + auto binary_snapshot = snapshot.to_bytes(); + Snapshot snapshot2 = Snapshot::from_bytes(binary_snapshot); + + ASSERT_TRUE(snapshot2.message_.get().settings().at("settings").as()); +} + +TEST(libmuscle_snapshot, test_implicit_snapshot) { + Optional message; + Snapshot snapshot({}, 0, {}, true, message, {}); + ASSERT_FALSE(snapshot.message_.is_set()); + + + auto binary_snapshot = snapshot.to_bytes(); + Snapshot snapshot2 = Snapshot::from_bytes(binary_snapshot); + + ASSERT_FALSE(snapshot2.message_.is_set()); +} From 440748cd2ba12cb7771f92e2b46161781794554a Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 7 Feb 2023 14:22:56 +0100 Subject: [PATCH 045/188] Port SnapshotManager to C++ --- libmuscle/cpp/src/libmuscle/snapshot.cpp | 8 +- libmuscle/cpp/src/libmuscle/snapshot.hpp | 12 +- .../cpp/src/libmuscle/snapshot_manager.cpp | 246 ++++++++++++++++++ 
.../cpp/src/libmuscle/snapshot_manager.hpp | 110 ++++++++ .../libmuscle/tests/test_snapshot_manager.cpp | 230 ++++++++++++++++ .../python/libmuscle/snapshot_manager.py | 16 +- 6 files changed, 606 insertions(+), 16 deletions(-) create mode 100644 libmuscle/cpp/src/libmuscle/snapshot_manager.cpp create mode 100644 libmuscle/cpp/src/libmuscle/snapshot_manager.hpp create mode 100644 libmuscle/cpp/src/libmuscle/tests/test_snapshot_manager.cpp diff --git a/libmuscle/cpp/src/libmuscle/snapshot.cpp b/libmuscle/cpp/src/libmuscle/snapshot.cpp index f1c3a233..d290cc30 100644 --- a/libmuscle/cpp/src/libmuscle/snapshot.cpp +++ b/libmuscle/cpp/src/libmuscle/snapshot.cpp @@ -124,13 +124,13 @@ DataConstRef Snapshot::to_bytes() const { } SnapshotMetadata::SnapshotMetadata( - std::vector triggers, + std::vector const & triggers, double wallclock_time, double timestamp, Optional next_timestamp, - std::unordered_map> port_message_counts, + std::unordered_map> const & port_message_counts, bool is_final_snapshot, - std::string snapshot_filename) + std::string const & snapshot_filename) : triggers_(triggers) , wallclock_time_(wallclock_time) , timestamp_(timestamp) @@ -141,7 +141,7 @@ SnapshotMetadata::SnapshotMetadata( {} SnapshotMetadata SnapshotMetadata::from_snapshot( - Snapshot const & snapshot, std::string snapshot_filename) { + Snapshot const & snapshot, std::string const & snapshot_filename) { double timestamp = NAN; Optional next_timestamp; if (snapshot.message_.is_set()) { diff --git a/libmuscle/cpp/src/libmuscle/snapshot.hpp b/libmuscle/cpp/src/libmuscle/snapshot.hpp index b934435c..815d0fae 100644 --- a/libmuscle/cpp/src/libmuscle/snapshot.hpp +++ b/libmuscle/cpp/src/libmuscle/snapshot.hpp @@ -16,6 +16,10 @@ namespace libmuscle { namespace impl { */ class Snapshot { public: + enum class VersionByte : char { + MESSAGEPACK = '1' + }; + Snapshot( std::vector triggers, double wallclock_time, @@ -51,18 +55,18 @@ class Snapshot { class SnapshotMetadata { public: SnapshotMetadata( 
- std::vector triggers, + std::vector const & triggers, double wallclock_time, double timestamp, Optional next_timestamp, - std::unordered_map> port_message_counts, + std::unordered_map> const & port_message_counts, bool is_final_snapshot, - std::string snapshot_filename); + std::string const & snapshot_filename); /** Create snapshot metadata from the given snapshot and filename */ static SnapshotMetadata from_snapshot( - Snapshot const & snapshot, std::string snapshot_filename); + Snapshot const & snapshot, std::string const & snapshot_filename); std::vector triggers_; double wallclock_time_; diff --git a/libmuscle/cpp/src/libmuscle/snapshot_manager.cpp b/libmuscle/cpp/src/libmuscle/snapshot_manager.cpp new file mode 100644 index 00000000..cd5534d4 --- /dev/null +++ b/libmuscle/cpp/src/libmuscle/snapshot_manager.cpp @@ -0,0 +1,246 @@ +#include + +#include + +#include +#include +#include +#include + +// Note: using POSIX for filesystem calls +// Could be upgraded to std::filesystem when targeting C++17 or later +#include +#include + +#define MAX_FILE_EXISTS_CHECK 10000 + +using namespace std::string_literals; + +namespace libmuscle { namespace impl { + +SnapshotManager::SnapshotManager( + ymmsl::Reference const & instance_id, + MMPClient & manager, + Communicator & communicator, + Logger & logger) + : instance_id_(instance_id) + , manager_(manager) + , communicator_(communicator) + , logger_(logger) + , resume_from_snapshot_() + , resume_overlay_() + , next_snapshot_num_(1) + , snapshot_directory_() +{ + // replace identifier[i] by identifier-i to use in snapshot file name + // using a dash (-) because that is not allowed in Identifiers + safe_id_ = static_cast(instance_id_); + std::replace(safe_id_.begin(), safe_id_.end(), '[', '-'); + safe_id_.erase( + std::remove(safe_id_.begin(), safe_id_.end(), ']'), + safe_id_.end()); +} + +namespace { + +// Adapted from https://stackoverflow.com/a/2203853 +std::string get_working_path() +{ + char temp [ PATH_MAX ]; + + if ( 
getcwd(temp, PATH_MAX) != 0) + return std::string ( temp ); + + int error = errno; + std::ostringstream str; + str << "Error retrieving current working directory: " << error; + throw std::runtime_error(str.str()); +} + +} + +Optional SnapshotManager::prepare_resume( + Optional const & resume_snapshot, + Optional const & snapshot_directory) { + Optional result; + + if (snapshot_directory.is_set() && !snapshot_directory.get().empty()) { + snapshot_directory_ = snapshot_directory.get(); + } else { + // store snapshots in current working directory + snapshot_directory_ = get_working_path(); + } + + if (resume_snapshot.is_set()) { + auto snapshot = load_snapshot_from_file(resume_snapshot.get()); + + if (snapshot.message_.is_set()) { + // snapshot.message is not set for implicit snapshots + resume_from_snapshot_ = snapshot; + result = snapshot.message_.get().timestamp(); + } + resume_overlay_ = snapshot.settings_overlay_; + + // TODO: implement this on Communicator! + // communicator_.restore_message_counts(snapshot.port_message_counts_); + // Store a copy of the snapshot in the current run directory + auto path = store_snapshot_(snapshot); + auto metadata = SnapshotMetadata::from_snapshot(snapshot, path); + // TODO: implement this on MMPClient! 
+ // manager_.submit_snapshot_metadata(instance_id_, metadata); + } + + return result; +} + +bool SnapshotManager::resuming_from_intermediate() { + return (resume_from_snapshot_.is_set() && + !resume_from_snapshot_.get().is_final_snapshot_); +} + +bool SnapshotManager::resuming_from_final() { + return (resume_from_snapshot_.is_set() && + resume_from_snapshot_.get().is_final_snapshot_); +} + +Message SnapshotManager::load_snapshot() { + if (!resume_from_snapshot_.is_set()) + throw std::runtime_error("Error: no snapshot to load."); + return resume_from_snapshot_.get().message_.get(); +} + +double SnapshotManager::save_snapshot( + Optional message, bool final, + std::vector const & triggers, double wallclock_time, + Optional f_init_max_timestamp, + ::ymmsl::Settings settings_overlay) { + // TODO: implement this on Communicator! + //auto port_message_counts = communicator_.get_message_counts(); + std::unordered_map> port_message_counts; + + if (final) { + // Decrease F_INIT port counts by one: F_INIT messages are already + // pre-received, but not yet processed by the user code. Therefore, + // the snapshot state should treat these as not-received. + auto all_ports = communicator_.list_ports(); + auto ports = all_ports.find(::ymmsl::Operator::F_INIT); + if (ports != all_ports.end()) { + for (auto const & port_name : ports->second) { + for (auto & count : port_message_counts[port_name]) { + --count; + } + } + } + if (communicator_.settings_in_connected()) { + for (auto & count : port_message_counts["muscle_settings_in"]) { + --count; + } + } + } + + Snapshot snapshot( + triggers, wallclock_time, port_message_counts, final, message, + settings_overlay); + + auto path = store_snapshot_(snapshot); + auto metadata = SnapshotMetadata::from_snapshot(snapshot, path); + // TODO: implement this on MMPClient! + // manager_.submit_snapshot_metadata(instance_id_, metadata); + + double timestamp = message.is_set() ? 
message.get().timestamp() : -INFINITY; + if (final && f_init_max_timestamp.is_set()) { + // For final snapshots f_init_max_timestamp is the reference time (see + // should_save_final_snapshot). + timestamp = f_init_max_timestamp.get(); + } + return timestamp; +} + +Snapshot SnapshotManager::load_snapshot_from_file( + std::string const & snapshot_location) { + logger_.debug("Loading snapshot from " + snapshot_location); + + std::ifstream snapshot_file(snapshot_location, std::ios::binary); + if (!snapshot_file.good()) { + throw std::runtime_error( + "Unable to load snapshot: " + snapshot_location + + " cannot be opened. Please ensure this path exists and can be read."); + } + + // get the size of the file + snapshot_file.seekg(0, std::ios::end); + std::size_t fsize = snapshot_file.tellg(); + snapshot_file.seekg(0, std::ios::beg); + + if (fsize < 1) { + throw std::runtime_error( + "Unable to load snapshot: " + snapshot_location + " is an empty file."); + } + + char version; + Data data = Data::byte_array(fsize - 1); + snapshot_file.read(&version, 1); + snapshot_file.read(data.as_byte_array(), fsize - 1); + if (!snapshot_file.good()) { + throw std::runtime_error( + "Unable to load snapshot file " + snapshot_location + + ". I/O error while reading file."); + } + + switch (static_cast(version)) { + case Snapshot::VersionByte::MESSAGEPACK: + return Snapshot::from_bytes(data); + default: + throw std::runtime_error( + "Unable to load snapshot file " + snapshot_location + + ": unknown version of snapshot file. 
Was the file saved with a" + " different version of libmuscle or edited?"); + } +} + +std::string SnapshotManager::store_snapshot_(Snapshot const & snapshot) { + logger_.debug("Saving snapshot to " + snapshot_directory_); + std::ofstream snapshot_file; + std::string fpath; + for (int i=0; i(Snapshot::VersionByte::MESSAGEPACK); + auto data = snapshot.to_bytes(); + snapshot_file.write(data.as_byte_array(), data.size()); + snapshot_file.close(); + if (!snapshot_file.good()) { + throw std::runtime_error("I/O error while writing snapshot to file " + fpath); + } + return fpath; +} + +} } diff --git a/libmuscle/cpp/src/libmuscle/snapshot_manager.hpp b/libmuscle/cpp/src/libmuscle/snapshot_manager.hpp new file mode 100644 index 00000000..7b8915fd --- /dev/null +++ b/libmuscle/cpp/src/libmuscle/snapshot_manager.hpp @@ -0,0 +1,110 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include + +#include + +namespace libmuscle { namespace impl { + +/** Manages information on snapshots for the Instance + * + * Implements the saving and loading of snapshots in the checkpointing API. + */ +class SnapshotManager { + public: + /** Create a new snapshot manager + * + * @param instance_id The id of this instance. + * @param manager The client used to submit data to the manager. + * @param communicator The communicator belonging to this instance. + */ + SnapshotManager( + ymmsl::Reference const & instance_id, + MMPClient & manager, + Communicator & communicator, + Logger & logger); + + /** Apply checkpoint info received from the manager. + * + * If there is a snapshot to resume from, this loads it and does + * any resume work that libmuscle should do, including restoring + * message counts and storing the resumed-from snapshot again as 
+ * + * @param resume_snapshot Snapshot to resume from (or None if not resuming) + * @param snapshot_directory directory to save snapshots in + * @return Optional Time at which the initial snapshot was saved, + * if resuming. + */ + Optional prepare_resume( + Optional const & resume_snapshot, + Optional const & snapshot_directory); + + /** Check whether we have an intermediate snapshot. + * + * Doesn't say whether we should resume now, just that we were + * given an intermediate snapshot to resume from by the manager. + */ + bool resuming_from_intermediate(); + + /** Check whether we have a final snapshot. + * + * Doesn't say whether we should resume now, just that we were + * given an intermediate snapshot to resume from by the manager. + */ + bool resuming_from_final(); + + /** Get the Message to resume from. + */ + Message load_snapshot(); + + /** Save a (final) snapshot. + * + * @param message Message object representing the snapshot. + * @param final True iff called from save_final_snapshot. + * @param triggers Description of checkpoints that triggered this. + * @param wallclock_time Wallclock time when saving. + * @param f_init_max_timestamp Timestamp for final snapshots. + * @param settings_overlay Current settings overlay. + * @return double Simulation time at which the snapshot was made. + */ + double save_snapshot( + Optional message, bool final, + std::vector const & triggers, double wallclock_time, + Optional f_init_max_timestamp, + ::ymmsl::Settings settings_overlay); + + /** Load a previously stored snapshot from the filesystem. + * + * @param snapshot_location path where the snapshot is stored. 
+ * @return Snapshot + */ + Snapshot load_snapshot_from_file(std::string const & snapshot_location); + + private: + ymmsl::Reference const & instance_id_; + MMPClient & manager_; + Communicator & communicator_; + Logger & logger_; + Optional resume_from_snapshot_; + ::ymmsl::Settings resume_overlay_; + int next_snapshot_num_; + std::string snapshot_directory_; + std::string safe_id_; + + /** Store a snapshot on the filesystem. + * + * @param snapshot Snapshot to store. + * @return std::string Path where the snapshot is stored. + */ + std::string store_snapshot_(Snapshot const & snapshot); +}; + +} } diff --git a/libmuscle/cpp/src/libmuscle/tests/test_snapshot_manager.cpp b/libmuscle/cpp/src/libmuscle/tests/test_snapshot_manager.cpp new file mode 100644 index 00000000..22688e43 --- /dev/null +++ b/libmuscle/cpp/src/libmuscle/tests/test_snapshot_manager.cpp @@ -0,0 +1,230 @@ +// Inject mocks +#define LIBMUSCLE_MOCK_COMMUNICATOR +#define LIBMUSCLE_MOCK_LOGGER +#define LIBMUSCLE_MOCK_MMP_CLIENT + +// into the real implementation, +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// then add mock implementations as needed. 
+#include +#include +#include + +// Test code dependencies +#include +#include +#include +#include +#include + +#include +#include + +// Note: using POSIX for filesystem calls +// Could be upgraded to std::filesystem when targeting C++17 or later +#include +#include + +using libmuscle::impl::Data; +using libmuscle::impl::Message; +using libmuscle::impl::MockCommunicator; +using libmuscle::impl::MockLogger; +using libmuscle::impl::MockMMPClient; +using libmuscle::impl::Optional; +using libmuscle::impl::Snapshot; +using libmuscle::impl::SnapshotMetadata; +using libmuscle::impl::SnapshotManager; +using ymmsl::Reference; + +int main(int argc, char *argv[]) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + +/* Mocks have internal state, which needs to be reset before each test. This + * means that the tests are not reentrant, and cannot be run in parallel. + * It's all fast enough, so that's not a problem. + */ +void reset_mocks() { + MockCommunicator::reset(); + MockMMPClient::reset(); +} + +MockLogger & mock_logger() { + static MockLogger logger; + return logger; +} + +class libmuscle_snapshot_manager : public ::testing::Test { + protected: + void SetUp() override { + auto tmpnam = std::tmpnam(nullptr); + temp_dir_ = std::string(tmpnam); + if (mkdir(tmpnam, 0777) < 0) + throw std::runtime_error(strerror(errno)); + std::cerr << "DEBUG: using temp dir " << temp_dir_ << std::endl; + } + + void TearDown() override { + // simulate rm -rf `temp_dir_` using a file-tree-walk + if (nftw(temp_dir_.c_str(), + [](const char *fpath, const struct stat *sb, + int tflag, struct FTW *ftwbuf)->int{ + if (tflag == FTW_DP) { + std::cerr << "DEBUG: removing dir " << fpath << std::endl; + return rmdir(fpath); + } + if (tflag == FTW_F) { + std::cerr << "DEBUG: removing file " << fpath << std::endl; + return unlink(fpath); + } + std::cerr << "DEBUG: unknown file type " << fpath << std::endl; + return -1; + }, + 3, + FTW_DEPTH) < 0) { + throw 
std::runtime_error(strerror(errno)); + } + temp_dir_.clear(); + } + + std::string temp_dir_; +}; + +TEST_F(libmuscle_snapshot_manager, test_no_checkpointing) { + reset_mocks(); + + MockCommunicator communicator("test", {}, {}, mock_logger(), {}); + MockMMPClient manager(""); + SnapshotManager snapshot_manager("test", manager, communicator, mock_logger()); + + snapshot_manager.prepare_resume({}, temp_dir_); + ASSERT_FALSE(snapshot_manager.resuming_from_intermediate()); + ASSERT_FALSE(snapshot_manager.resuming_from_final()); +} + +TEST_F(libmuscle_snapshot_manager, test_save_load_snapshot) { + reset_mocks(); + + MockCommunicator communicator("test", {}, {}, mock_logger(), {}); + MockMMPClient manager(""); + Reference instance_id("test[1]"); + + SnapshotManager snapshot_manager(instance_id, manager, communicator, mock_logger()); + + snapshot_manager.prepare_resume({}, temp_dir_); + ASSERT_FALSE(snapshot_manager.resuming_from_intermediate()); + ASSERT_FALSE(snapshot_manager.resuming_from_final()); + + snapshot_manager.save_snapshot( + Message(0.2, "test data"), false, {"test"}, 13.0, {}, {}); + + // TODO: need to implement this on the mocks + // communicator.get_message_counts.assert_called_with() + // manager.submit_snapshot_metadata.assert_called() + // instance, metadata = manager.submit_snapshot_metadata.call_args[0] + // assert instance == instance_id + // assert isinstance(metadata, SnapshotMetadata) + // assert metadata.triggers == ['test'] + // assert metadata.wallclock_time == 13.0 + // assert metadata.timestamp == 0.2 + // assert metadata.next_timestamp is None + // assert metadata.port_message_counts == port_message_counts + // ASSERT_FALSE(metadata.is_final_snapshot); + // snapshot_path = Path(metadata.snapshot_filename) + // assert snapshot_path.parent == tmp_path + // assert snapshot_path.name == 'test-1_1.pack' + + // TODO: get from snapshot metadata instead of hardcoding + std::string snapshot_path = temp_dir_ + "/test-1_1.pack"; + + SnapshotManager 
snapshot_manager2( + instance_id, manager, communicator, mock_logger()); + snapshot_manager2.prepare_resume(snapshot_path, temp_dir_); + + ASSERT_TRUE(snapshot_manager2.resuming_from_intermediate()); + ASSERT_FALSE(snapshot_manager2.resuming_from_final()); + + auto msg = snapshot_manager2.load_snapshot(); + ASSERT_DOUBLE_EQ(msg.timestamp(), 0.2); + ASSERT_FALSE(msg.has_next_timestamp()); + ASSERT_EQ(msg.data().as(), "test data"); + + snapshot_manager2.save_snapshot( + Message(0.6, "test data2"), true, {"test"}, 42.2, 1.2, {}); + + // TODO: need to implement this on the mocks + // instance, metadata = manager.submit_snapshot_metadata.call_args[0] + // assert instance == instance_id + // assert isinstance(metadata, SnapshotMetadata) + // assert metadata.triggers == ['test'] + // assert metadata.wallclock_time == 42.2 + // assert metadata.timestamp == 0.6 + // assert metadata.next_timestamp is None + // assert metadata.port_message_counts == port_message_counts + // assert metadata.is_final_snapshot + // snapshot_path = Path(metadata.snapshot_filename) + // assert snapshot_path.parent == tmp_path + // assert snapshot_path.name == 'test-1_3.pack' + + ASSERT_TRUE(snapshot_manager2.resuming_from_intermediate()); + ASSERT_FALSE(snapshot_manager2.resuming_from_final()); + snapshot_manager2.load_snapshot(); + ASSERT_TRUE(snapshot_manager2.resuming_from_intermediate()); + ASSERT_FALSE(snapshot_manager2.resuming_from_final()); +} + +TEST_F(libmuscle_snapshot_manager, test_save_load_implicit_snapshot) { + reset_mocks(); + + MockCommunicator communicator("test", {}, {}, mock_logger(), {}); + MockMMPClient manager(""); + Reference instance_id("test[1]"); + + SnapshotManager snapshot_manager(instance_id, manager, communicator, mock_logger()); + + snapshot_manager.prepare_resume({}, temp_dir_); + ASSERT_FALSE(snapshot_manager.resuming_from_intermediate()); + ASSERT_FALSE(snapshot_manager.resuming_from_final()); + + // save implicit snapshot, i.e. 
Message=not set + snapshot_manager.save_snapshot({}, true, {"implicit"}, 1.0, 1.5, {}); + + // TODO: need to implement this on the mocks + // manager.submit_snapshot_metadata.assert_called_once() + // instance, metadata = manager.submit_snapshot_metadata.call_args[0] + // assert instance == instance_id + // assert isinstance(metadata, SnapshotMetadata) + // snapshot_path = Path(metadata.snapshot_filename) + // manager.submit_snapshot_metadata.reset_mock() + + // TODO: get from snapshot metadata instead of hardcoding + std::string snapshot_path = temp_dir_ + "/test-1_1.pack"; + + SnapshotManager snapshot_manager2( + instance_id, manager, communicator, mock_logger()); + + snapshot_manager2.prepare_resume(snapshot_path, temp_dir_); + // TODO: need to implement this on the mocks + // communicator.restore_message_counts.assert_called_with(port_message_counts) + // manager.submit_snapshot_metadata.assert_called_once() + // manager.submit_snapshot_metadata.reset_mock() + + ASSERT_FALSE(snapshot_manager2.resuming_from_intermediate()); + ASSERT_FALSE(snapshot_manager2.resuming_from_final()); + snapshot_manager2.save_snapshot({}, true, {"implicit"}, 12.3, 2.5, {}); + // TODO: need to implement this on the mocks + // manager.submit_snapshot_metadata.assert_called_once() +} diff --git a/libmuscle/python/libmuscle/snapshot_manager.py b/libmuscle/python/libmuscle/snapshot_manager.py index c4d1716e..fbdaef5b 100644 --- a/libmuscle/python/libmuscle/snapshot_manager.py +++ b/libmuscle/python/libmuscle/snapshot_manager.py @@ -21,8 +21,7 @@ class SnapshotManager: """Manages information on snapshots for the Instance - Implements the public checkpointing API with handoffs to - :class:`TriggerManager` for checkpoint triggers. + Implements the saving and loading of snapshots in the checkpointing API. """ def __init__(self, @@ -125,9 +124,10 @@ def save_snapshot( triggers: Description of checkpoints that triggered this. wallclock_time: Wallclock time when saving. 
f_init_max_timestamp: Timestamp for final snapshots. + settings_overlay: Current settings overlay. Returns: - Simulation time at which the snapshot was made + Simulation time at which the snapshot was made. """ port_message_counts = self._communicator.get_message_counts() if final: @@ -159,10 +159,10 @@ def save_snapshot( @staticmethod def load_snapshot_from_file(snapshot_location: Path) -> Snapshot: - """Load a previously stored snapshot from the filesystem + """Load a previously stored snapshot from the filesystem. Args: - snapshot_location: path where the snapshot is stored + snapshot_location: path where the snapshot is stored. """ _logger.debug(f'Loading snapshot from {snapshot_location}') if not snapshot_location.is_file(): @@ -184,13 +184,13 @@ def load_snapshot_from_file(snapshot_location: Path) -> Snapshot: ' edited?') def __store_snapshot(self, snapshot: Snapshot) -> Path: - """Store a snapshot on the filesystem + """Store a snapshot on the filesystem. Args: - snapshot: snapshot to store + snapshot: Snapshot to store. Returns: - Path where the snapshot is stored + Path where the snapshot is stored. 
""" _logger.debug(f'Saving snapshot to {self._snapshot_directory}') for _ in range(_MAX_FILE_EXISTS_CHECK): From 45a028ad47a3011d482bfd949f760e8b2fd53dac Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 13 Feb 2023 08:53:42 +0100 Subject: [PATCH 046/188] Update snapshot manager test for profiling changes --- .../libmuscle/tests/test_snapshot_manager.cpp | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/libmuscle/cpp/src/libmuscle/tests/test_snapshot_manager.cpp b/libmuscle/cpp/src/libmuscle/tests/test_snapshot_manager.cpp index 22688e43..a3c1fb36 100644 --- a/libmuscle/cpp/src/libmuscle/tests/test_snapshot_manager.cpp +++ b/libmuscle/cpp/src/libmuscle/tests/test_snapshot_manager.cpp @@ -2,6 +2,7 @@ #define LIBMUSCLE_MOCK_COMMUNICATOR #define LIBMUSCLE_MOCK_LOGGER #define LIBMUSCLE_MOCK_MMP_CLIENT +#define LIBMUSCLE_MOCK_PROFILER // into the real implementation, #include @@ -21,6 +22,7 @@ #include #include #include +#include // Test code dependencies #include @@ -42,6 +44,7 @@ using libmuscle::impl::Message; using libmuscle::impl::MockCommunicator; using libmuscle::impl::MockLogger; using libmuscle::impl::MockMMPClient; +using libmuscle::impl::MockProfiler; using libmuscle::impl::Optional; using libmuscle::impl::Snapshot; using libmuscle::impl::SnapshotMetadata; @@ -67,6 +70,11 @@ MockLogger & mock_logger() { return logger; } +MockProfiler & mock_profiler() { + static MockProfiler profiler; + return profiler; +} + class libmuscle_snapshot_manager : public ::testing::Test { protected: void SetUp() override { @@ -106,8 +114,8 @@ class libmuscle_snapshot_manager : public ::testing::Test { TEST_F(libmuscle_snapshot_manager, test_no_checkpointing) { reset_mocks(); - MockCommunicator communicator("test", {}, {}, mock_logger(), {}); - MockMMPClient manager(""); + MockCommunicator communicator("test", {}, {}, mock_logger(), mock_profiler()); + MockMMPClient manager("instance", ""); SnapshotManager snapshot_manager("test", 
manager, communicator, mock_logger()); snapshot_manager.prepare_resume({}, temp_dir_); @@ -118,8 +126,8 @@ TEST_F(libmuscle_snapshot_manager, test_no_checkpointing) { TEST_F(libmuscle_snapshot_manager, test_save_load_snapshot) { reset_mocks(); - MockCommunicator communicator("test", {}, {}, mock_logger(), {}); - MockMMPClient manager(""); + MockCommunicator communicator("test", {}, {}, mock_logger(), mock_profiler()); + MockMMPClient manager("instance", ""); Reference instance_id("test[1]"); SnapshotManager snapshot_manager(instance_id, manager, communicator, mock_logger()); @@ -189,8 +197,8 @@ TEST_F(libmuscle_snapshot_manager, test_save_load_snapshot) { TEST_F(libmuscle_snapshot_manager, test_save_load_implicit_snapshot) { reset_mocks(); - MockCommunicator communicator("test", {}, {}, mock_logger(), {}); - MockMMPClient manager(""); + MockCommunicator communicator("test", {}, {}, mock_logger(), mock_profiler()); + MockMMPClient manager("instance", ""); Reference instance_id("test[1]"); SnapshotManager snapshot_manager(instance_id, manager, communicator, mock_logger()); From c520edacf90832d8ca044677442d1e2a32aa09f1 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 13 Feb 2023 15:16:55 +0100 Subject: [PATCH 047/188] Use Data::list initializer instead of Data::nils --- .../tests/test_checkpoint_triggers.cpp | 31 ++++++++----------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/libmuscle/cpp/src/libmuscle/tests/test_checkpoint_triggers.cpp b/libmuscle/cpp/src/libmuscle/tests/test_checkpoint_triggers.cpp index 2a53f315..8610d905 100644 --- a/libmuscle/cpp/src/libmuscle/tests/test_checkpoint_triggers.cpp +++ b/libmuscle/cpp/src/libmuscle/tests/test_checkpoint_triggers.cpp @@ -2,6 +2,8 @@ #include +#include + using libmuscle::impl::TriggerManager; using libmuscle::impl::AtCheckpointTrigger; using libmuscle::impl::RangeCheckpointTrigger; @@ -92,9 +94,9 @@ TEST(libmuscle_checkpoint_triggers, test_range_checkpoint_trigger_default_start) } 
TEST(libmuscle_checkpoint_triggers, test_combined_checkpoint_trigger_every_at) { - auto rules = Data::nils(2); - rules[0] = Data::dict("start", Data(), "stop", Data(), "every", 10); - rules[1] = Data::dict("at", Data::list(3, 7, 13, 17)); + auto rules = Data::list( + Data::dict("start", Data(), "stop", Data(), "every", 10), + Data::dict("at", Data::list(3, 7, 13, 17))); CombinedCheckpointTriggers trigger(rules); ASSERT_DOUBLE_EQ(trigger.next_checkpoint(-11.).get(), -10); @@ -114,10 +116,10 @@ TEST(libmuscle_checkpoint_triggers, test_combined_checkpoint_trigger_every_at) { } TEST(libmuscle_checkpoint_triggers, test_combined_checkpoint_trigger_at_ranges) { - auto rules = Data::nils(3); - rules[0] = Data::dict("at", Data::list(3, 7, 13, 17)); - rules[1] = Data::dict("start", 0, "stop", 20, "every", 5); - rules[2] = Data::dict("start", 20, "stop", 100, "every", 20); + auto rules = Data::list( + Data::dict("at", Data::list(3, 7, 13, 17)), + Data::dict("start", 0, "stop", 20, "every", 5), + Data::dict("start", 20, "stop", 100, "every", 20)); CombinedCheckpointTriggers trigger(rules); ASSERT_DOUBLE_EQ(trigger.next_checkpoint(-11.).get(), 0); @@ -149,8 +151,7 @@ TEST(libmuscle_checkpoint_triggers, test_trigger_manager_reference_time) { auto encoded_checkpoints = Data::dict( "at_end", true, "wallclock_time", Data::list(), - "simulation_time", Data::list() - ); + "simulation_time", Data::list()); auto start = std::chrono::steady_clock::now(); double ref_elapsed = 15.0; @@ -169,15 +170,10 @@ TEST(libmuscle_checkpoint_triggers, test_trigger_manager) { double ref_elapsed = 0.0; TriggerManager trigger_manager; - auto wallclock_time_list = Data::nils(1); - wallclock_time_list[0] = Data::dict("at", Data::list(1e-12)); - auto simulation_time_list = Data::nils(1); - simulation_time_list[0] = Data::dict("at", Data::list(1, 3, 5)); auto encoded_checkpoints = Data::dict( "at_end", true, - "wallclock_time", wallclock_time_list, - "simulation_time", simulation_time_list - ); + 
"wallclock_time", Data::list(Data::dict("at", Data::list(1e-12))), + "simulation_time", Data::list(Data::dict("at", Data::list(1, 3, 5)))); trigger_manager.set_checkpoint_info(ref_elapsed, encoded_checkpoints); ASSERT_TRUE(trigger_manager.should_save_snapshot(0.1)); @@ -209,8 +205,7 @@ TEST(libmuscle_checkpoint_triggers, test_no_checkpointing) { auto encoded_checkpoints = Data::dict( "at_end", false, "wallclock_time", Data::list(), - "simulation_time", Data::list() - ); + "simulation_time", Data::list()); trigger_manager.set_checkpoint_info(0.0, encoded_checkpoints); ASSERT_FALSE(trigger_manager.should_save_snapshot(1)); ASSERT_FALSE(trigger_manager.should_save_snapshot(5000)); From 890bd4daa22b3e2b2041be6240ee62580f694db4 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 13 Feb 2023 15:18:49 +0100 Subject: [PATCH 048/188] Prevent a memcpy during snapshot creation --- libmuscle/cpp/src/libmuscle/snapshot.cpp | 27 ++++++++++++++---------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/libmuscle/cpp/src/libmuscle/snapshot.cpp b/libmuscle/cpp/src/libmuscle/snapshot.cpp index d290cc30..ebb5a9e1 100644 --- a/libmuscle/cpp/src/libmuscle/snapshot.cpp +++ b/libmuscle/cpp/src/libmuscle/snapshot.cpp @@ -86,7 +86,8 @@ DataConstRef Snapshot::to_bytes() const { port_message_counts[kv.first] = counts; } - Data message; + Data dict; + // Note setting dict in two branches, to avoid a memcopy of the encoded MMPMessage if (message_.is_set()) { auto msg = message_.get(); MPPMessage mpp_msg( @@ -99,20 +100,24 @@ DataConstRef Snapshot::to_bytes() const { 0, -1.0, msg.data()); - auto encoded = mpp_msg.encoded(); - // unfortunately need to create a copy of the byte array here... 
- message = Data::byte_array(encoded.size()); - memcpy(message.as_byte_array(), encoded.as_byte_array(), encoded.size()); - } - - Data dict = Data::dict( + // Initializing a Data::dict with a DataConstRef is allowed, but assignment + // after creation is not + dict = Data::dict( "triggers", triggers, "wallclock_time", wallclock_time_, "port_message_counts", port_message_counts, "is_final_snapshot", is_final_snapshot_, - "message", message, - "settings_overlay", Data(settings_overlay_) - ); + "message", mpp_msg.encoded(), + "settings_overlay", Data(settings_overlay_)); + } else { + dict = Data::dict( + "triggers", triggers, + "wallclock_time", wallclock_time_, + "port_message_counts", port_message_counts, + "is_final_snapshot", is_final_snapshot_, + "message", Data(), + "settings_overlay", Data(settings_overlay_)); + } msgpack::sbuffer sbuf; msgpack::pack(sbuf, dict); From f100366c961ebc52b4b5439ca394f8db9599b808 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 14 Feb 2023 11:15:01 +0100 Subject: [PATCH 049/188] Remove _ from public member attributes --- libmuscle/cpp/src/libmuscle/snapshot.cpp | 78 +++++++++---------- libmuscle/cpp/src/libmuscle/snapshot.hpp | 26 +++---- .../cpp/src/libmuscle/snapshot_manager.cpp | 12 +-- .../cpp/src/libmuscle/tests/test_snapshot.cpp | 70 ++++++++--------- 4 files changed, 93 insertions(+), 93 deletions(-) diff --git a/libmuscle/cpp/src/libmuscle/snapshot.cpp b/libmuscle/cpp/src/libmuscle/snapshot.cpp index ebb5a9e1..35168740 100644 --- a/libmuscle/cpp/src/libmuscle/snapshot.cpp +++ b/libmuscle/cpp/src/libmuscle/snapshot.cpp @@ -18,12 +18,12 @@ Snapshot::Snapshot( Optional message, ::ymmsl::Settings settings_overlay ) - : triggers_(triggers) - , wallclock_time_(wallclock_time) - , port_message_counts_(port_message_counts) - , is_final_snapshot_(is_final_snapshot) - , message_(message) - , settings_overlay_(settings_overlay) + : triggers(triggers) + , wallclock_time(wallclock_time) + , 
port_message_counts(port_message_counts) + , is_final_snapshot(is_final_snapshot) + , message(message) + , settings_overlay(settings_overlay) {} Snapshot Snapshot::from_bytes(DataConstRef const & data) { @@ -72,24 +72,24 @@ Snapshot Snapshot::from_bytes(DataConstRef const & data) { } DataConstRef Snapshot::to_bytes() const { - Data triggers = Data::nils(triggers_.size()); - for (std::size_t i=0; i> const & port_message_counts, bool is_final_snapshot, std::string const & snapshot_filename) - : triggers_(triggers) - , wallclock_time_(wallclock_time) - , timestamp_(timestamp) - , next_timestamp_(next_timestamp) - , port_message_counts_(port_message_counts) - , is_final_snapshot_(is_final_snapshot) - , snapshot_filename_(snapshot_filename) + : triggers(triggers) + , wallclock_time(wallclock_time) + , timestamp(timestamp) + , next_timestamp(next_timestamp) + , port_message_counts(port_message_counts) + , is_final_snapshot(is_final_snapshot) + , snapshot_filename(snapshot_filename) {} SnapshotMetadata SnapshotMetadata::from_snapshot( Snapshot const & snapshot, std::string const & snapshot_filename) { double timestamp = NAN; Optional next_timestamp; - if (snapshot.message_.is_set()) { - timestamp = snapshot.message_.get().timestamp(); - if (snapshot.message_.get().has_next_timestamp()) { - next_timestamp = snapshot.message_.get().next_timestamp(); + if (snapshot.message.is_set()) { + timestamp = snapshot.message.get().timestamp(); + if (snapshot.message.get().has_next_timestamp()) { + next_timestamp = snapshot.message.get().next_timestamp(); } } return SnapshotMetadata( - snapshot.triggers_, - snapshot.wallclock_time_, + snapshot.triggers, + snapshot.wallclock_time, timestamp, next_timestamp, - snapshot.port_message_counts_, - snapshot.is_final_snapshot_, + snapshot.port_message_counts, + snapshot.is_final_snapshot, snapshot_filename); } diff --git a/libmuscle/cpp/src/libmuscle/snapshot.hpp b/libmuscle/cpp/src/libmuscle/snapshot.hpp index 815d0fae..32e1428c 100644 --- 
a/libmuscle/cpp/src/libmuscle/snapshot.hpp +++ b/libmuscle/cpp/src/libmuscle/snapshot.hpp @@ -42,12 +42,12 @@ class Snapshot { */ DataConstRef to_bytes() const; - std::vector triggers_; - double wallclock_time_; - std::unordered_map> port_message_counts_; - bool is_final_snapshot_; - Optional message_; - ::ymmsl::Settings settings_overlay_; + std::vector triggers; + double wallclock_time; + std::unordered_map> port_message_counts; + bool is_final_snapshot; + Optional message; + ::ymmsl::Settings settings_overlay; }; /** Metadata of a snapshot for sending to the muscle_manager. @@ -68,13 +68,13 @@ class SnapshotMetadata { static SnapshotMetadata from_snapshot( Snapshot const & snapshot, std::string const & snapshot_filename); - std::vector triggers_; - double wallclock_time_; - double timestamp_; - Optional next_timestamp_; - std::unordered_map> port_message_counts_; - bool is_final_snapshot_; - std::string snapshot_filename_; + std::vector triggers; + double wallclock_time; + double timestamp; + Optional next_timestamp; + std::unordered_map> port_message_counts; + bool is_final_snapshot; + std::string snapshot_filename; }; } } diff --git a/libmuscle/cpp/src/libmuscle/snapshot_manager.cpp b/libmuscle/cpp/src/libmuscle/snapshot_manager.cpp index cd5534d4..a4d0395c 100644 --- a/libmuscle/cpp/src/libmuscle/snapshot_manager.cpp +++ b/libmuscle/cpp/src/libmuscle/snapshot_manager.cpp @@ -74,12 +74,12 @@ Optional SnapshotManager::prepare_resume( if (resume_snapshot.is_set()) { auto snapshot = load_snapshot_from_file(resume_snapshot.get()); - if (snapshot.message_.is_set()) { + if (snapshot.message.is_set()) { // snapshot.message is not set for implicit snapshots resume_from_snapshot_ = snapshot; - result = snapshot.message_.get().timestamp(); + result = snapshot.message.get().timestamp(); } - resume_overlay_ = snapshot.settings_overlay_; + resume_overlay_ = snapshot.settings_overlay; // TODO: implement this on Communicator! 
// communicator_.restore_message_counts(snapshot.port_message_counts_); @@ -95,18 +95,18 @@ Optional SnapshotManager::prepare_resume( bool SnapshotManager::resuming_from_intermediate() { return (resume_from_snapshot_.is_set() && - !resume_from_snapshot_.get().is_final_snapshot_); + !resume_from_snapshot_.get().is_final_snapshot); } bool SnapshotManager::resuming_from_final() { return (resume_from_snapshot_.is_set() && - resume_from_snapshot_.get().is_final_snapshot_); + resume_from_snapshot_.get().is_final_snapshot); } Message SnapshotManager::load_snapshot() { if (!resume_from_snapshot_.is_set()) throw std::runtime_error("Error: no snapshot to load."); - return resume_from_snapshot_.get().message_.get(); + return resume_from_snapshot_.get().message.get(); } double SnapshotManager::save_snapshot( diff --git a/libmuscle/cpp/src/libmuscle/tests/test_snapshot.cpp b/libmuscle/cpp/src/libmuscle/tests/test_snapshot.cpp index 67692c29..78129058 100644 --- a/libmuscle/cpp/src/libmuscle/tests/test_snapshot.cpp +++ b/libmuscle/cpp/src/libmuscle/tests/test_snapshot.cpp @@ -29,49 +29,49 @@ Snapshot create_snapshot() { TEST(libmuscle_snapshot, test_snapshot) { auto snapshot = create_snapshot(); - ASSERT_EQ(snapshot.triggers_.size(), 1); - ASSERT_STREQ(snapshot.triggers_[0].c_str(), "test triggers"); - ASSERT_DOUBLE_EQ(snapshot.wallclock_time_, 15.3); - ASSERT_EQ(snapshot.port_message_counts_.size(), 3); - ASSERT_EQ(snapshot.port_message_counts_.at("in"), std::vector({1})); - ASSERT_EQ(snapshot.port_message_counts_.at("out"), std::vector({4})); - ASSERT_EQ(snapshot.port_message_counts_.at("muscle_settings_in"), + ASSERT_EQ(snapshot.triggers.size(), 1); + ASSERT_STREQ(snapshot.triggers[0].c_str(), "test triggers"); + ASSERT_DOUBLE_EQ(snapshot.wallclock_time, 15.3); + ASSERT_EQ(snapshot.port_message_counts.size(), 3); + ASSERT_EQ(snapshot.port_message_counts.at("in"), std::vector({1})); + ASSERT_EQ(snapshot.port_message_counts.at("out"), std::vector({4})); + 
ASSERT_EQ(snapshot.port_message_counts.at("muscle_settings_in"), std::vector({0})); - ASSERT_TRUE(snapshot.is_final_snapshot_); - ASSERT_TRUE(snapshot.message_.is_set()); - ASSERT_DOUBLE_EQ(snapshot.message_.get().timestamp(), 1.2); - ASSERT_FALSE(snapshot.message_.get().has_next_timestamp()); - ASSERT_FALSE(snapshot.message_.get().has_settings()); - ASSERT_STREQ(snapshot.message_.get().data().as().c_str(), + ASSERT_TRUE(snapshot.is_final_snapshot); + ASSERT_TRUE(snapshot.message.is_set()); + ASSERT_DOUBLE_EQ(snapshot.message.get().timestamp(), 1.2); + ASSERT_FALSE(snapshot.message.get().has_next_timestamp()); + ASSERT_FALSE(snapshot.message.get().has_settings()); + ASSERT_STREQ(snapshot.message.get().data().as().c_str(), "test_data"); - ASSERT_EQ(snapshot.settings_overlay_["test"], 1); + ASSERT_EQ(snapshot.settings_overlay["test"], 1); auto binary_snapshot = snapshot.to_bytes(); Snapshot snapshot2 = Snapshot::from_bytes(binary_snapshot); - ASSERT_EQ(snapshot.triggers_, snapshot2.triggers_); - ASSERT_EQ(snapshot.wallclock_time_, snapshot2.wallclock_time_); - ASSERT_EQ(snapshot.port_message_counts_, snapshot2.port_message_counts_); - ASSERT_EQ(snapshot.is_final_snapshot_, snapshot2.is_final_snapshot_); - ASSERT_EQ(snapshot.message_.get().timestamp(), - snapshot2.message_.get().timestamp()); - ASSERT_EQ(snapshot.message_.get().data().as(), - snapshot2.message_.get().data().as()); - ASSERT_EQ(snapshot.settings_overlay_, snapshot2.settings_overlay_); + ASSERT_EQ(snapshot.triggers, snapshot2.triggers); + ASSERT_EQ(snapshot.wallclock_time, snapshot2.wallclock_time); + ASSERT_EQ(snapshot.port_message_counts, snapshot2.port_message_counts); + ASSERT_EQ(snapshot.is_final_snapshot, snapshot2.is_final_snapshot); + ASSERT_EQ(snapshot.message.get().timestamp(), + snapshot2.message.get().timestamp()); + ASSERT_EQ(snapshot.message.get().data().as(), + snapshot2.message.get().data().as()); + ASSERT_EQ(snapshot.settings_overlay, snapshot2.settings_overlay); } 
TEST(libmuscle_snapshot, test_snapshot_metadata) { auto snapshot = create_snapshot(); auto metadata = SnapshotMetadata::from_snapshot(snapshot, "test"); - ASSERT_EQ(metadata.triggers_, snapshot.triggers_); - ASSERT_EQ(metadata.wallclock_time_, snapshot.wallclock_time_); - ASSERT_EQ(metadata.port_message_counts_, snapshot.port_message_counts_); - ASSERT_EQ(metadata.is_final_snapshot_, snapshot.is_final_snapshot_); - ASSERT_EQ(metadata.timestamp_, snapshot.message_.get().timestamp()); - ASSERT_EQ(metadata.next_timestamp_.is_set(), - snapshot.message_.get().has_next_timestamp()); - ASSERT_EQ(metadata.snapshot_filename_, "test"); + ASSERT_EQ(metadata.triggers, snapshot.triggers); + ASSERT_EQ(metadata.wallclock_time, snapshot.wallclock_time); + ASSERT_EQ(metadata.port_message_counts, snapshot.port_message_counts); + ASSERT_EQ(metadata.is_final_snapshot, snapshot.is_final_snapshot); + ASSERT_EQ(metadata.timestamp, snapshot.message.get().timestamp()); + ASSERT_EQ(metadata.next_timestamp.is_set(), + snapshot.message.get().has_next_timestamp()); + ASSERT_EQ(metadata.snapshot_filename, "test"); } TEST(libmuscle_snapshot, test_message_with_settings) { @@ -79,22 +79,22 @@ TEST(libmuscle_snapshot, test_message_with_settings) { settings["settings"] = true; Message message(1.0, 2.0, "test_data", settings); Snapshot snapshot ({}, 0, {}, false, message, {}); - ASSERT_TRUE(snapshot.message_.get().settings().at("settings").as()); + ASSERT_TRUE(snapshot.message.get().settings().at("settings").as()); auto binary_snapshot = snapshot.to_bytes(); Snapshot snapshot2 = Snapshot::from_bytes(binary_snapshot); - ASSERT_TRUE(snapshot2.message_.get().settings().at("settings").as()); + ASSERT_TRUE(snapshot2.message.get().settings().at("settings").as()); } TEST(libmuscle_snapshot, test_implicit_snapshot) { Optional message; Snapshot snapshot({}, 0, {}, true, message, {}); - ASSERT_FALSE(snapshot.message_.is_set()); + ASSERT_FALSE(snapshot.message.is_set()); auto binary_snapshot = 
snapshot.to_bytes(); Snapshot snapshot2 = Snapshot::from_bytes(binary_snapshot); - ASSERT_FALSE(snapshot2.message_.is_set()); + ASSERT_FALSE(snapshot2.message.is_set()); } From b8467fd8a9fc193ad4ed70c81ee66168da352ae5 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 14 Feb 2023 11:30:43 +0100 Subject: [PATCH 050/188] Factor instance_id into MMPClient Also for get_checkpoint_info and submit_snapshot_metadata. --- libmuscle/python/libmuscle/instance.py | 2 +- libmuscle/python/libmuscle/mmp_client.py | 10 ++++------ libmuscle/python/libmuscle/snapshot_manager.py | 4 ++-- .../python/libmuscle/test/test_snapshot_manager.py | 9 +++------ 4 files changed, 10 insertions(+), 15 deletions(-) diff --git a/libmuscle/python/libmuscle/instance.py b/libmuscle/python/libmuscle/instance.py index 84c948bd..17d576e7 100644 --- a/libmuscle/python/libmuscle/instance.py +++ b/libmuscle/python/libmuscle/instance.py @@ -183,7 +183,7 @@ def __init__( # Note: get_checkpoint_info needs to have the ports initialized # so it comes after self._connect() - checkpoint_info = self.__manager.get_checkpoint_info(self._instance_id) + checkpoint_info = self.__manager.get_checkpoint_info() elapsed_time, checkpoints = checkpoint_info[0:2] self._trigger_manager.set_checkpoint_info(elapsed_time, checkpoints) diff --git a/libmuscle/python/libmuscle/mmp_client.py b/libmuscle/python/libmuscle/mmp_client.py index da21b547..3fe82a91 100644 --- a/libmuscle/python/libmuscle/mmp_client.py +++ b/libmuscle/python/libmuscle/mmp_client.py @@ -149,17 +149,15 @@ def submit_profile_events(self, events: Iterable[ProfileEvent]) -> None: self._call_manager(request) def submit_snapshot_metadata( - self, name: Reference, snapshot_metadata: SnapshotMetadata - ) -> None: + self, snapshot_metadata: SnapshotMetadata) -> None: """Send snapshot metadata to the manager. Args: - name: Name of the instance in the simulation. snapshot_metadata: Snapshot metadata to supply to the manager. 
""" request = [ RequestType.SUBMIT_SNAPSHOT.value, - str(name), + str(self._instance_id), dataclasses.asdict(snapshot_metadata)] self._call_manager(request) @@ -173,7 +171,7 @@ def get_settings(self) -> Settings: response = self._call_manager(request) return Settings(response[1]) - def get_checkpoint_info(self, name: Reference) -> _CheckpointInfoType: + def get_checkpoint_info(self) -> _CheckpointInfoType: """Get the checkpoint info from the manager. Returns: @@ -182,7 +180,7 @@ def get_checkpoint_info(self, name: Reference) -> _CheckpointInfoType: resume: path to the resume snapshot snapshot_directory: path to store snapshots """ - request = [RequestType.GET_CHECKPOINT_INFO.value, str(name)] + request = [RequestType.GET_CHECKPOINT_INFO.value, str(self._instance_id)] response = self._call_manager(request) return decode_checkpoint_info(*response[1:]) diff --git a/libmuscle/python/libmuscle/snapshot_manager.py b/libmuscle/python/libmuscle/snapshot_manager.py index fbdaef5b..c2bbb1a3 100644 --- a/libmuscle/python/libmuscle/snapshot_manager.py +++ b/libmuscle/python/libmuscle/snapshot_manager.py @@ -80,7 +80,7 @@ def prepare_resume( # Store a copy of the snapshot in the current run directory path = self.__store_snapshot(snapshot) metadata = SnapshotMetadata.from_snapshot(snapshot, str(path)) - self._manager.submit_snapshot_metadata(self._instance_id, metadata) + self._manager.submit_snapshot_metadata(metadata) return result @@ -148,7 +148,7 @@ def save_snapshot( path = self.__store_snapshot(snapshot) metadata = SnapshotMetadata.from_snapshot(snapshot, str(path)) - self._manager.submit_snapshot_metadata(self._instance_id, metadata) + self._manager.submit_snapshot_metadata(metadata) timestamp = msg.timestamp if msg is not None else float('-inf') if final and f_init_max_timestamp is not None: diff --git a/libmuscle/python/libmuscle/test/test_snapshot_manager.py b/libmuscle/python/libmuscle/test/test_snapshot_manager.py index e530ad06..59963966 100644 --- 
a/libmuscle/python/libmuscle/test/test_snapshot_manager.py +++ b/libmuscle/python/libmuscle/test/test_snapshot_manager.py @@ -38,8 +38,7 @@ def test_save_load_snapshot(tmp_path: Path) -> None: communicator.get_message_counts.assert_called_with() manager.submit_snapshot_metadata.assert_called() - instance, metadata = manager.submit_snapshot_metadata.call_args[0] - assert instance == instance_id + metadata, = manager.submit_snapshot_metadata.call_args[0] assert isinstance(metadata, SnapshotMetadata) assert metadata.triggers == ['test'] assert metadata.wallclock_time == 13.0 @@ -67,8 +66,7 @@ def test_save_load_snapshot(tmp_path: Path) -> None: Message(0.6, None, 'test data2'), True, ['test'], 42.2, 1.2, Settings()) - instance, metadata = manager.submit_snapshot_metadata.call_args[0] - assert instance == instance_id + metadata, = manager.submit_snapshot_metadata.call_args[0] assert isinstance(metadata, SnapshotMetadata) assert metadata.triggers == ['test'] assert metadata.wallclock_time == 42.2 @@ -105,8 +103,7 @@ def test_save_load_implicit_snapshot(tmp_path: Path) -> None: None, True, ['implicit'], 1.0, 1.5, Settings()) manager.submit_snapshot_metadata.assert_called_once() - instance, metadata = manager.submit_snapshot_metadata.call_args[0] - assert instance == instance_id + metadata, = manager.submit_snapshot_metadata.call_args[0] assert isinstance(metadata, SnapshotMetadata) snapshot_path = Path(metadata.snapshot_filename) manager.submit_snapshot_metadata.reset_mock() From 41a19c107433cc950c00e7e84f42151417472a4e Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 14 Feb 2023 14:18:03 +0100 Subject: [PATCH 051/188] C++ MMPClient changes for checkpointing --- libmuscle/cpp/src/libmuscle/mcp/protocol.hpp | 2 + libmuscle/cpp/src/libmuscle/mmp_client.cpp | 73 ++++++++++++++++++-- libmuscle/cpp/src/libmuscle/mmp_client.hpp | 26 ++++++- 3 files changed, 96 insertions(+), 5 deletions(-) diff --git a/libmuscle/cpp/src/libmuscle/mcp/protocol.hpp 
b/libmuscle/cpp/src/libmuscle/mcp/protocol.hpp index 2693919c..15ac48d2 100644 --- a/libmuscle/cpp/src/libmuscle/mcp/protocol.hpp +++ b/libmuscle/cpp/src/libmuscle/mcp/protocol.hpp @@ -15,6 +15,8 @@ enum class RequestType { get_settings = 4, submit_log_message = 5, submit_profile_events = 6, + submit_snapshot = 7, + get_checkpoint_info = 8, // MUSCLE Peer Protocol get_next_message = 21 diff --git a/libmuscle/cpp/src/libmuscle/mmp_client.cpp b/libmuscle/cpp/src/libmuscle/mmp_client.cpp index d468a21d..a221a038 100644 --- a/libmuscle/cpp/src/libmuscle/mmp_client.cpp +++ b/libmuscle/cpp/src/libmuscle/mmp_client.cpp @@ -25,6 +25,7 @@ using libmuscle::impl::DataConstRef; using libmuscle::impl::mcp::unpack_data; using libmuscle::impl::Optional; using libmuscle::impl::ProfileEvent; +using libmuscle::impl::SnapshotMetadata; using std::chrono::steady_clock; using ymmsl::Conduit; using ymmsl::Reference; @@ -74,6 +75,14 @@ namespace { return encoded; } + template + Data encode_vector(std::vector const & value) { + auto retval = Data::nils(value.size()); + for (std::size_t i = 0u; i < value.size(); ++i) + retval[i] = value[i]; + return retval; + } + Data encode_profile_event(ProfileEvent const & event) { if (!event.start_time.is_set() || !event.stop_time.is_set()) { throw std::runtime_error( @@ -93,6 +102,31 @@ namespace { encode_optional(event.slot), encode_optional(event.message_size), encode_optional(event.message_timestamp)); } + + Data encode_snapshot_metadata(SnapshotMetadata const & snapshot_metadata) { + auto port_message_counts = Data::dict(); + for(auto const & kv : snapshot_metadata.port_message_counts) + port_message_counts[kv.first] = encode_vector(kv.second); + + auto metadata = Data::dict( + "triggers", encode_vector(snapshot_metadata.triggers), + "wallclock_time", snapshot_metadata.wallclock_time, + "timestamp", snapshot_metadata.timestamp, + "next_timsetamp", encode_optional(snapshot_metadata.next_timestamp), + "port_message_counts", port_message_counts, + 
"is_final_snapshot", snapshot_metadata.is_final_snapshot, + "snapshot_filename", snapshot_metadata.snapshot_filename + ); + + return metadata; + } + + template + Optional decode_optional(DataConstRef const & data) { + if (data.is_nil()) + return {}; + return data.as(); + } } namespace libmuscle { namespace impl { @@ -133,14 +167,21 @@ void MMPClient::submit_profile_events( auto response = call_manager_(request); } +void MMPClient::submit_snapshot_metadata( + SnapshotMetadata const & snapshot_metadata) { + auto request = Data::list( + static_cast(RequestType::submit_snapshot), + static_cast(instance_id_), + encode_snapshot_metadata(snapshot_metadata)); + + auto response = call_manager_(request); +} + void MMPClient::register_instance( std::vector const & locations, std::vector<::ymmsl::Port> const & ports) { - auto encoded_locs = Data::nils(locations.size()); - for (std::size_t i = 0u; i < locations.size(); ++i) - encoded_locs[i] = locations[i]; - + auto encoded_locs = encode_vector(locations); auto encoded_ports = Data::nils(ports.size()); for (std::size_t i = 0u; i < ports.size(); ++i) encoded_ports[i] = encode_port(ports[i]); @@ -171,6 +212,30 @@ ymmsl::Settings MMPClient::get_settings() { return settings; } +auto MMPClient::get_checkpoint_info() -> + std::tuple< + double, + DataConstRef, + Optional, + Optional + > +{ + auto request = Data::list( + static_cast(RequestType::get_checkpoint_info), + static_cast(instance_id_)); + auto response = call_manager_(request); + + if (response[0].as() != static_cast(ResponseType::success)) { + throw std::runtime_error("Error getting checkpoint info from manager."); + } + + return std::make_tuple( + response[1].as(), + response[2], + decode_optional(response[3]), + decode_optional(response[4])); +} + auto MMPClient::request_peers() -> std::tuple< std::vector<::ymmsl::Conduit>, diff --git a/libmuscle/cpp/src/libmuscle/mmp_client.hpp b/libmuscle/cpp/src/libmuscle/mmp_client.hpp index 84aa776b..c4a33767 100644 --- 
a/libmuscle/cpp/src/libmuscle/mmp_client.hpp +++ b/libmuscle/cpp/src/libmuscle/mmp_client.hpp @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include @@ -56,12 +58,34 @@ class MMPClient { */ void submit_profile_events(std::vector const & events); + /** Send snapshot metadata to the manager. + * + * @param snapshot_metadata Snapshot metadata to supply to the manager. + */ + void submit_snapshot_metadata(SnapshotMetadata const & snapshot_metadata); + /** Get the global settings from the manager. * * @return A Settings object with the global settings. */ ymmsl::Settings get_settings(); + /** Get the checkpoint info from the manager. + * + * @return A tuple containing: + * elapsed_time: current elapsed wallclock time + * checkpoints: encoded checkpoint configuration + * resume: optional path to the resume snapshot + * snapshot_directory: optional path to store snapshots + */ + auto get_checkpoint_info() -> + std::tuple< + double, + DataConstRef, + Optional, + Optional + >; + /** Register a component instance with the manager. * * @param locations List of places where the instance can be reached. @@ -78,7 +102,7 @@ class MMPClient { * peer_interval_min and peer_interval_max. From there on, intervals * are drawn randomly from that range. 
* - * @return A tuple containng a list of conduits that this instance is + * @return A tuple containing a list of conduits that this instance is * attached to, a dictionary of peer dimensions, which is indexed * by Reference to the peer kernel and specifies how many * instances of the kernel there are, and a dictionary of peer From 013d2962bb83bf53876f8ac842b0e36d57a16bb9 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 14 Feb 2023 16:16:46 +0100 Subject: [PATCH 052/188] C++ Communicator: get/restore message counts --- libmuscle/cpp/src/libmuscle/communicator.cpp | 35 ++++- libmuscle/cpp/src/libmuscle/communicator.hpp | 10 ++ .../libmuscle/tests/mocks/mock_mpp_client.cpp | 10 +- .../libmuscle/tests/mocks/mock_mpp_client.hpp | 3 + .../src/libmuscle/tests/test_communicator.cpp | 132 ++++++++++++++++++ libmuscle/python/libmuscle/communicator.py | 4 +- 6 files changed, 188 insertions(+), 6 deletions(-) diff --git a/libmuscle/cpp/src/libmuscle/communicator.cpp b/libmuscle/cpp/src/libmuscle/communicator.cpp index 1e5f6ac7..39428607 100644 --- a/libmuscle/cpp/src/libmuscle/communicator.cpp +++ b/libmuscle/cpp/src/libmuscle/communicator.cpp @@ -257,8 +257,6 @@ Message Communicator::receive_message( } int expected_message_number = port.get_num_messages(slot); - // TODO: handle f_init port counts for STATELESS and WEAKLY_STATEFUL - // components which didn't load a snapshot if (expected_message_number != mpp_message.message_number) { if (expected_message_number - 1 == mpp_message.message_number and port.is_resuming(slot)) { @@ -319,6 +317,39 @@ void Communicator::shutdown() { server->close(); } +Communicator::PortMessageCounts Communicator::get_message_counts() { + PortMessageCounts port_message_counts; + for(auto const & port_item : ports_) + port_message_counts[port_item.first] = port_item.second.get_message_counts(); + + assert(muscle_settings_in_.is_set()); // is always created by connect() + auto counts = muscle_settings_in_.get().get_message_counts(); + 
port_message_counts["muscle_settings_in"] = counts; + + return port_message_counts; +} + +void Communicator::restore_message_counts( + Communicator::PortMessageCounts const & port_message_counts) { + for (auto const & item : port_message_counts) { + if (item.first == "muscle_settings_in") { + assert(muscle_settings_in_.is_set()); // is always created by connect() + muscle_settings_in_.get().restore_message_counts(item.second); + } else { + auto port_item = ports_.find(item.first); + if (port_item != ports_.end()) { + port_item->second.restore_message_counts(item.second); + } else { + throw std::runtime_error( + "Unknown port " + item.first + " in snapshot." + " Have your port definitions changed since" + " the snapshot was taken?"); + } + } + } +} + + Reference Communicator::instance_id_() const { return kernel_ + index_; } diff --git a/libmuscle/cpp/src/libmuscle/communicator.hpp b/libmuscle/cpp/src/libmuscle/communicator.hpp index 2ae8e294..fd976fd5 100644 --- a/libmuscle/cpp/src/libmuscle/communicator.hpp +++ b/libmuscle/cpp/src/libmuscle/communicator.hpp @@ -33,6 +33,8 @@ namespace libmuscle { namespace impl { */ class Communicator { public: + using PortMessageCounts = std::unordered_map>; + /** Create a Communicator. * * The instance reference must start with one or more Identifiers, @@ -170,6 +172,14 @@ class Communicator { */ void shutdown(); + /** Get message counts for all ports on the communicator. + */ + PortMessageCounts get_message_counts(); + + /** Restore message counts on all ports. 
+ */ + void restore_message_counts(PortMessageCounts const & port_message_counts); + private: using Ports_ = std::unordered_map; diff --git a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mpp_client.cpp b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mpp_client.cpp index f1d7ba9a..d5848422 100644 --- a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mpp_client.cpp +++ b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mpp_client.cpp @@ -22,10 +22,12 @@ std::tuple MockMPPClient::receive( ::ymmsl::Reference const & receiver) { last_receiver = receiver; - return std::make_tuple( + auto retval = std::make_tuple( next_receive_message.encoded(), std::make_tuple( ProfileTimestamp(1.0), ProfileTimestamp(2.0), ProfileTimestamp(3.0))); + side_effect(); + return retval; } void MockMPPClient::close() {} @@ -38,7 +40,9 @@ void MockMPPClient::reset() { next_receive_message.port_length = 0; next_receive_message.timestamp = 0.0; next_receive_message.next_timestamp = 1.0; + next_receive_message.message_number = 0; last_receiver = "_none"; + side_effect = [](){}; // empty lambda function } int MockMPPClient::num_constructed = 0; @@ -50,10 +54,12 @@ Settings MockMPPClient::make_overlay_() { } MPPMessage MockMPPClient::next_receive_message( - "test.out", "test2.in", 0, 0.0, 1.0, make_overlay_(),0, 9.0, + "test.out", "test2.in", 0, 0.0, 1.0, make_overlay_(), 0, 9.0, Data::dict("test1", 12)); Reference MockMPPClient::last_receiver("_none"); +std::function MockMPPClient::side_effect; + } } diff --git a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mpp_client.hpp b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mpp_client.hpp index f2ab5815..6c4875cb 100644 --- a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mpp_client.hpp +++ b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mpp_client.hpp @@ -6,6 +6,7 @@ #include +#include #include #include #include @@ -38,6 +39,8 @@ class MockMPPClient { static int num_constructed; static MPPMessage next_receive_message; static ::ymmsl::Reference last_receiver; + // 
Called after a mocked receive + static std::function side_effect; private: static ::ymmsl::Settings make_overlay_(); diff --git a/libmuscle/cpp/src/libmuscle/tests/test_communicator.cpp b/libmuscle/cpp/src/libmuscle/tests/test_communicator.cpp index aa4db788..a3b196f9 100644 --- a/libmuscle/cpp/src/libmuscle/tests/test_communicator.cpp +++ b/libmuscle/cpp/src/libmuscle/tests/test_communicator.cpp @@ -626,3 +626,135 @@ TEST(libmuscle_communicator, receive_message_with_slot_and_settings) { ASSERT_EQ(msg.settings().at("test2"), 3.1); } +TEST(libmuscle_communicator, port_message_counts) { + reset_mocks(); + auto comm = connected_communicator(); + + Message message(0.0, "test", Settings()); + comm->send_message("out", message); + + auto msg_counts = comm->get_message_counts(); + ASSERT_EQ(msg_counts.size(), 3); + ASSERT_EQ(msg_counts["out"], std::vector({1})); + ASSERT_EQ(msg_counts["in"], std::vector({0})); + ASSERT_EQ(msg_counts["muscle_settings_in"], std::vector({0})); + + comm->restore_message_counts({ + {"out", {3}}, + {"in", {2}}, + {"muscle_settings_in", {4}}}); + comm->send_message("out", message); + msg_counts = comm->get_message_counts(); + ASSERT_EQ(msg_counts.size(), 3); + ASSERT_EQ(msg_counts["out"], std::vector({4})); + ASSERT_EQ(msg_counts["in"], std::vector({2})); + ASSERT_EQ(msg_counts["muscle_settings_in"], std::vector({4})); + + ASSERT_THROW( + comm->restore_message_counts({{"x?invalid_port", {3}}}), + std::runtime_error); +} + +TEST(libmuscle_communicator, vector_port_message_counts) { + reset_mocks(); + auto comm = connected_communicator2(); + + auto msg_counts = comm->get_message_counts(); + ASSERT_EQ(msg_counts.size(), 3); + std::vector expected_counts(20); // 20 zeros + ASSERT_EQ(msg_counts["out"], expected_counts); + ASSERT_EQ(msg_counts["in"], expected_counts); + ASSERT_EQ(msg_counts["muscle_settings_in"], std::vector({0})); + + Message message(0.0, "test", Settings()); + comm->send_message("out", message, 13); + msg_counts = 
comm->get_message_counts(); + ASSERT_EQ(msg_counts.size(), 3); + ASSERT_EQ(msg_counts["in"], expected_counts); + ASSERT_EQ(msg_counts["muscle_settings_in"], std::vector({0})); + expected_counts[13] = 1; + ASSERT_EQ(msg_counts["out"], expected_counts); + + int i = 0; + for (int& count : expected_counts) + count = i++; + // expected_counts = {0, 1, ..., 19} + comm->restore_message_counts({ + {"out", expected_counts}, + {"in", expected_counts}, + {"muscle_settings_in", {4}}}); + comm->send_message("out", message, 13); + msg_counts = comm->get_message_counts(); + ASSERT_EQ(msg_counts.size(), 3); + ASSERT_EQ(msg_counts["in"], expected_counts); + ASSERT_EQ(msg_counts["muscle_settings_in"], std::vector({4})); + expected_counts[13] = 14; + ASSERT_EQ(msg_counts["out"], expected_counts); +} + +TEST(libmuscle_communicator, port_count_validation) { + reset_mocks(); + MockMPPClient::next_receive_message.sender = "other.out[13]"; + MockMPPClient::next_receive_message.receiver = "kernel[13].in"; + + auto comm = connected_communicator(); + Message msg = comm->receive_message("in"); + + ASSERT_EQ(comm->get_message_counts()["in"], std::vector({1})); + + // the message received has message_number = 0 again + ASSERT_THROW(comm->receive_message("in"), std::runtime_error); +} + +TEST(libmuscle_communicator, port_discard_error_on_resume) { + reset_mocks(); + MockMPPClient::next_receive_message.sender = "other.out[13]"; + MockMPPClient::next_receive_message.receiver = "kernel[13].in"; + MockMPPClient::next_receive_message.message_number = 1; + + auto comm = connected_communicator(); + + comm->restore_message_counts({ + {"out", {0}}, + {"in", {2}}, + {"muscle_settings_in", {0}}}); + auto & ports = TestCommunicator::ports_(*comm); + for (auto const & port : ports) { + ASSERT_TRUE(port.second.is_resuming()); + } + + // In the next block, the first message with message_number=1 is discarded. 
+ // The RuntimeError is raised when 'receiving' the second message with + // message_number=1 + ASSERT_THROW(comm->receive_message("in"), std::runtime_error); + // TODO: test that a debug message was logged? +} + +TEST(libmuscle_communicator, port_discard_success_on_resume) { + reset_mocks(); + MockMPPClient::next_receive_message.sender = "other.out[13]"; + MockMPPClient::next_receive_message.receiver = "kernel[13].in"; + MockMPPClient::next_receive_message.message_number = 1; + MockMPPClient::next_receive_message.timestamp = 1.0; + MockMPPClient::side_effect = [](){ + // ensure message_number increases after every receive() + MockMPPClient::next_receive_message.message_number ++; + MockMPPClient::next_receive_message.timestamp += 1.0; + }; + + auto comm = connected_communicator(); + + comm->restore_message_counts({ + {"out", {0}}, + {"in", {2}}, + {"muscle_settings_in", {0}}}); + auto & ports = TestCommunicator::ports_(*comm); + for (auto const & port : ports) { + ASSERT_TRUE(port.second.is_resuming()); + } + + auto msg = comm->receive_message("in"); + // TODO: test that a debug message was logged? + ASSERT_EQ(msg.timestamp(), 2.0); + ASSERT_EQ(comm->get_message_counts()["in"], std::vector({3})); +} diff --git a/libmuscle/python/libmuscle/communicator.py b/libmuscle/python/libmuscle/communicator.py index 933fee75..4edde7dc 100644 --- a/libmuscle/python/libmuscle/communicator.py +++ b/libmuscle/python/libmuscle/communicator.py @@ -412,7 +412,7 @@ def shutdown(self) -> None: def restore_message_counts(self, port_message_counts: Dict[str, List[int]] ) -> None: - """Restore message counts on all ports + """Restore message counts on all ports. 
""" for port_name, num_messages in port_message_counts.items(): if port_name == "muscle_settings_in": @@ -425,7 +425,7 @@ def restore_message_counts(self, port_message_counts: Dict[str, List[int]] ' the snapshot was taken?') def get_message_counts(self) -> Dict[str, List[int]]: - """Get message counts for all ports on the communicator + """Get message counts for all ports on the communicator. """ port_message_counts = {port_name: port.get_message_counts() for port_name, port in self._ports.items()} From 5ff567824cd3b16a5761de79c63f99faf2fcc114 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 14 Feb 2023 17:37:01 +0100 Subject: [PATCH 053/188] Finish SnapshotManager port to C++ --- .../cpp/src/libmuscle/snapshot_manager.cpp | 13 +-- .../tests/mocks/mock_communicator.cpp | 16 ++++ .../tests/mocks/mock_communicator.hpp | 8 ++ .../libmuscle/tests/mocks/mock_mmp_client.cpp | 19 ++++ .../libmuscle/tests/mocks/mock_mmp_client.hpp | 13 +++ .../libmuscle/tests/test_snapshot_manager.cpp | 86 +++++++++---------- 6 files changed, 99 insertions(+), 56 deletions(-) diff --git a/libmuscle/cpp/src/libmuscle/snapshot_manager.cpp b/libmuscle/cpp/src/libmuscle/snapshot_manager.cpp index a4d0395c..78ac4fbb 100644 --- a/libmuscle/cpp/src/libmuscle/snapshot_manager.cpp +++ b/libmuscle/cpp/src/libmuscle/snapshot_manager.cpp @@ -81,13 +81,11 @@ Optional SnapshotManager::prepare_resume( } resume_overlay_ = snapshot.settings_overlay; - // TODO: implement this on Communicator! - // communicator_.restore_message_counts(snapshot.port_message_counts_); + communicator_.restore_message_counts(snapshot.port_message_counts); // Store a copy of the snapshot in the current run directory auto path = store_snapshot_(snapshot); auto metadata = SnapshotMetadata::from_snapshot(snapshot, path); - // TODO: implement this on MMPClient! 
- // manager_.submit_snapshot_metadata(instance_id_, metadata); + manager_.submit_snapshot_metadata(metadata); } return result; @@ -114,9 +112,7 @@ double SnapshotManager::save_snapshot( std::vector const & triggers, double wallclock_time, Optional f_init_max_timestamp, ::ymmsl::Settings settings_overlay) { - // TODO: implement this on Communicator! - //auto port_message_counts = communicator_.get_message_counts(); - std::unordered_map> port_message_counts; + auto port_message_counts = communicator_.get_message_counts(); if (final) { // Decrease F_INIT port counts by one: F_INIT messages are already @@ -144,8 +140,7 @@ double SnapshotManager::save_snapshot( auto path = store_snapshot_(snapshot); auto metadata = SnapshotMetadata::from_snapshot(snapshot, path); - // TODO: implement this on MMPClient! - // manager_.submit_snapshot_metadata(instance_id_, metadata); + manager_.submit_snapshot_metadata(metadata); double timestamp = message.is_set() ? message.get().timestamp() : -INFINITY; if (final && f_init_max_timestamp.is_set()) { diff --git a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_communicator.cpp b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_communicator.cpp index a34d3df1..acc20d84 100644 --- a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_communicator.cpp +++ b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_communicator.cpp @@ -99,6 +99,16 @@ void MockCommunicator::shutdown() { } +MockCommunicator::PortMessageCounts MockCommunicator::get_message_counts() { + return get_message_counts_return_value; +} + +void MockCommunicator::restore_message_counts( + PortMessageCounts const & port_message_counts){ + last_restored_message_counts = port_message_counts; +} + + void MockCommunicator::reset() { num_constructed = 0; settings_in_connected_return_value = false; @@ -109,6 +119,8 @@ void MockCommunicator::reset() { last_sent_port = ""; last_sent_message = Message(0.0); last_sent_slot = {}; + get_message_counts_return_value.clear(); + last_restored_message_counts = {}; 
} int MockCommunicator::num_constructed = 0; @@ -130,5 +142,9 @@ Message MockCommunicator::last_sent_message(0.0); Optional MockCommunicator::last_sent_slot; +MockCommunicator::PortMessageCounts MockCommunicator::get_message_counts_return_value; + +MockCommunicator::PortMessageCounts MockCommunicator::last_restored_message_counts; + } } diff --git a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_communicator.hpp b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_communicator.hpp index 32d47bee..d1274eb4 100644 --- a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_communicator.hpp +++ b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_communicator.hpp @@ -23,6 +23,8 @@ using PortsDescription = std::unordered_map>; + MockCommunicator( ymmsl::Reference const & kernel, std::vector const & index, @@ -61,6 +63,10 @@ class MockCommunicator { void shutdown(); + PortMessageCounts get_message_counts(); + + void restore_message_counts(PortMessageCounts const & port_message_counts); + static void reset(); static int num_constructed; static bool settings_in_connected_return_value; @@ -72,6 +78,8 @@ class MockCommunicator { static std::string last_sent_port; static Message last_sent_message; static Optional last_sent_slot; + static PortMessageCounts get_message_counts_return_value; + static PortMessageCounts last_restored_message_counts; private: friend class TestCommunicator; diff --git a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mmp_client.cpp b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mmp_client.cpp index e0edfd8f..241ec74f 100644 --- a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mmp_client.cpp +++ b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mmp_client.cpp @@ -27,6 +27,12 @@ void MockMMPClient::submit_profile_events(std::vector const & even last_submitted_profile_events = event; } +void MockMMPClient::submit_snapshot_metadata( + SnapshotMetadata const & snapshot_metadata) +{ + last_submitted_snapshot_metadata = snapshot_metadata; +} + void MockMMPClient::register_instance( 
std::vector const & locations, std::vector<::ymmsl::Port> const & ports) @@ -42,6 +48,17 @@ ymmsl::Settings MockMMPClient::get_settings() { return settings; } +auto MockMMPClient::get_checkpoint_info() -> + std::tuple< + double, + DataConstRef, + Optional, + Optional + > +{ + return {}; +} + auto MockMMPClient::request_peers() -> std::tuple< std::vector<::ymmsl::Conduit>, @@ -90,5 +107,7 @@ LogMessage MockMMPClient::last_submitted_log_message( std::vector MockMMPClient::last_submitted_profile_events; +Optional MockMMPClient::last_submitted_snapshot_metadata; + } } diff --git a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mmp_client.hpp b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mmp_client.hpp index 4930d8af..158ec021 100644 --- a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mmp_client.hpp +++ b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mmp_client.hpp @@ -7,7 +7,9 @@ #include #include +#include #include +#include #include @@ -25,8 +27,18 @@ class MockMMPClient { void submit_profile_events(std::vector const & event); + void submit_snapshot_metadata(SnapshotMetadata const & snapshot_metadata); + ymmsl::Settings get_settings(); + auto get_checkpoint_info() -> + std::tuple< + double, + DataConstRef, + Optional, + Optional + >; + void register_instance( std::vector const & locations, std::vector<::ymmsl::Port> const & ports); @@ -49,6 +61,7 @@ class MockMMPClient { static std::vector<::ymmsl::Port> last_registered_ports; static LogMessage last_submitted_log_message; static std::vector last_submitted_profile_events; + static Optional last_submitted_snapshot_metadata; }; using MMPClient = MockMMPClient; diff --git a/libmuscle/cpp/src/libmuscle/tests/test_snapshot_manager.cpp b/libmuscle/cpp/src/libmuscle/tests/test_snapshot_manager.cpp index a3c1fb36..2c7dcf73 100644 --- a/libmuscle/cpp/src/libmuscle/tests/test_snapshot_manager.cpp +++ b/libmuscle/cpp/src/libmuscle/tests/test_snapshot_manager.cpp @@ -127,6 +127,11 @@ TEST_F(libmuscle_snapshot_manager, 
test_save_load_snapshot) { reset_mocks(); MockCommunicator communicator("test", {}, {}, mock_logger(), mock_profiler()); + MockCommunicator::PortMessageCounts port_message_counts = { + {"in", {1}}, + {"out", {2}}, + {"muscle_settings_in", {0}}}; + MockCommunicator::get_message_counts_return_value = port_message_counts; MockMMPClient manager("instance", ""); Reference instance_id("test[1]"); @@ -139,28 +144,21 @@ TEST_F(libmuscle_snapshot_manager, test_save_load_snapshot) { snapshot_manager.save_snapshot( Message(0.2, "test data"), false, {"test"}, 13.0, {}, {}); - // TODO: need to implement this on the mocks - // communicator.get_message_counts.assert_called_with() - // manager.submit_snapshot_metadata.assert_called() - // instance, metadata = manager.submit_snapshot_metadata.call_args[0] - // assert instance == instance_id - // assert isinstance(metadata, SnapshotMetadata) - // assert metadata.triggers == ['test'] - // assert metadata.wallclock_time == 13.0 - // assert metadata.timestamp == 0.2 - // assert metadata.next_timestamp is None - // assert metadata.port_message_counts == port_message_counts - // ASSERT_FALSE(metadata.is_final_snapshot); - // snapshot_path = Path(metadata.snapshot_filename) - // assert snapshot_path.parent == tmp_path - // assert snapshot_path.name == 'test-1_1.pack' - - // TODO: get from snapshot metadata instead of hardcoding - std::string snapshot_path = temp_dir_ + "/test-1_1.pack"; + ASSERT_TRUE(MockMMPClient::last_submitted_snapshot_metadata.is_set()); + auto & metadata = MockMMPClient::last_submitted_snapshot_metadata.get(); + ASSERT_EQ(metadata.triggers, std::vector({"test"})); + ASSERT_EQ(metadata.wallclock_time, 13.0); + ASSERT_EQ(metadata.timestamp, 0.2); + ASSERT_FALSE(metadata.next_timestamp.is_set()); + ASSERT_EQ(metadata.port_message_counts, port_message_counts); + ASSERT_FALSE(metadata.is_final_snapshot); + auto snapshot_path = metadata.snapshot_filename; + ASSERT_EQ(snapshot_path, temp_dir_ + "/test-1_1.pack"); 
SnapshotManager snapshot_manager2( instance_id, manager, communicator, mock_logger()); snapshot_manager2.prepare_resume(snapshot_path, temp_dir_); + ASSERT_EQ(MockCommunicator::last_restored_message_counts, port_message_counts); ASSERT_TRUE(snapshot_manager2.resuming_from_intermediate()); ASSERT_FALSE(snapshot_manager2.resuming_from_final()); @@ -173,19 +171,16 @@ TEST_F(libmuscle_snapshot_manager, test_save_load_snapshot) { snapshot_manager2.save_snapshot( Message(0.6, "test data2"), true, {"test"}, 42.2, 1.2, {}); - // TODO: need to implement this on the mocks - // instance, metadata = manager.submit_snapshot_metadata.call_args[0] - // assert instance == instance_id - // assert isinstance(metadata, SnapshotMetadata) - // assert metadata.triggers == ['test'] - // assert metadata.wallclock_time == 42.2 - // assert metadata.timestamp == 0.6 - // assert metadata.next_timestamp is None - // assert metadata.port_message_counts == port_message_counts - // assert metadata.is_final_snapshot - // snapshot_path = Path(metadata.snapshot_filename) - // assert snapshot_path.parent == tmp_path - // assert snapshot_path.name == 'test-1_3.pack' + ASSERT_TRUE(MockMMPClient::last_submitted_snapshot_metadata.is_set()); + metadata = MockMMPClient::last_submitted_snapshot_metadata.get(); + ASSERT_EQ(metadata.triggers, std::vector({"test"})); + ASSERT_EQ(metadata.wallclock_time, 42.2); + ASSERT_EQ(metadata.timestamp, 0.6); + ASSERT_FALSE(metadata.next_timestamp.is_set()); + ASSERT_EQ(metadata.port_message_counts, port_message_counts); + ASSERT_TRUE(metadata.is_final_snapshot); + snapshot_path = metadata.snapshot_filename; + ASSERT_EQ(snapshot_path, temp_dir_ + "/test-1_3.pack"); ASSERT_TRUE(snapshot_manager2.resuming_from_intermediate()); ASSERT_FALSE(snapshot_manager2.resuming_from_final()); @@ -198,6 +193,11 @@ TEST_F(libmuscle_snapshot_manager, test_save_load_implicit_snapshot) { reset_mocks(); MockCommunicator communicator("test", {}, {}, mock_logger(), mock_profiler()); + 
MockCommunicator::PortMessageCounts port_message_counts = { + {"in", {1}}, + {"out", {2}}, + {"muscle_settings_in", {0}}}; + MockCommunicator::get_message_counts_return_value = port_message_counts; MockMMPClient manager("instance", ""); Reference instance_id("test[1]"); @@ -210,29 +210,21 @@ TEST_F(libmuscle_snapshot_manager, test_save_load_implicit_snapshot) { // save implicit snapshot, i.e. Message=not set snapshot_manager.save_snapshot({}, true, {"implicit"}, 1.0, 1.5, {}); - // TODO: need to implement this on the mocks - // manager.submit_snapshot_metadata.assert_called_once() - // instance, metadata = manager.submit_snapshot_metadata.call_args[0] - // assert instance == instance_id - // assert isinstance(metadata, SnapshotMetadata) - // snapshot_path = Path(metadata.snapshot_filename) - // manager.submit_snapshot_metadata.reset_mock() - - // TODO: get from snapshot metadata instead of hardcoding - std::string snapshot_path = temp_dir_ + "/test-1_1.pack"; + ASSERT_TRUE(MockMMPClient::last_submitted_snapshot_metadata.is_set()); + auto & metadata = MockMMPClient::last_submitted_snapshot_metadata.get(); + std::string snapshot_path = metadata.snapshot_filename; + MockMMPClient::reset(); SnapshotManager snapshot_manager2( instance_id, manager, communicator, mock_logger()); snapshot_manager2.prepare_resume(snapshot_path, temp_dir_); - // TODO: need to implement this on the mocks - // communicator.restore_message_counts.assert_called_with(port_message_counts) - // manager.submit_snapshot_metadata.assert_called_once() - // manager.submit_snapshot_metadata.reset_mock() + ASSERT_EQ(MockCommunicator::last_restored_message_counts, port_message_counts); + ASSERT_TRUE(MockMMPClient::last_submitted_snapshot_metadata.is_set()); + MockMMPClient::reset(); ASSERT_FALSE(snapshot_manager2.resuming_from_intermediate()); ASSERT_FALSE(snapshot_manager2.resuming_from_final()); snapshot_manager2.save_snapshot({}, true, {"implicit"}, 12.3, 2.5, {}); - // TODO: need to implement this on 
the mocks - // manager.submit_snapshot_metadata.assert_called_once() + ASSERT_TRUE(MockMMPClient::last_submitted_snapshot_metadata.is_set()); } From 47d25ffaf44a4ad6a3d139a2f3a6eed02c4bd259 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Fri, 17 Feb 2023 14:53:49 +0100 Subject: [PATCH 054/188] Implement public checkpointing API on Instance Note: not ready for MPI yet. --- .../cpp/src/libmuscle/checkpoint_triggers.hpp | 4 + libmuscle/cpp/src/libmuscle/instance.cpp | 373 +++++++++++++++--- libmuscle/cpp/src/libmuscle/instance.hpp | 106 +++++ .../cpp/src/libmuscle/snapshot_manager.hpp | 4 + .../libmuscle/tests/mocks/mock_mmp_client.cpp | 10 +- .../cpp/src/libmuscle/tests/test_instance.cpp | 1 + 6 files changed, 437 insertions(+), 61 deletions(-) diff --git a/libmuscle/cpp/src/libmuscle/checkpoint_triggers.hpp b/libmuscle/cpp/src/libmuscle/checkpoint_triggers.hpp index bc8d2fc1..f37288ad 100644 --- a/libmuscle/cpp/src/libmuscle/checkpoint_triggers.hpp +++ b/libmuscle/cpp/src/libmuscle/checkpoint_triggers.hpp @@ -122,6 +122,10 @@ class TriggerManager { */ double checkpoints_considered_until(); + /** Returns whether checkpoints are defined + */ + bool has_checkpoints() const { return has_checkpoints_; } + /** Ensure our elapsed time is at least the given value */ void harmonise_wall_time(double at_least); diff --git a/libmuscle/cpp/src/libmuscle/instance.cpp b/libmuscle/cpp/src/libmuscle/instance.cpp index 527713f1..e75e7e02 100644 --- a/libmuscle/cpp/src/libmuscle/instance.cpp +++ b/libmuscle/cpp/src/libmuscle/instance.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -9,6 +10,8 @@ #include #include #include +#include +#include #include @@ -96,11 +99,19 @@ class Instance::Impl { Optional slot, Optional default_msg, bool with_settings); + bool resuming(); + bool should_init(); + Message load_snapshot(); + bool should_save_snapshot(double timestamp); + void save_snapshot(Message message); + bool should_save_final_snapshot(); + void 
save_final_snapshot(Message message); private: ::ymmsl::Reference instance_name_; std::unique_ptr manager_; std::unique_ptr logger_; + std::unique_ptr api_guard_; std::unique_ptr profiler_; std::unique_ptr communicator_; #ifdef MUSCLE_ENABLE_MPI @@ -110,7 +121,12 @@ class Instance::Impl { #endif PortsDescription declared_ports_; SettingsManager settings_manager_; - bool first_run_; + std::unique_ptr snapshot_manager_; + std::unique_ptr trigger_manager_; + Optional first_run_; + Optional do_reuse_; + bool do_resume_; + bool do_init_; std::unordered_map<::ymmsl::Reference, Message> f_init_cache_; bool is_shut_down_; InstanceFlags flags_; @@ -118,6 +134,7 @@ class Instance::Impl { void register_(); void connect_(); void deregister_(); + void setup_checkpointing_(); ::ymmsl::Reference make_full_name_(int argc, char const * const argv[]) const; std::string extract_manager_location_(int argc, char const * const argv[]) const; @@ -126,8 +143,15 @@ class Instance::Impl { std::vector<::ymmsl::Port> list_declared_ports_() const; void check_port_(std::string const & port_name); bool receive_settings_(); + bool have_f_init_connections_(); + bool pre_receive_(); void pre_receive_(std::string const & port_name, Optional slot); void pre_receive_f_init_(); + Optional f_init_max_timestamp_(); + bool decide_reuse_instance_(); + void save_snapshot_( + Optional message, bool final, + Optional f_init_max_timestamp); void set_local_log_level_(); void set_remote_log_level_(); void apply_overlay_(Message const & message); @@ -160,11 +184,15 @@ Instance::Impl::Impl( #endif , declared_ports_(ports) , settings_manager_() - , first_run_(true) + , first_run_() + , do_reuse_() + , do_resume_(false) + , do_init_(false) , f_init_cache_() , is_shut_down_(false) , flags_(flags) { + api_guard_.reset(new APIGuard(!!(flags_ & InstanceFlags::USES_CHECKPOINT_API))); #ifdef MUSCLE_ENABLE_MPI MPI_Comm_dup(communicator, &mpi_comm_); if (mpi_barrier_.is_root()) { @@ -180,8 +208,15 @@ Instance::Impl::Impl( 
communicator_.reset( new Communicator(name_(), index_(), ports, *logger_, *profiler_)); + snapshot_manager_.reset(new SnapshotManager( + instance_name_, *manager_, *communicator_, *logger_)); + trigger_manager_.reset(new TriggerManager()); + register_(); connect_(); + // Note: setup_checkpointing_() needs to have the ports initialized + // so it comes after connect_() + setup_checkpointing_(); set_local_log_level_(); set_remote_log_level_(); #ifdef MUSCLE_ENABLE_MPI @@ -212,77 +247,41 @@ Instance::Impl::~Impl() { } bool Instance::Impl::reuse_instance() { + api_guard_->verify_reuse_instance(); + bool do_reuse; + if (do_reuse_.is_set()) { + // thank you, should_save_final_snapshot, for running this already + do_reuse = do_reuse_.get(); + do_reuse_ = {}; + } else { + do_reuse = decide_reuse_instance_(); + } + + // now first_run_, do_resume_ and do_init_ are also set correctly #ifdef MUSCLE_ENABLE_MPI if (mpi_barrier_.is_root()) { #endif - do_reuse = receive_settings_(); - - // TODO: f_init_cache_ should be empty here, or the user didn't receive - // something that was sent on the last go-around. At least emit a warning. 
- pre_receive_f_init_(); - - set_local_log_level_(); - set_remote_log_level_(); - - auto ports = communicator_->list_ports(); - - bool f_init_not_connected = true; - if (ports.count(Operator::F_INIT) != 0) - for (auto const & port : ports.at(Operator::F_INIT)) - if (communicator_->get_port(port).is_connected()) { - f_init_not_connected = false; - break; - } - - bool no_settings_in = !communicator_->settings_in_connected(); - - if (f_init_not_connected && no_settings_in) { - do_reuse = first_run_; - first_run_ = false; - } - else { - for (auto const & ref_msg : f_init_cache_) - if (is_close_port(ref_msg.second.data())) - do_reuse = false; + bool do_implicit_checkpoint = ( + !first_run_.get() && + !(InstanceFlags::USES_CHECKPOINT_API & flags_) && + (!!(InstanceFlags::STATE_NOT_REQUIRED_FOR_NEXT_USE & flags_) or + !!(InstanceFlags::KEEPS_NO_STATE_FOR_NEXT_USE & flags_))); + + if (do_implicit_checkpoint) { + if (trigger_manager_->should_save_final_snapshot( + do_reuse, f_init_max_timestamp_())) + save_snapshot_({}, true, f_init_max_timestamp_()); } #ifdef MUSCLE_ENABLE_MPI - mpi_barrier_.signal(); - int do_reuse_mpi = do_reuse; - MPI_Bcast(&do_reuse_mpi, 1, MPI_INT, mpi_root_, mpi_comm_); - } - else { - mpi_barrier_.wait(); - int do_reuse_mpi; - MPI_Bcast(&do_reuse_mpi, 1, MPI_INT, mpi_root_, mpi_comm_); - do_reuse = do_reuse_mpi; - } -#endif - -#ifdef MUSCLE_ENABLE_MPI - if (mpi_barrier_.is_root()) { - auto soverlay_data = Data(settings_manager_.overlay); - msgpack::sbuffer sbuf; - msgpack::pack(sbuf, soverlay_data); - int size = sbuf.size(); - MPI_Bcast(&size, 1, MPI_INT, mpi_root_, mpi_comm_); - MPI_Bcast(sbuf.data(), size, MPI_CHAR, mpi_root_, mpi_comm_); - } - else { - int size; - MPI_Bcast(&size, 1, MPI_INT, mpi_root_, mpi_comm_); - std::vector buf(size); - MPI_Bcast(&buf[0], size, MPI_CHAR, mpi_root_, mpi_comm_); - auto zone = std::make_shared(); - DataConstRef soverlay_data = mcp::unpack_data(zone, &buf[0], size); - settings_manager_.overlay = 
soverlay_data.as(); } #endif if (!do_reuse) shutdown_(); + api_guard_->reuse_instance_done(do_reuse); return do_reuse; } @@ -452,6 +451,38 @@ void Instance::Impl::deregister_() { logger_->info("Deregistered from the manager"); } +void Instance::Impl::setup_checkpointing_() { + auto checkpoint_info = manager_->get_checkpoint_info(); + + auto elapsed_time = std::get<0>(checkpoint_info); + auto checkpoints = std::get<1>(checkpoint_info); + trigger_manager_->set_checkpoint_info(elapsed_time, checkpoints); + + auto checkpoint_support_mask = ( + InstanceFlags::USES_CHECKPOINT_API | + InstanceFlags::KEEPS_NO_STATE_FOR_NEXT_USE | + InstanceFlags::STATE_NOT_REQUIRED_FOR_NEXT_USE); + if (trigger_manager_->has_checkpoints() && !(flags_ & checkpoint_support_mask)) { + std::string msg( + "The workflow has requested checkpoints, but this instance" + " does not support checkpointing. Please consult the" + " MUSCLE3 checkpointing documentation how to add" + " checkpointing support."); + logger_->critical(msg); + shutdown_(); + throw std::runtime_error(msg); + } + + auto resume_snapshot = std::get<2>(checkpoint_info); + auto snapshot_dir = std::get<3>(checkpoint_info); + auto saved_at = snapshot_manager_->prepare_resume(resume_snapshot, snapshot_dir); + // resume settings overlay + settings_manager_.overlay = snapshot_manager_->resume_overlay(); + + if (saved_at.is_set()) + trigger_manager_->update_checkpoints(saved_at.get()); +} + Message Instance::Impl::receive_message( std::string const & port_name, Optional slot, @@ -536,6 +567,54 @@ Message Instance::Impl::receive_message( return result; } +bool Instance::Impl::resuming() { + api_guard_->verify_resuming(); + api_guard_->resuming_done(do_resume_); + return do_resume_; +} + +bool Instance::Impl::should_init() { + api_guard_->verify_should_init(); + api_guard_->should_init_done(); + return do_init_; +} + +Message Instance::Impl::load_snapshot() { + api_guard_->verify_load_snapshot(); + auto result = 
snapshot_manager_->load_snapshot(); + api_guard_->load_snapshot_done(); + return result; +} + +bool Instance::Impl::should_save_snapshot(double timestamp) { + api_guard_->verify_should_save_snapshot(); + auto result = trigger_manager_->should_save_snapshot(timestamp); + api_guard_->should_save_snapshot_done(result); + return result; +} + +void Instance::Impl::save_snapshot(Message message) { + api_guard_->verify_save_snapshot(); + save_snapshot_(message, false, {}); + api_guard_->save_snapshot_done(); +} + +bool Instance::Impl::should_save_final_snapshot() { + api_guard_->verify_should_save_final_snapshot(); + + do_reuse_ = decide_reuse_instance_(); + auto result = trigger_manager_->should_save_final_snapshot( + do_reuse_.get(), f_init_max_timestamp_()); + + api_guard_->should_save_final_snapshot_done(result); + return result; +} + +void Instance::Impl::save_final_snapshot(Message message) { + api_guard_->verify_save_final_snapshot(); + save_snapshot_(message, true, f_init_max_timestamp_()); + api_guard_->save_final_snapshot_done(); +} /* Returns instance name. * @@ -668,6 +747,34 @@ bool Instance::Impl::receive_settings_() { return true; } +/** Checks whether we have connected F_INIT ports. + * + * This includes muscle_settings_in, and any user-defined ports. + */ +bool Instance::Impl::have_f_init_connections_() { + auto ports = communicator_->list_ports(); + if (ports.count(Operator::F_INIT) != 0) + for (auto const & port : ports.at(Operator::F_INIT)) + if (communicator_->get_port(port).is_connected()) + return true; + return communicator_->settings_in_connected(); +} + +/** Pre-receives on all ports. + * + * This includes muscle_settings_in and all user-defined ports. + * + * @return true iff no ClosePort messages were received. 
+ */ +bool Instance::Impl::pre_receive_() { + bool all_ports_open = receive_settings_(); + pre_receive_f_init_(); + for (auto const & ref_msg : f_init_cache_) + if (is_close_port(ref_msg.second.data())) + all_ports_open = false; + return all_ports_open; +} + /* Pre-receive on the given port and slot, if any. */ void Instance::Impl::pre_receive_( @@ -713,6 +820,124 @@ void Instance::Impl::pre_receive_f_init_() { } } +/** Return max timestamp of pre-received F_INIT messages + */ +Optional Instance::Impl::f_init_max_timestamp_() { + Optional result; + for (auto const & ref_msg : f_init_cache_) { + auto timestamp = ref_msg.second.timestamp(); + if (!result.is_set() || result.get() < timestamp) + result = timestamp; + } + return result; +} + +/** Decide whether and how to reuse the instance. + * + * This sets self._first_run, self._do_resume and self._do_init, and + * returns whether to reuse one more time. This is the real top of + * the reuse loop, and it gets called by reuse_instance and + * should_save_final_snapshot. 
+ */ +bool Instance::Impl::decide_reuse_instance_() { + if (!first_run_.is_set()) + first_run_ = true; + else + first_run_ = false; + + bool do_reuse; + +#ifdef MUSCLE_ENABLE_MPI + if (mpi_barrier_.is_root()) { +#endif + bool f_init_connected = have_f_init_connections_(); + if (first_run_.get() && snapshot_manager_->resuming_from_intermediate()) { + // resume from intermediate + do_resume_ = true; + do_init_ = false; + do_reuse = true; + } else if (first_run_.get() && snapshot_manager_->resuming_from_final()) { + // resume from final + if (f_init_connected) { + bool got_f_init_messages = pre_receive_(); + do_resume_ = true; + do_init_ = true; + do_reuse = got_f_init_messages; + } else { + do_resume_ = false; + do_init_ = false; + do_reuse = false; + } + } else { + // fresh start or resuming from implicit snapshot + do_resume_ = false; + + if (!f_init_connected) { + // simple straight single run without resuming + do_init_ = first_run_.get(); + do_reuse = first_run_.get(); + } else { + // not resuming and f_init connected, run while we get messages + bool got_f_init_messages = pre_receive_(); + do_init_ = got_f_init_messages; + do_reuse = got_f_init_messages; + } + } + +#ifdef MUSCLE_ENABLE_MPI + mpi_barrier_.signal(); + int do_reuse_mpi[3] = {do_reuse, do_resume_, do_init_}; + MPI_Bcast(do_reuse_mpi, 3, MPI_INT, mpi_root_, mpi_comm_); + } else { + mpi_barrier_.wait(); + int do_reuse_mpi[3]; + MPI_Bcast(do_reuse_mpi, 3, MPI_INT, mpi_root_, mpi_comm_); + do_reuse = do_reuse_mpi[0]; + do_resume_ = do_reuse_mpi[1]; + do_init_ = do_reuse_mpi[2]; + } +#endif + +#ifdef MUSCLE_ENABLE_MPI + if (mpi_barrier_.is_root()) { + auto soverlay_data = Data(settings_manager_.overlay); + msgpack::sbuffer sbuf; + msgpack::pack(sbuf, soverlay_data); + int size = sbuf.size(); + MPI_Bcast(&size, 1, MPI_INT, mpi_root_, mpi_comm_); + MPI_Bcast(sbuf.data(), size, MPI_CHAR, mpi_root_, mpi_comm_); + } + else { + int size; + MPI_Bcast(&size, 1, MPI_INT, mpi_root_, mpi_comm_); + std::vector 
buf(size); + MPI_Bcast(&buf[0], size, MPI_CHAR, mpi_root_, mpi_comm_); + auto zone = std::make_shared(); + DataConstRef soverlay_data = mcp::unpack_data(zone, &buf[0], size); + settings_manager_.overlay = soverlay_data.as(); + } +#endif + + return do_reuse; +} + +/** Save a snapshot to disk and notify manager. + * + * @param message The data to save. + * @param final Whether this is a final snapshot or an intermediate one. + * @param f_init_max_timestamp Timestamp for final snapshots. + */ +void Instance::Impl::save_snapshot_( + Optional message, bool final, + Optional f_init_max_timestamp) { + auto triggers = trigger_manager_->get_triggers(); + auto walltime = trigger_manager_->elapsed_walltime(); + auto timestamp = snapshot_manager_->save_snapshot( + message, final, triggers, walltime, + f_init_max_timestamp, settings_manager_.overlay); + trigger_manager_->update_checkpoints(timestamp); +} + /* Sets the level a log message must have to be printed locally. * * It gets this from the muscle_local_log_level setting. 
@@ -1061,6 +1286,34 @@ Message Instance::receive_with_settings( return impl_()->receive_message(port_name, slot, default_msg, true); } +bool Instance::resuming() { + return impl_()->resuming(); +} + +bool Instance::should_init() { + return impl_()->should_init(); +} + +Message Instance::load_snapshot() { + return impl_()->load_snapshot(); +} + +bool Instance::should_save_snapshot(double timestamp) { + return impl_()->should_save_snapshot(timestamp); +} + +void Instance::save_snapshot(Message message) { + impl_()->save_snapshot(message); +} + +bool Instance::should_save_final_snapshot() { + return impl_()->should_save_final_snapshot(); +} + +void Instance::save_final_snapshot(Message message) { + impl_()->save_final_snapshot(message); +} + Instance::Impl const * Instance::impl_() const { return pimpl_.get(); } diff --git a/libmuscle/cpp/src/libmuscle/instance.hpp b/libmuscle/cpp/src/libmuscle/instance.hpp index 9b651b3d..dca27379 100644 --- a/libmuscle/cpp/src/libmuscle/instance.hpp +++ b/libmuscle/cpp/src/libmuscle/instance.hpp @@ -629,6 +629,112 @@ class Instance { std::string const & port_name, int slot, Message const & default_msg); + /** Check if this instance is resuming from a snapshot. + * + * Must be used by submodels that implement the checkpointing API. You'll + * get a RuntimeError when not calling this method in an iteration of the + * reuse loop. + * + * This method returns True for the first iteration of the reuse loop after + * resuming from a previously taken snapshot. When resuming from a + * snapshot, the submodel must load its state from the snapshot as returned + * Instance::load_snapshot. + * + * @return true iff the submodel must resume from a snapshot. + */ + bool resuming(); + + /** Check if this instance should initialize. + * + * Must be used by submodels that implement the checkpointing API. + * + * When resuming from a previous snapshot, instances need not always + * execute the F_INIT phase of the submodel execution loop. 
Use this method + * before attempting to receive data on F_INIT ports. + * + * @return true if the submodel must execute the F_INIT step. + * @return false otherwise. + */ + bool should_init(); + + /** Load a snapshot. + * + * Must only be called when Instance::resuming returns True. + * + * @return Message containing the state as saved in a previous run + * through Instance::save_snapshot or Instance::save_final_snapshot. + */ + Message load_snapshot(); + + /** Check if a snapshot should be saved after the S Operator of the submodel. + * + * This method checks if a snapshot should be saved right now, based on the + * provided timestamp and passed wallclock time. + * + * When this method returns true, the submodel must also save a snapshot + * through Instance::save_snapshot. A std::runtime_error will be generated when + * not doing so. + * + * See also Instance::should_save_final_snapshot for the variant that must be + * called at the end of the reuse loop. + * + * @param timestamp current timestamp of the submodel. + * @return true iff a snapshot should be taken by the submodel according to the + * checkpoint rules provided in the ymmsl configuration. + */ + bool should_save_snapshot(double timestamp); + + /** Save a snapshot after the S Operator of the submodel. + * + * Before saving a snapshot, you should check using + * Instance::should_save_snapshot if a snapshot should be saved according to + * the checkpoint rules specified in the ymmsl configuration. You should + * use the same timestamp in the provided Message object as used to query + * Instance::should_save_snapshot. + * + * @param message Message object that is saved as snapshot. The message + * timestamp attribute should be the same as passed to + * Instance::should_save_snapshot. The data attribute can be used to + * store the internal state of the submodel. + */ + void save_snapshot(Message message); + + /** Check if a snapshot should be saved at the end of the reuse loop. 
+ * + * This method checks if a snapshot should be saved now. + * + * When this method returns true, the submodel must also save a snapshot + * through Instance::save_final_snapshot. A std::runtime_error will be + * generated when not doing so. + * + * See also Instance::should_save_snapshot for the variant that may be called + * inside of a time-integration loop of the submodel. + * + * \note + * This method will block until it can determine whether a final + * snapshot should be taken. This means it must also determine if this + * instance is reused. + * + * @return true iff a final snapshot should be taken by the submodel according + * to the checkpoint rules provided in the ymmsl configuration. + */ + bool should_save_final_snapshot(); + + /** Save a snapshot at the end of the reuse loop. + * + * Before saving a snapshot, you should check using + * Instance::should_save_final_snapshot if a snapshot should be saved + * according to the checkpoint rules specified in the ymmsl configuration. + * + * See also Instance::save_snapshot for the variant that may be called after + * each S Operator of the submodel. + * + * @param message Message object that is saved as snapshot. The data + * attribute can be used to store the internal state of the + * submodel. + */ + void save_final_snapshot(Message message); + private: class Impl; std::unique_ptr pimpl_; diff --git a/libmuscle/cpp/src/libmuscle/snapshot_manager.hpp b/libmuscle/cpp/src/libmuscle/snapshot_manager.hpp index 7b8915fd..8d4ea0bd 100644 --- a/libmuscle/cpp/src/libmuscle/snapshot_manager.hpp +++ b/libmuscle/cpp/src/libmuscle/snapshot_manager.hpp @@ -47,6 +47,10 @@ class SnapshotManager { Optional const & resume_snapshot, Optional const & snapshot_directory); + /** Get the settings overlay to be used when resuming + */ + ::ymmsl::Settings resume_overlay() const { return resume_overlay_; } + /** Check whether we have an intermediate snapshot. 
* * Doesn't say whether we should resume now, just that we were diff --git a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mmp_client.cpp b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mmp_client.cpp index 241ec74f..76bf67e6 100644 --- a/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mmp_client.cpp +++ b/libmuscle/cpp/src/libmuscle/tests/mocks/mock_mmp_client.cpp @@ -56,7 +56,15 @@ auto MockMMPClient::get_checkpoint_info() -> Optional > { - return {}; + return { + 0.1, + // no checkpoints defined: + Data::dict( + "at_end", false, + "wallclock_time", Data::list(), + "simulation_time", Data::list()), + {}, + {}}; } auto MockMMPClient::request_peers() -> diff --git a/libmuscle/cpp/src/libmuscle/tests/test_instance.cpp b/libmuscle/cpp/src/libmuscle/tests/test_instance.cpp index 637732e0..f462facb 100644 --- a/libmuscle/cpp/src/libmuscle/tests/test_instance.cpp +++ b/libmuscle/cpp/src/libmuscle/tests/test_instance.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include // then add mock implementations as needed. 
From 6056f7d786fc265511e7a413525a246861ac9777 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Fri, 17 Feb 2023 14:54:10 +0100 Subject: [PATCH 055/188] Some refactoring in Python checkpointing API --- libmuscle/python/libmuscle/instance.py | 74 +++++++++---------- .../python/libmuscle/snapshot_manager.py | 4 +- 2 files changed, 35 insertions(+), 43 deletions(-) diff --git a/libmuscle/python/libmuscle/instance.py b/libmuscle/python/libmuscle/instance.py index 17d576e7..8477a922 100644 --- a/libmuscle/python/libmuscle/instance.py +++ b/libmuscle/python/libmuscle/instance.py @@ -180,32 +180,9 @@ def __init__( self._register() self._connect() - - # Note: get_checkpoint_info needs to have the ports initialized + # Note: self._setup_checkpointing() needs to have the ports initialized # so it comes after self._connect() - checkpoint_info = self.__manager.get_checkpoint_info() - - elapsed_time, checkpoints = checkpoint_info[0:2] - self._trigger_manager.set_checkpoint_info(elapsed_time, checkpoints) - - if checkpoints and not (self._flags & _CHECKPOINT_SUPPORT_MASK): - raise RuntimeError( - 'The workflow has requested checkpoints, but this instance' - ' does not support checkpointing. Please consult the' - ' MUSCLE3 checkpointing documentation how to add' - ' checkpointing support.') - - resume_snapshot, snapshot_dir = checkpoint_info[2:4] - saved_at = self._snapshot_manager.prepare_resume( - resume_snapshot, snapshot_dir) - # Resume settings overlay - overlay = self._snapshot_manager._resume_overlay - if overlay is not None: - self._settings_manager.overlay = overlay - - if saved_at is not None: - self._trigger_manager.update_checkpoints(saved_at) - + self._setup_checkpointing() self._set_local_log_level() self._set_remote_log_level() @@ -533,8 +510,7 @@ def resuming(self) -> bool: by :meth:`load_snapshot`. Returns: - True iff the submodel must resume from a snapshot instead of the - usual F_INIT step during this iteration of the reuse loop. 
+ True iff the submodel must resume from a snapshot. """ self._api_guard.verify_resuming() self._api_guard.resuming_done(self._do_resume) @@ -563,7 +539,7 @@ def load_snapshot(self) -> Message: Returns: Message object containing the state as saved in a previous run - through :meth:`save_snapshot` or :meth:`save_final_snapshot` + through :meth:`save_snapshot` or :meth:`save_final_snapshot`. Raises: RuntimeError: if not resuming from a snapshot. @@ -588,7 +564,7 @@ def should_save_snapshot(self, timestamp: float) -> bool: called at the end of the reuse loop. Args: - timestamp: current timestamp of the submodel + timestamp: current timestamp of the submodel. Returns: True iff a snapshot should be taken by the submodel according to the @@ -608,12 +584,6 @@ def save_snapshot(self, message: Message) -> None: use the same timestamp in the provided Message object as used to query `should_save_snapshot`. - Although it is allowed to save a snapshot even when - :meth:`should_save_snapshot` returns False, you should avoid this: this - situation is not likely to lead to a consistent snapshot over all - submodels of the run (and therefore it is not useful to restart from). - It could also lead to a lot of snapshot files clogging your file system. - See also :meth:`save_final_snapshot` for the variant that must be called at the end of the reuse loop. @@ -666,12 +636,6 @@ def save_final_snapshot(self, message: Message) -> None: :meth:`should_save_final_snapshot` if a snapshot should be saved according to the checkpoint rules specified in the ymmsl configuration. - Although it is allowed to save a snapshot even when - :meth:`should_save_final_snapshot` returns False, you should avoid this: - this situation is not likely to lead to a consistent snapshot over all - submodels of the run (and therefore it is not useful to restart from). - It could also lead to a lot of snapshot files clogging your file system. 
- See also :meth:`save_snapshot` for the variant that may be called after each S Operator of the submodel. @@ -727,6 +691,34 @@ def _deregister(self) -> None: self._profiler.shutdown() _logger.info('Deregistered from the manager') + def _setup_checkpointing(self) -> None: + """Setup checkpointing. + """ + checkpoint_info = self.__manager.get_checkpoint_info() + + elapsed_time, checkpoints = checkpoint_info[0:2] + self._trigger_manager.set_checkpoint_info(elapsed_time, checkpoints) + + if checkpoints and not (self._flags & _CHECKPOINT_SUPPORT_MASK): + err_msg = ( + 'The workflow has requested checkpoints, but this instance' + ' does not support checkpointing. Please consult the' + ' MUSCLE3 checkpointing documentation how to add' + ' checkpointing support.') + self.__shutdown(err_msg) + raise RuntimeError(err_msg) + + resume_snapshot, snapshot_dir = checkpoint_info[2:4] + saved_at = self._snapshot_manager.prepare_resume( + resume_snapshot, snapshot_dir) + # Resume settings overlay + overlay = self._snapshot_manager.resume_overlay + if overlay is not None: + self._settings_manager.overlay = overlay + + if saved_at is not None: + self._trigger_manager.update_checkpoints(saved_at) + @staticmethod def __extract_manager_location() -> str: """Gets the manager network location from the command line. 
diff --git a/libmuscle/python/libmuscle/snapshot_manager.py b/libmuscle/python/libmuscle/snapshot_manager.py index c2bbb1a3..9a0e0581 100644 --- a/libmuscle/python/libmuscle/snapshot_manager.py +++ b/libmuscle/python/libmuscle/snapshot_manager.py @@ -43,7 +43,7 @@ def __init__(self, self._manager = manager self._resume_from_snapshot: Optional[Snapshot] = None - self._resume_overlay = Settings() + self.resume_overlay = Settings() self._next_snapshot_num = 1 def prepare_resume( @@ -73,7 +73,7 @@ def prepare_resume( # snapshot.message is None for implicit snapshots self._resume_from_snapshot = snapshot result = snapshot.message.timestamp - self._resume_overlay = snapshot.settings_overlay + self.resume_overlay = snapshot.settings_overlay self._communicator.restore_message_counts( snapshot.port_message_counts) From 6c86d94f6708590d0f46b2fe119c3b3d26b50134 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Fri, 17 Feb 2023 15:35:22 +0100 Subject: [PATCH 056/188] APIGuard changes for non-root MPI processes --- libmuscle/cpp/src/libmuscle/api_guard.cpp | 20 ++++++--- libmuscle/cpp/src/libmuscle/api_guard.hpp | 6 ++- libmuscle/cpp/src/libmuscle/instance.cpp | 9 +++- .../src/libmuscle/tests/test_api_guard.cpp | 42 +++++++++++++------ 4 files changed, 58 insertions(+), 19 deletions(-) diff --git a/libmuscle/cpp/src/libmuscle/api_guard.cpp b/libmuscle/cpp/src/libmuscle/api_guard.cpp index f2d00125..7977f0e3 100644 --- a/libmuscle/cpp/src/libmuscle/api_guard.cpp +++ b/libmuscle/cpp/src/libmuscle/api_guard.cpp @@ -4,9 +4,10 @@ namespace libmuscle { namespace impl { -APIGuard::APIGuard(bool uses_checkpointing) +APIGuard::APIGuard(bool uses_checkpointing, bool is_root) : phase_(APIPhase::BEFORE_FIRST_REUSE_INSTANCE), - uses_checkpointing_(uses_checkpointing) + uses_checkpointing_(uses_checkpointing), + is_root_(is_root) {} void APIGuard::verify_reuse_instance() { @@ -46,7 +47,7 @@ void APIGuard::verify_resuming() { } void APIGuard::resuming_done(bool resuming) { - if (resuming) 
{ + if (resuming && is_root_) { phase_ = APIPhase::BEFORE_LOAD_SNAPSHOT; } else { phase_ = APIPhase::BEFORE_SHOULD_INIT; @@ -54,6 +55,9 @@ void APIGuard::resuming_done(bool resuming) { } void APIGuard::verify_load_snapshot() { + if (!is_root_) + throw std::runtime_error( + "load_snapshot may only be called from the root process"); if (phase_ != APIPhase::BEFORE_LOAD_SNAPSHOT) { throw std::runtime_error( "Please check that we are resuming by calling resuming()" @@ -85,12 +89,15 @@ void APIGuard::verify_should_save_snapshot() { } void APIGuard::should_save_snapshot_done(bool should_save) { - if (should_save) { + if (should_save && is_root_) { phase_ = APIPhase::BEFORE_SAVE_SNAPSHOT; } } void APIGuard::verify_save_snapshot() { + if (!is_root_) + throw std::runtime_error( + "save_snapshot may only be called from the root process"); if (phase_ != APIPhase::BEFORE_SAVE_SNAPSHOT) { generic_error_messages_("save_snapshot"); throw std::runtime_error("Should be unreachable."); @@ -109,7 +116,7 @@ void APIGuard::verify_should_save_final_snapshot() { } void APIGuard::should_save_final_snapshot_done(bool should_save) { - if (should_save) { + if (should_save && is_root_) { phase_ = APIPhase::BEFORE_SAVE_FINAL_SNAPSHOT; } else { phase_ = APIPhase::BEFORE_REUSE_INSTANCE; @@ -117,6 +124,9 @@ void APIGuard::should_save_final_snapshot_done(bool should_save) { } void APIGuard::verify_save_final_snapshot() { + if (!is_root_) + throw std::runtime_error( + "save_final_snapshot may only be called from the root process"); if (phase_ != APIPhase::BEFORE_SAVE_FINAL_SNAPSHOT) { generic_error_messages_("save_final_snapshot"); throw std::runtime_error("Should be unreachable."); diff --git a/libmuscle/cpp/src/libmuscle/api_guard.hpp b/libmuscle/cpp/src/libmuscle/api_guard.hpp index a4517055..4fb6560b 100644 --- a/libmuscle/cpp/src/libmuscle/api_guard.hpp +++ b/libmuscle/cpp/src/libmuscle/api_guard.hpp @@ -67,8 +67,11 @@ class APIGuard { /** Create an APIGuard * * This starts the tracker in the 
phase BEFORE_FIRST_REUSE_INSTANCE. + * + * @param uses_checkpointing Whether this instance wants to use checkpointing. + * @param is_root Whether this is the root process (relevant for MPI). */ - explicit APIGuard(bool uses_checkpointing); + explicit APIGuard(bool uses_checkpointing, bool is_root); /** Check reuse_instance() */ @@ -145,6 +148,7 @@ class APIGuard { private: APIPhase phase_; bool uses_checkpointing_; + bool is_root_; void generic_error_messages_(std::string verify_phase); }; diff --git a/libmuscle/cpp/src/libmuscle/instance.cpp b/libmuscle/cpp/src/libmuscle/instance.cpp index e75e7e02..456a35bd 100644 --- a/libmuscle/cpp/src/libmuscle/instance.cpp +++ b/libmuscle/cpp/src/libmuscle/instance.cpp @@ -192,7 +192,14 @@ Instance::Impl::Impl( , is_shut_down_(false) , flags_(flags) { - api_guard_.reset(new APIGuard(!!(flags_ & InstanceFlags::USES_CHECKPOINT_API))); + api_guard_.reset(new APIGuard( + !!(flags_ & InstanceFlags::USES_CHECKPOINT_API), +#ifdef MUSCLE_ENABLE_MPI + mpi_barrier_.is_root() +#else + true +#endif + )); #ifdef MUSCLE_ENABLE_MPI MPI_Comm_dup(communicator, &mpi_comm_); if (mpi_barrier_.is_root()) { diff --git a/libmuscle/cpp/src/libmuscle/tests/test_api_guard.cpp b/libmuscle/cpp/src/libmuscle/tests/test_api_guard.cpp index d41302ef..11a55967 100644 --- a/libmuscle/cpp/src/libmuscle/tests/test_api_guard.cpp +++ b/libmuscle/cpp/src/libmuscle/tests/test_api_guard.cpp @@ -14,7 +14,7 @@ int main(int argc, char *argv[]) { } TEST(libmuscle_api_guard, test_no_checkpointing_support) { - auto guard = APIGuard(false); + auto guard = APIGuard(false, true); for (int i=0; i<3; ++i) { guard.verify_reuse_instance(); guard.reuse_instance_done(true); @@ -25,7 +25,7 @@ TEST(libmuscle_api_guard, test_no_checkpointing_support) { } TEST(libmuscle_api_guard, test_final_snapshot_only) { - auto guard = APIGuard(true); + auto guard = APIGuard(true, true); for (int i=0; i<4; ++i) { guard.verify_reuse_instance(); guard.reuse_instance_done(true); @@ -60,7 +60,7 @@ 
TEST(libmuscle_api_guard, test_final_snapshot_only) { TEST(libmuscle_api_guard, test_full_checkpointing) { - auto guard = APIGuard(true); + auto guard = APIGuard(true, true); for (int i=0; i<4; ++i) { guard.verify_reuse_instance(); guard.reuse_instance_done(true); @@ -105,6 +105,24 @@ TEST(libmuscle_api_guard, test_full_checkpointing) { guard.reuse_instance_done(false); } +TEST(libmsucle_api_guard, test_non_root) { + auto guard = APIGuard(true, false); + guard.verify_reuse_instance(); + guard.reuse_instance_done(true); + guard.verify_resuming(); + guard.resuming_done(true); + ASSERT_THROW(guard.verify_load_snapshot(), std::runtime_error); + guard.verify_should_init(); + guard.should_init_done(); + guard.verify_should_save_snapshot(); + guard.should_save_snapshot_done(true); + ASSERT_THROW(guard.verify_save_snapshot(), std::runtime_error); + guard.verify_should_save_final_snapshot(); + guard.should_save_final_snapshot_done(true); + ASSERT_THROW(guard.verify_save_final_snapshot(), std::runtime_error); + guard.verify_reuse_instance(); +} + static std::vector< std::function > api_guard_funs_({ [](APIGuard & guard){ guard.verify_reuse_instance(); }, // 0 [](APIGuard & guard){ guard.reuse_instance_done(true); }, // 1 @@ -141,50 +159,50 @@ void check_all_raise_except(APIGuard & guard, std::set excluded) { } } -TEST(libmuscle_api_guard, test_missing_resuming){ - auto guard = APIGuard(true); +TEST(libmuscle_api_guard, test_missing_resuming) { + auto guard = APIGuard(true, true); run_until_before(guard, 2); // 2 = verify_resuming check_all_raise_except(guard, {2}); } TEST(libmuscle_api_guard, test_missing_load_snapshot) { - auto guard = APIGuard(true); + auto guard = APIGuard(true, true); run_until_before(guard, 4); // 4 = verify_load_snapshot check_all_raise_except(guard, {4}); } TEST(libmuscle_api_guard, test_missing_should_init) { - auto guard = APIGuard(true); + auto guard = APIGuard(true, true); run_until_before(guard, 6); // 6 = verify_should_init 
check_all_raise_except(guard, {6}); } TEST(libmuscle_api_guard, test_missing_should_save) { - auto guard = APIGuard(true); + auto guard = APIGuard(true, true); run_until_before(guard, 8); // 8 = verify_should_save_snapshot check_all_raise_except(guard, {8, 12}); // 12 = verify_should_save_final_snapshot } TEST(libmuscle_api_guard, test_missing_save_snapshot) { - auto guard = APIGuard(true); + auto guard = APIGuard(true, true); run_until_before(guard, 10); // 10 = verify_save_snapshot check_all_raise_except(guard, {10}); } TEST(libmuscle_api_guard, test_missing_should_save_final) { - auto guard = APIGuard(true); + auto guard = APIGuard(true, true); run_until_before(guard, 12); // 12 = verify_should_save_final_snapshot check_all_raise_except(guard, {12, 8}); // 8 = verify_should_save_snapshot } TEST(libmuscle_api_guard, test_missing_save_final_snapshot) { - auto guard = APIGuard(true); + auto guard = APIGuard(true, true); run_until_before(guard, 14); // 14 = verify_save_final_snapshot check_all_raise_except(guard, {14}); } TEST(libmuscle_api_guard, test_double_should_save) { - auto guard = APIGuard(true); + auto guard = APIGuard(true, true); run_until_before(guard, 8); // 8 = verify_should_save_snapshot guard.verify_should_save_snapshot(); guard.should_save_snapshot_done(true); From 85cbe1b7c4305ca0d2dc00f42cc94eecc577351a Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Fri, 17 Feb 2023 16:26:37 +0100 Subject: [PATCH 057/188] Check should_save_(final_)snapshot on root process And broadcast results to the others --- libmuscle/cpp/src/libmuscle/instance.cpp | 40 +++++++++++++++++++++--- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/libmuscle/cpp/src/libmuscle/instance.cpp b/libmuscle/cpp/src/libmuscle/instance.cpp index 456a35bd..efc808aa 100644 --- a/libmuscle/cpp/src/libmuscle/instance.cpp +++ b/libmuscle/cpp/src/libmuscle/instance.cpp @@ -595,13 +595,28 @@ Message Instance::Impl::load_snapshot() { bool 
Instance::Impl::should_save_snapshot(double timestamp) { api_guard_->verify_should_save_snapshot(); - auto result = trigger_manager_->should_save_snapshot(timestamp); + bool result; +#ifdef MUSCLE_ENABLE_MPI + if (mpi_barrier_.is_root()) { +#endif + result = trigger_manager_->should_save_snapshot(timestamp); +#ifdef MUSCLE_ENABLE_MPI + mpi_barrier_.signal(); + int result_mpi = result; + MPI_Bcast(&result_mpi, 1, MPI_INT, mpi_root_, mpi_comm_); + } else { + mpi_barrier_.wait(); + int result_mpi; + MPI_Bcast(&result_mpi, 1, MPI_INT, mpi_root_, mpi_comm_); + result = result_mpi; + } +#endif api_guard_->should_save_snapshot_done(result); return result; } void Instance::Impl::save_snapshot(Message message) { - api_guard_->verify_save_snapshot(); + api_guard_->verify_save_snapshot(); // API guard verifies we are mpi_root save_snapshot_(message, false, {}); api_guard_->save_snapshot_done(); } @@ -610,15 +625,30 @@ bool Instance::Impl::should_save_final_snapshot() { api_guard_->verify_should_save_final_snapshot(); do_reuse_ = decide_reuse_instance_(); - auto result = trigger_manager_->should_save_final_snapshot( - do_reuse_.get(), f_init_max_timestamp_()); + bool result; +#ifdef MUSCLE_ENABLE_MPI + if (mpi_barrier_.is_root()) { +#endif + result = trigger_manager_->should_save_final_snapshot( + do_reuse_.get(), f_init_max_timestamp_()); +#ifdef MUSCLE_ENABLE_MPI + mpi_barrier_.signal(); + int result_mpi = result; + MPI_Bcast(&result_mpi, 1, MPI_INT, mpi_root_, mpi_comm_); + } else { + mpi_barrier_.wait(); + int result_mpi; + MPI_Bcast(&result_mpi, 1, MPI_INT, mpi_root_, mpi_comm_); + result = result_mpi; + } +#endif api_guard_->should_save_final_snapshot_done(result); return result; } void Instance::Impl::save_final_snapshot(Message message) { - api_guard_->verify_save_final_snapshot(); + api_guard_->verify_save_final_snapshot(); // API guard verifies we are mpi_root save_snapshot_(message, true, f_init_max_timestamp_()); api_guard_->save_final_snapshot_done(); } From 
70431f7e2880f36ae7af00fb523e602b6313bc7a Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 20 Feb 2023 10:58:10 +0100 Subject: [PATCH 058/188] Update integration_test: run_manager_with_actors Allows easier parametrization of actors --- integration_test/conftest.py | 80 +++++++++++-------- integration_test/test_cpp_macro_micro.py | 6 +- integration_test/test_fortran_macro_micro.py | 7 +- integration_test/test_multicast_cpp.py | 8 +- .../test_snapshot_complex_coupling.py | 9 +-- integration_test/test_snapshot_dispatch.py | 8 +- integration_test/test_snapshot_interact.py | 16 ++-- integration_test/test_snapshot_macro_micro.py | 41 +++++----- 8 files changed, 85 insertions(+), 90 deletions(-) diff --git a/integration_test/conftest.py b/integration_test/conftest.py index 210659b7..433ebadf 100644 --- a/integration_test/conftest.py +++ b/integration_test/conftest.py @@ -62,27 +62,34 @@ def _python_wrapper(instance_name, muscle_manager, callable): callable() -def run_manager_with_actors( - ymmsl_text, tmpdir, - cpp_actors={}, fortran_actors={}, python_actors={}): +def run_manager_with_actors(ymmsl_text, tmpdir, actors): """Start muscle_manager along with C++ and python actors. - C++ actors are a dict of instance->executable_path. Executable paths are - assumed to be relative to ../libmuscle/cpp/build/. LD_LIBRARY_PATH is - automatically updated to include the msgpack library path. + Args: + actors: a dictionary of lists containing details for each actor: + ``{"instance_name": ("language", "details", ...)}``. - Fortran actors are a dict of instance->executable_path. Executable paths are - assumed to be relative to ../libmuscle/fortran/build/. LD_LIBRARY_PATH is - automatically updated to include the msgpack library path. + Language can be ``"python"``, ``"cpp"`` or ``"fortran"``. Details + differ per language. - Python actors are a dict of instance->callable, where the callable - implements the python actor. 
+ For python actors, details is a single callable which is executed + in a ``multiprocessing.Process``. + + For cpp actors, details is an executable path with optional arguments. + The executable paths are assumed to be relative to + ``../libmuscle/cpp/build/libmuscle/tests``. + + For fortran actors, details is an executable path. Executable paths are + assumed to be relative to ``../libmuscle/fortran/build/libmuscle/tests``. + + For both cpp and Fortran actors, LD_LIBRARY_PATH is automatically updated + to include the msgpack library path. """ env = os.environ.copy() ymmsl_doc = ymmsl.load(ymmsl_text) libmuscle_dir = Path(__file__).parents[1] / 'libmuscle' - cpp_build_dir = libmuscle_dir / 'cpp' / 'build' - fortran_build_dir = libmuscle_dir / 'fortran' / 'build' + cpp_build_dir = libmuscle_dir / 'cpp' / 'build' / 'libmuscle' / 'tests' + fortran_build_dir = libmuscle_dir / 'fortran' / 'build' / 'libmuscle' / 'tests' with ExitStack() as stack: # start muscle_manager and extract manager location @@ -96,28 +103,33 @@ def run_manager_with_actors( env['LD_LIBRARY_PATH'] = ':'.join(map(str, lib_paths)) native_processes = [] - # start native actors - for actors, build_dir in ((cpp_actors, cpp_build_dir), - (fortran_actors, fortran_build_dir)): - for instance_name, executable_path in actors.items(): - executable = build_dir / executable_path - f_out = stack.enter_context( - (tmpdir / f'{instance_name}_stdout.txt').open('w')) - f_err = stack.enter_context( - (tmpdir / f'{instance_name}_stderr.txt').open('w')) - native_processes.append(subprocess.Popen( - [str(executable), f'--muscle-instance={instance_name}'], - env=env, stdout=f_out, stderr=f_err)) - - # start python actors python_processes = [] - for instance_name, callable in python_actors.items(): - proc = mp.Process( - target=_python_wrapper, - args=(instance_name, env['MUSCLE_MANAGER'], callable), - name=instance_name) - proc.start() - python_processes.append(proc) + # start actors + for instance_name, (language, 
actor, *args) in actors.items(): + if language == "python": + # start python actor + proc = mp.Process( + target=_python_wrapper, + args=(instance_name, env['MUSCLE_MANAGER'], actor), + name=instance_name) + proc.start() + python_processes.append(proc) + continue + elif language == "cpp": + build_dir = cpp_build_dir + elif language == "fortran": + build_dir = fortran_build_dir + else: + raise ValueError(f"Unknown {language=}") + # start native code actor + executable = build_dir / actor + f_out = stack.enter_context( + (tmpdir / f'{instance_name}_stdout.txt').open('w')) + f_err = stack.enter_context( + (tmpdir / f'{instance_name}_stderr.txt').open('w')) + native_processes.append(subprocess.Popen( + [str(executable), *args, f'--muscle-instance={instance_name}'], + env=env, stdout=f_out, stderr=f_err)) # check results for proc in native_processes: diff --git a/integration_test/test_cpp_macro_micro.py b/integration_test/test_cpp_macro_micro.py index 10e60828..4d06dde3 100644 --- a/integration_test/test_cpp_macro_micro.py +++ b/integration_test/test_cpp_macro_micro.py @@ -1,4 +1,3 @@ -from pathlib import Path import sqlite3 import numpy as np @@ -72,8 +71,7 @@ def test_cpp_macro_micro(mmp_server_config_simple, tmp_path): run_manager_with_actors( mmp_server_config_simple, tmp_path, - {'micro': Path('libmuscle') / 'tests' / 'micro_model_test'}, - {}, - {'macro': macro}) + {'micro': ('cpp', 'micro_model_test'), + 'macro': ('python', macro)}) check_profile_output(tmp_path) diff --git a/integration_test/test_fortran_macro_micro.py b/integration_test/test_fortran_macro_micro.py index 0717891c..c8c5bbcd 100644 --- a/integration_test/test_fortran_macro_micro.py +++ b/integration_test/test_fortran_macro_micro.py @@ -1,5 +1,3 @@ -from pathlib import Path - import numpy as np from libmuscle import Instance, Message @@ -44,6 +42,5 @@ def test_fortran_macro_micro(mmp_server_config_simple, tmp_path): run_manager_with_actors( mmp_server_config_simple, tmp_path, - {}, - {'micro': 
Path('libmuscle') / 'tests' / 'fortran_micro_model_test'}, - {'macro': macro}) + {'micro': ('fortran', 'fortran_micro_model_test'), + 'macro': ('python', macro)}) diff --git a/integration_test/test_multicast_cpp.py b/integration_test/test_multicast_cpp.py index 7daa62d3..51c657e5 100644 --- a/integration_test/test_multicast_cpp.py +++ b/integration_test/test_multicast_cpp.py @@ -1,5 +1,3 @@ -from pathlib import Path - import ymmsl from libmuscle import Instance @@ -38,6 +36,6 @@ def test_multicast_cpp(tmp_path): - receiver1.in - receiver2.in""", tmp_path, - {'multicast': Path('libmuscle') / 'tests' / 'component_test'}, - {}, - {'receiver1': receiver, 'receiver2': receiver}) + {'multicast': ('cpp', 'component_test'), + 'receiver1': ('python', receiver), + 'receiver2': ('python', receiver)}) diff --git a/integration_test/test_snapshot_complex_coupling.py b/integration_test/test_snapshot_complex_coupling.py index 55bb2b81..d7c05ad3 100644 --- a/integration_test/test_snapshot_complex_coupling.py +++ b/integration_test/test_snapshot_complex_coupling.py @@ -161,15 +161,14 @@ def config(): def test_snapshot_complex_coupling(tmp_path, config): - actors = {'main': main_component} + actors = {'main': ('python', main_component)} for c in 'ABC': - actors['cache' + c] = cache_component + actors['cache' + c] = ('python', cache_component) for c in 'ABCD': - actors['calc' + c] = echo_component + actors['calc' + c] = ('python', echo_component) run_dir1 = RunDir(tmp_path / 'run1') - run_manager_with_actors( - dump(config), run_dir1.path, python_actors=actors) + run_manager_with_actors(dump(config), run_dir1.path, actors) assert len(ls_snapshots(run_dir1, 'main')) == 5 # 2.0/0.5, at_end assert len(ls_snapshots(run_dir1, 'cacheA')) == 5 # 2.0/0.5, at_end diff --git a/integration_test/test_snapshot_dispatch.py b/integration_test/test_snapshot_dispatch.py index d9082617..9b7fcfed 100644 --- a/integration_test/test_snapshot_dispatch.py +++ b/integration_test/test_snapshot_dispatch.py @@ 
-96,10 +96,9 @@ def dispatch_config(): def test_snapshot_dispatch(tmp_path, dispatch_config): - actors = {f'comp{i + 1}': component for i in range(5)} + actors = {f'comp{i + 1}': ('python', component) for i in range(5)} run_dir1 = RunDir(tmp_path / 'run1') - run_manager_with_actors( - dump(dispatch_config), run_dir1.path, python_actors=actors) + run_manager_with_actors(dump(dispatch_config), run_dir1.path, actors) assert len(ls_snapshots(run_dir1, 'comp1')) == 2 # t=0, at_end assert len(ls_snapshots(run_dir1, 'comp2')) == 5 # t=0, 2.5, 2.3, 2.8, at_end @@ -119,8 +118,7 @@ def test_snapshot_dispatch(tmp_path, dispatch_config): 'comp1': ls_snapshots(run_dir1, 'comp1')[1], 'comp2': ls_snapshots(run_dir1, 'comp2')[1]} - run_manager_with_actors( - dump(dispatch_config), run_dir2.path, python_actors=actors) + run_manager_with_actors(dump(dispatch_config), run_dir2.path, actors) assert len(ls_snapshots(run_dir2, 'comp1')) == 1 # resume assert len(ls_snapshots(run_dir2, 'comp2')) == 4 # resume, t=2.5, 2.8, at_end diff --git a/integration_test/test_snapshot_interact.py b/integration_test/test_snapshot_interact.py index 34ca0e8a..dca2f16c 100644 --- a/integration_test/test_snapshot_interact.py +++ b/integration_test/test_snapshot_interact.py @@ -86,10 +86,10 @@ def test_snapshot_interact_lockstep(tmp_path): stop: 2.0 - at: - 2.5""" - actors = {f'comp{i + 1}': component for i in range(2)} + actors = {f'comp{i + 1}': ('python', component) for i in range(2)} run_dir1 = RunDir(tmp_path / 'run1') - run_manager_with_actors(config, run_dir1.path, python_actors=actors) + run_manager_with_actors(config, run_dir1.path, actors) assert len(ls_snapshots(run_dir1, 'comp1')) == 3 # t=0.75, 1.75, 2.5 assert len(ls_snapshots(run_dir1, 'comp2')) == 3 # t=0.75, 1.75, 2.5 @@ -103,8 +103,7 @@ def test_snapshot_interact_lockstep(tmp_path): config_doc = load(config) config_doc.update(snapshot_docs[1]) - run_manager_with_actors( - dump(config_doc), run_dir2.path, python_actors=actors) + 
run_manager_with_actors(dump(config_doc), run_dir2.path, actors) assert len(ls_snapshots(run_dir2, 'comp1')) == 2 # resume, t=2.5 assert len(ls_snapshots(run_dir2, 'comp2')) == 2 # resume, t=2.5 @@ -138,11 +137,11 @@ def test_snapshot_interact_varstep(tmp_path, scale): stop: 2.0 - at: - 2.5""" - actors = {f'comp{i + 1}': component for i in range(2)} - actors['coupler'] = interact_coupling.checkpointing_temporal_coupler + actors = {f'comp{i + 1}': ('python', component) for i in range(2)} + actors['coupler'] = ('python', interact_coupling.checkpointing_temporal_coupler) run_dir1 = RunDir(tmp_path / 'run1') - run_manager_with_actors(config, run_dir1.path, python_actors=actors) + run_manager_with_actors(config, run_dir1.path, actors) assert len(ls_snapshots(run_dir1, 'comp1')) == 3 # t=0.75, 1.75, 2.5 assert len(ls_snapshots(run_dir1, 'comp2')) == 3 # t=0.75, 1.75, 2.5 @@ -156,8 +155,7 @@ def test_snapshot_interact_varstep(tmp_path, scale): config_doc = load(config) config_doc.update(snapshot_docs[1]) - run_manager_with_actors( - dump(config_doc), run_dir2.path, python_actors=actors) + run_manager_with_actors(dump(config_doc), run_dir2.path, actors) assert len(ls_snapshots(run_dir2, 'comp1')) == 2 # resume, t=2.5 assert len(ls_snapshots(run_dir2, 'comp2')) == 2 # resume, t=2.5 diff --git a/integration_test/test_snapshot_macro_micro.py b/integration_test/test_snapshot_macro_micro.py index 9d8481c2..0c1ef630 100644 --- a/integration_test/test_snapshot_macro_micro.py +++ b/integration_test/test_snapshot_macro_micro.py @@ -198,10 +198,9 @@ def config_with_transformer(base_config): def test_snapshot_macro_micro(tmp_path, base_config): - actors = {'macro': macro, 'micro': micro} + actors = {'macro': ('python', macro), 'micro': ('python', micro)} run_dir1 = RunDir(tmp_path / 'run1') - run_manager_with_actors( - dump(base_config), run_dir1.path, python_actors=actors) + run_manager_with_actors(dump(base_config), run_dir1.path, actors) macro_snapshots = ls_snapshots(run_dir1, 
'macro') assert len(macro_snapshots) == 6 # 0, 0.4, 0.8, 1.2, 1.6, final @@ -221,8 +220,7 @@ def test_snapshot_macro_micro(tmp_path, base_config): # resume from the snapshots taken at t>=1.2 run_dir2 = RunDir(tmp_path / 'run2') base_config.update(snapshot_docs[4]) # add resume info - run_manager_with_actors( - dump(base_config), run_dir2.path, python_actors=actors) + run_manager_with_actors(dump(base_config), run_dir2.path, actors) assert len(ls_snapshots(run_dir2, 'macro')) == 3 # resume, 1.6, final assert len(ls_snapshots(run_dir2, 'micro')) == 3 # resume, 1.6, final @@ -233,15 +231,13 @@ def test_snapshot_macro_micro(tmp_path, base_config): base_config.resume = {} # clear resume information base_config.update(snapshot_docs[0]) # add resume info base_config.settings['macro.t_max'] = 0.6 # run shorter - run_manager_with_actors( - dump(base_config), run_dir3.path, python_actors=actors) + run_manager_with_actors(dump(base_config), run_dir3.path, actors) def test_snapshot_macro_stateless_micro(tmp_path, base_config): - actors = {'macro': macro, 'micro': stateless_micro} + actors = {'macro': ('python', macro), 'micro': ('python', stateless_micro)} run_dir1 = RunDir(tmp_path / 'run1') - run_manager_with_actors( - dump(base_config), run_dir1.path, python_actors=actors) + run_manager_with_actors(dump(base_config), run_dir1.path, actors) assert len(ls_snapshots(run_dir1, 'macro')) == 6 # 0, 0.4, 0.8, 1.2, 1.6, final assert len(ls_snapshots(run_dir1, 'micro')) == 6 # 0, 0.4, 0.8, 1.2, 1.6, final @@ -252,8 +248,7 @@ def test_snapshot_macro_stateless_micro(tmp_path, base_config): # resume from the snapshot taken at t>=1.2 run_dir2 = RunDir(tmp_path / 'run2') base_config.update(snapshot_docs[3]) # add resume info - run_manager_with_actors( - dump(base_config), run_dir2.path, python_actors=actors) + run_manager_with_actors(dump(base_config), run_dir2.path, actors) assert len(ls_snapshots(run_dir2, 'macro')) == 3 # resume, 1.6, final assert len(ls_snapshots(run_dir2, 'micro')) 
== 4 # resume, 1.2, 1.6, final @@ -262,11 +257,12 @@ def test_snapshot_macro_stateless_micro(tmp_path, base_config): def test_snapshot_macro_vector_micro(tmp_path, base_config): base_config.model.components[1].multiplicity = [2] - actors = {'macro': macro_vector, 'micro[0]': micro, 'micro[1]': micro} + actors = {'macro': ('python', macro_vector), + 'micro[0]': ('python', micro), + 'micro[1]': ('python', micro)} run_dir1 = RunDir(tmp_path / 'run1') - run_manager_with_actors( - dump(base_config), run_dir1.path, python_actors=actors) + run_manager_with_actors(dump(base_config), run_dir1.path, actors) assert len(ls_snapshots(run_dir1, 'macro')) == 6 # 0, 0.4, 0.8, 1.2, 1.6, final assert len(ls_snapshots(run_dir1, 'micro[0]')) == 6 # 0, 0.4, 0.8, 1.2, 1.6, final @@ -276,8 +272,7 @@ def test_snapshot_macro_vector_micro(tmp_path, base_config): run_dir2 = RunDir(tmp_path / 'run2') base_config.update(load(snapshots_ymmsl[-3])) # add resume info - run_manager_with_actors( - dump(base_config), run_dir2.path, python_actors=actors) + run_manager_with_actors(dump(base_config), run_dir2.path, actors) assert len(ls_snapshots(run_dir2, 'macro')) == 3 # resume, 1.6, final assert len(ls_snapshots(run_dir2, 'micro[0]')) == 3 # resume, 1.6, final @@ -286,12 +281,13 @@ def test_snapshot_macro_vector_micro(tmp_path, base_config): def test_snapshot_macro_transformer_micro(tmp_path, config_with_transformer): - actors = {'macro': macro, 'micro': micro, 'transformer1': data_transformer, - 'transformer2': data_transformer} + actors = {'macro': ('python', macro), + 'micro': ('python', micro), + 'transformer1': ('python', data_transformer), + 'transformer2': ('python', data_transformer)} run_dir1 = RunDir(tmp_path / 'run1') - run_manager_with_actors( - dump(config_with_transformer), run_dir1.path, python_actors=actors) + run_manager_with_actors(dump(config_with_transformer), run_dir1.path, actors) snapshots_ymmsl = ls_snapshots(run_dir1) assert len(snapshots_ymmsl) == 8 @@ -299,8 +295,7 @@ def 
test_snapshot_macro_transformer_micro(tmp_path, config_with_transformer): # pick one to resume from run_dir2 = RunDir(tmp_path / 'run2') config_with_transformer.update(load(snapshots_ymmsl[4])) # add resume info - run_manager_with_actors( - dump(config_with_transformer), run_dir2.path, python_actors=actors) + run_manager_with_actors(dump(config_with_transformer), run_dir2.path, actors) snapshots_ymmsl = ls_snapshots(run_dir2) assert len(snapshots_ymmsl) == 6 From 90e73b49fa15ddaa2d8fc831688470cc59ddc90d Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 20 Feb 2023 11:25:12 +0100 Subject: [PATCH 059/188] Fix comparison bug for short port names --- libmuscle/cpp/src/libmuscle/communicator.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/libmuscle/cpp/src/libmuscle/communicator.cpp b/libmuscle/cpp/src/libmuscle/communicator.cpp index 39428607..5555ac7b 100644 --- a/libmuscle/cpp/src/libmuscle/communicator.cpp +++ b/libmuscle/cpp/src/libmuscle/communicator.cpp @@ -486,12 +486,14 @@ std::tuple Communicator::split_port_desc_( std::string port_name(port_desc); bool is_vector = false; - if (port_desc.rfind("[]") == (port_desc.size() - 2)) { + auto found = port_desc.rfind("[]"); + if (found != std::string::npos && found == (port_desc.size() - 2)) { is_vector = true; port_name = port_desc.substr(0, port_desc.size() - 2); } - if (port_name.rfind("[]") == (port_name.size() - 2)) { + found = port_name.rfind("[]"); + if (found != std::string::npos && found == (port_name.size() - 2)) { std::ostringstream oss; oss << "Port description '" << port_desc << "' is invalid: ports can"; oss << " have at most one dimension."; From 22b9fbd9834710c2cab421829f756e28cfce5bb4 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 20 Feb 2023 11:26:04 +0100 Subject: [PATCH 060/188] Integration tests: finish manager on failed test --- integration_test/conftest.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git 
a/integration_test/conftest.py b/integration_test/conftest.py index 433ebadf..0bf73b35 100644 --- a/integration_test/conftest.py +++ b/integration_test/conftest.py @@ -50,10 +50,12 @@ def make_server_process(ymmsl_doc, tmpdir): process.start() control_pipe[1].close() # wait for start - yield control_pipe[0].recv() - control_pipe[0].send(True) - control_pipe[0].close() - process.join() + try: + yield control_pipe[0].recv() + finally: + control_pipe[0].send(True) + control_pipe[0].close() + process.join() def _python_wrapper(instance_name, muscle_manager, callable): From b46018345f89ac1ddfb8fd0b307d867baaad163e Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 20 Feb 2023 11:29:14 +0100 Subject: [PATCH 061/188] Fix typo --- libmuscle/cpp/src/libmuscle/mmp_client.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libmuscle/cpp/src/libmuscle/mmp_client.cpp b/libmuscle/cpp/src/libmuscle/mmp_client.cpp index a221a038..ef59f9a2 100644 --- a/libmuscle/cpp/src/libmuscle/mmp_client.cpp +++ b/libmuscle/cpp/src/libmuscle/mmp_client.cpp @@ -112,7 +112,7 @@ namespace { "triggers", encode_vector(snapshot_metadata.triggers), "wallclock_time", snapshot_metadata.wallclock_time, "timestamp", snapshot_metadata.timestamp, - "next_timsetamp", encode_optional(snapshot_metadata.next_timestamp), + "next_timestamp", encode_optional(snapshot_metadata.next_timestamp), "port_message_counts", port_message_counts, "is_final_snapshot", snapshot_metadata.is_final_snapshot, "snapshot_filename", snapshot_metadata.snapshot_filename From a0c20ccb79f154aaf1410041de68ca655ed0a7d5 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 20 Feb 2023 11:44:00 +0100 Subject: [PATCH 062/188] Convert MPI env variable check into a mark.skipif --- integration_test/conftest.py | 4 ++++ integration_test/test_mpi_macro_micro.py | 8 ++------ integration_test/test_start_mpi.py | 9 ++------- integration_test/test_start_script.py | 9 ++------- 4 files changed, 10 insertions(+), 20 
deletions(-) diff --git a/integration_test/conftest.py b/integration_test/conftest.py index 0bf73b35..eab19aa9 100644 --- a/integration_test/conftest.py +++ b/integration_test/conftest.py @@ -20,6 +20,10 @@ 'MUSCLE_TEST_PYTHON_ONLY' in os.environ, reason='Python-only tests requested') +skip_if_no_mpi_cpp = pytest.mark.skipif( + 'MUSCLE_ENABLE_CPP_MPI' not in os.environ, + reason='MPI support was not detected') + @pytest.fixture def yatiml_log_warning(): diff --git a/integration_test/test_mpi_macro_micro.py b/integration_test/test_mpi_macro_micro.py index 2e9eb39a..69463f35 100644 --- a/integration_test/test_mpi_macro_micro.py +++ b/integration_test/test_mpi_macro_micro.py @@ -1,14 +1,13 @@ import multiprocessing as mp import os from pathlib import Path -import pytest import subprocess import sys from libmuscle import Instance, Message from ymmsl import Operator -from .conftest import skip_if_python_only +from .conftest import skip_if_python_only, skip_if_no_mpi_cpp def run_macro(instance_id: str, muscle_manager: str): @@ -37,12 +36,9 @@ def macro(): @skip_if_python_only +@skip_if_no_mpi_cpp def test_mpi_macro_micro( tmpdir, mmp_server_process_simple, mpirun_outfile_arg): - # only run this if MPI is enabled - if 'MUSCLE_ENABLE_CPP_MPI' not in os.environ: - pytest.skip('MPI support was not detected') - # create C++ micro model # see libmuscle/cpp/src/libmuscle/tests/micro_model_test.cpp cpp_build_dir = Path(__file__).parents[1] / 'libmuscle' / 'cpp' / 'build' diff --git a/integration_test/test_start_mpi.py b/integration_test/test_start_mpi.py index 2ba3d522..38dae114 100644 --- a/integration_test/test_start_mpi.py +++ b/integration_test/test_start_mpi.py @@ -1,21 +1,16 @@ -import os from pathlib import Path -import pytest import ymmsl from libmuscle.manager.manager import Manager from libmuscle.manager.run_dir import RunDir -from .conftest import skip_if_python_only +from .conftest import skip_if_python_only, skip_if_no_mpi_cpp @skip_if_python_only 
+@skip_if_no_mpi_cpp def test_start_mpi(tmpdir, mpi_exec_model): - # only run this if MPI is enabled - if 'MUSCLE_ENABLE_CPP_MPI' not in os.environ: - pytest.skip('MPI support was not detected') - tmppath = Path(str(tmpdir)) # find our test components and their requirements diff --git a/integration_test/test_start_script.py b/integration_test/test_start_script.py index b5688e6d..7cda6c5a 100644 --- a/integration_test/test_start_script.py +++ b/integration_test/test_start_script.py @@ -1,21 +1,16 @@ -import os from pathlib import Path -import pytest import ymmsl from libmuscle.manager.manager import Manager from libmuscle.manager.run_dir import RunDir -from .conftest import skip_if_python_only +from .conftest import skip_if_python_only, skip_if_no_mpi_cpp @skip_if_python_only +@skip_if_no_mpi_cpp def test_start_script(tmpdir): - # only run this if MPI is enabled - if 'MUSCLE_ENABLE_CPP_MPI' not in os.environ: - pytest.skip('MPI support was not detected') - tmppath = Path(str(tmpdir)) # find our test components and their requirements From d3f52c9d7bc0e55df87ae49e77f9d8cb270069b3 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 20 Feb 2023 12:09:15 +0100 Subject: [PATCH 063/188] Support mpi actors in run_manager_with_actors --- integration_test/conftest.py | 28 +++++++------ integration_test/test_mpi_macro_micro.py | 50 +++--------------------- 2 files changed, 23 insertions(+), 55 deletions(-) diff --git a/integration_test/conftest.py b/integration_test/conftest.py index eab19aa9..c9fde77c 100644 --- a/integration_test/conftest.py +++ b/integration_test/conftest.py @@ -75,8 +75,8 @@ def run_manager_with_actors(ymmsl_text, tmpdir, actors): actors: a dictionary of lists containing details for each actor: ``{"instance_name": ("language", "details", ...)}``. - Language can be ``"python"``, ``"cpp"`` or ``"fortran"``. Details - differ per language. + Language can be ``"python"``, ``"cpp"``, ``"mpicpp"`` or ``"fortran"``. + Details differ per language. 
For python actors, details is a single callable which is executed in a ``multiprocessing.Process``. @@ -85,6 +85,9 @@ def run_manager_with_actors(ymmsl_text, tmpdir, actors): The executable paths are assumed to be relative to ``../libmuscle/cpp/build/libmuscle/tests``. + For mpi cpp actors, details is an executable path (see cpp), then number of + processes and optionally arguments passed to the executable. + For fortran actors, details is an executable path. Executable paths are assumed to be relative to ``../libmuscle/fortran/build/libmuscle/tests``. @@ -122,13 +125,18 @@ def run_manager_with_actors(ymmsl_text, tmpdir, actors): python_processes.append(proc) continue elif language == "cpp": - build_dir = cpp_build_dir + executable = cpp_build_dir / actor + elif language == "mpicpp": + assert len(args) > 0, "must provide at least number of mpi instances" + executable = 'mpirun' + out_file = tmpdir / f'mpi_{instance_name}.log' + args = ('-np', args[0], mpirun_outfile_arg(), str(out_file), + str(cpp_build_dir / actor), *args[1:]) elif language == "fortran": - build_dir = fortran_build_dir + executable = fortran_build_dir / actor else: raise ValueError(f"Unknown {language=}") # start native code actor - executable = build_dir / actor f_out = stack.enter_context( (tmpdir / f'{instance_name}_stdout.txt').open('w')) f_err = stack.enter_context( @@ -230,7 +238,6 @@ def log_file_in_tmpdir(tmpdir): os.chdir(old_workdir) -@pytest.fixture def mpi_is_intel(): if 'MUSCLE_ENABLE_CPP_MPI' not in os.environ: return None @@ -240,17 +247,16 @@ def mpi_is_intel(): return 'Intel' in result.stdout.decode('utf-8') -@pytest.fixture -def mpirun_outfile_arg(mpi_is_intel): - if mpi_is_intel: +def mpirun_outfile_arg(): + if mpi_is_intel(): return '-outfile-pattern' else: return '--output-filename' @pytest.fixture -def mpi_exec_model(mpi_is_intel): - if mpi_is_intel: +def mpi_exec_model(): + if mpi_is_intel(): return 'intelmpi' else: return 'openmpi' diff --git 
a/integration_test/test_mpi_macro_micro.py b/integration_test/test_mpi_macro_micro.py index 69463f35..7cd5138a 100644 --- a/integration_test/test_mpi_macro_micro.py +++ b/integration_test/test_mpi_macro_micro.py @@ -1,19 +1,7 @@ -import multiprocessing as mp -import os -from pathlib import Path -import subprocess -import sys - from libmuscle import Instance, Message from ymmsl import Operator -from .conftest import skip_if_python_only, skip_if_no_mpi_cpp - - -def run_macro(instance_id: str, muscle_manager: str): - sys.argv.append('--muscle-instance={}'.format(instance_id)) - sys.argv.append('--muscle-manager={}'.format(muscle_manager)) - macro() +from .conftest import run_manager_with_actors, skip_if_python_only, skip_if_no_mpi_cpp def macro(): @@ -37,34 +25,8 @@ def macro(): @skip_if_python_only @skip_if_no_mpi_cpp -def test_mpi_macro_micro( - tmpdir, mmp_server_process_simple, mpirun_outfile_arg): - # create C++ micro model - # see libmuscle/cpp/src/libmuscle/tests/micro_model_test.cpp - cpp_build_dir = Path(__file__).parents[1] / 'libmuscle' / 'cpp' / 'build' - env = os.environ.copy() - lib_paths = [cpp_build_dir / 'msgpack' / 'msgpack' / 'lib'] - if 'LD_LIBRARY_PATH' in env: - env['LD_LIBRARY_PATH'] += ':' + ':'.join(map(str, lib_paths)) - else: - env['LD_LIBRARY_PATH'] = ':'.join(map(str, lib_paths)) - - env['MUSCLE_MANAGER'] = mmp_server_process_simple - - cpp_test_dir = cpp_build_dir / 'libmuscle' / 'tests' - mpi_test_micro = cpp_test_dir / 'mpi_micro_model_test' - out_file = tmpdir + '/mpi_micro.log' - micro_result = subprocess.Popen( - ['mpirun', '-np', '2', mpirun_outfile_arg, out_file, - str(mpi_test_micro), '--muscle-instance=micro'], env=env) - - # run macro model - macro_process = mp.Process(target=run_macro, - args=('macro', mmp_server_process_simple)) - macro_process.start() - - # check results - micro_result.wait() - assert micro_result.returncode == 0 - macro_process.join() - assert macro_process.exitcode == 0 +def test_mpi_macro_micro(tmpdir, 
mmp_server_config_simple): + actors = { + 'macro': ('python', macro), + 'micro': ('mpicpp', 'mpi_micro_model_test', '2')} # 2 processes + run_manager_with_actors(mmp_server_config_simple, tmpdir, actors) From 436430f80cf80a76837bad94c645d484d4d634bd Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 20 Feb 2023 12:10:42 +0100 Subject: [PATCH 064/188] Add C++ integration tests for snapshot_macro_micro --- integration_test/test_snapshot_macro_micro.py | 32 ++- libmuscle/cpp/build/libmuscle/tests/Makefile | 3 + .../tests/mpi_snapshot_micro_test.cpp | 102 ++++++++ .../tests/snapshot_components_test.cpp | 220 ++++++++++++++++++ 4 files changed, 350 insertions(+), 7 deletions(-) create mode 100644 libmuscle/cpp/src/libmuscle/tests/mpi_snapshot_micro_test.cpp create mode 100644 libmuscle/cpp/src/libmuscle/tests/snapshot_components_test.cpp diff --git a/integration_test/test_snapshot_macro_micro.py b/integration_test/test_snapshot_macro_micro.py index 0c1ef630..e7b36ea6 100644 --- a/integration_test/test_snapshot_macro_micro.py +++ b/integration_test/test_snapshot_macro_micro.py @@ -5,7 +5,8 @@ Instance, Message, KEEPS_NO_STATE_FOR_NEXT_USE, USES_CHECKPOINT_API) from libmuscle.manager.run_dir import RunDir -from .conftest import run_manager_with_actors, ls_snapshots +from .conftest import ( + run_manager_with_actors, ls_snapshots, skip_if_python_only, skip_if_no_mpi_cpp) _LOG_LEVEL = 'INFO' # set to DEBUG for additional debug info @@ -197,8 +198,17 @@ def config_with_transformer(base_config): return base_config -def test_snapshot_macro_micro(tmp_path, base_config): - actors = {'macro': ('python', macro), 'micro': ('python', micro)} +@pytest.mark.parametrize('actors', [ + {'macro': ('python', macro), 'micro': ('python', micro)}, + pytest.param( + {'macro': ('cpp', 'snapshot_components_test', 'macro'), + 'micro': ('cpp', 'snapshot_components_test', 'micro')}, + marks=skip_if_python_only), + pytest.param( + {'macro': ('python', macro), 'micro': ('cpp', 
'mpi_snapshot_micro_test', '2')}, + marks=[skip_if_python_only, skip_if_no_mpi_cpp]) +]) +def test_snapshot_macro_micro(tmp_path, base_config, actors): run_dir1 = RunDir(tmp_path / 'run1') run_manager_with_actors(dump(base_config), run_dir1.path, actors) @@ -234,8 +244,12 @@ def test_snapshot_macro_micro(tmp_path, base_config): run_manager_with_actors(dump(base_config), run_dir3.path, actors) -def test_snapshot_macro_stateless_micro(tmp_path, base_config): - actors = {'macro': ('python', macro), 'micro': ('python', stateless_micro)} +@pytest.mark.parametrize('micro_actor', [ + ('python', stateless_micro), + ('cpp', 'snapshot_components_test', 'stateless_micro'), +]) +def test_snapshot_macro_stateless_micro(tmp_path, base_config, micro_actor): + actors = {'macro': ('python', macro), 'micro': micro_actor} run_dir1 = RunDir(tmp_path / 'run1') run_manager_with_actors(dump(base_config), run_dir1.path, actors) @@ -255,9 +269,13 @@ def test_snapshot_macro_stateless_micro(tmp_path, base_config): assert len(ls_snapshots(run_dir2)) == 3 -def test_snapshot_macro_vector_micro(tmp_path, base_config): +@pytest.mark.parametrize('macro_actor', [ + ('python', macro_vector), + ('cpp', 'snapshot_components_test', 'macro_vector'), +]) +def test_snapshot_macro_vector_micro(tmp_path, base_config, macro_actor): base_config.model.components[1].multiplicity = [2] - actors = {'macro': ('python', macro_vector), + actors = {'macro': macro_actor, 'micro[0]': ('python', micro), 'micro[1]': ('python', micro)} diff --git a/libmuscle/cpp/build/libmuscle/tests/Makefile b/libmuscle/cpp/build/libmuscle/tests/Makefile index 327b4ddb..2dea6170 100644 --- a/libmuscle/cpp/build/libmuscle/tests/Makefile +++ b/libmuscle/cpp/build/libmuscle/tests/Makefile @@ -97,6 +97,9 @@ mpi_micro_model_test: mpi_micro_model_test.cpp $(CURDIR)/../../ymmsl/libymmsl_d. 
mpi_component_test: mpi_component_test.cpp $(CURDIR)/../../ymmsl/libymmsl_d.a $(CURDIR)/../libmuscle_mpi_d.a $(MPICXX) $(CXXFLAGS) $(DEBUGFLAGS) -I$(CURDIR)/.. $(MPIFLAGS) $^ -o $@ $(LDFLAGS2) +mpi_snapshot_micro_test: mpi_snapshot_micro_test.cpp $(CURDIR)/../../ymmsl/libymmsl_d.a $(CURDIR)/../libmuscle_mpi_d.a + $(MPICXX) $(CXXFLAGS) $(DEBUGFLAGS) -I$(CURDIR)/.. $(MPIFLAGS) $^ -o $@ $(LDFLAGS2) + empty := space := $(empty) $(empty) diff --git a/libmuscle/cpp/src/libmuscle/tests/mpi_snapshot_micro_test.cpp b/libmuscle/cpp/src/libmuscle/tests/mpi_snapshot_micro_test.cpp new file mode 100644 index 00000000..b9e292ee --- /dev/null +++ b/libmuscle/cpp/src/libmuscle/tests/mpi_snapshot_micro_test.cpp @@ -0,0 +1,102 @@ +/* This is a part of the integration test suite, and is run from a Python + * test in /integration_test. It is not a unit test. + */ +#include + +#include +#include + +#include + + +using libmuscle::Data; +using libmuscle::DataConstRef; +using libmuscle::Instance; +using libmuscle::InstanceFlags; +using libmuscle::Message; +using ymmsl::Operator; + + +void mpi_micro(int argc, char * argv[]) { + MPI_Init(&argc, &argv); + int rank, num_ranks; + MPI_Comm_size(MPI_COMM_WORLD, &num_ranks); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + Instance instance( + argc, argv, { + {Operator::F_INIT, {"f_i"}}, + {Operator::O_F, {"o_f"}}}, + InstanceFlags::USES_CHECKPOINT_API, + MPI_COMM_WORLD, 0); + + while (instance.reuse_instance()) { + double dt = instance.get_setting_as("dt"); + double t_max = instance.get_setting_as("t_max"); + double t_cur, t_stop; + int i; + + if (instance.resuming()) { + if (rank == 0) { + auto msg = instance.load_snapshot(); + // load state from message + t_cur = msg.timestamp(); + i = msg.data()[0].as(); + t_stop = msg.data()[1].as(); + // and broadcast + MPI_Bcast(&t_cur, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&i, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&t_stop, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + } else { + MPI_Bcast(&t_cur, 1, 
MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&i, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&t_stop, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + } + } + + if (instance.should_init()) { + if (rank == 0) { + auto msg = instance.receive("f_i"); + t_cur = msg.timestamp(); + i = msg.data().as(); + t_stop = t_cur + t_max; + MPI_Bcast(&t_cur, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&i, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&t_stop, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + } else { + MPI_Bcast(&t_cur, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&i, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&t_stop, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + } + } + + while (t_cur <= t_stop) { + // faux time-integration for testing snapshots + t_cur += dt; + + if (instance.should_save_snapshot(t_cur)) { + // [Optional] collectively gather state + if (rank == 0) + // Only root can save + instance.save_snapshot(Message(t_cur, Data::list(i, t_stop))); + } + } + + // Message is only sent from root process + instance.send("o_f", Message(t_cur, i)); + + if (instance.should_save_final_snapshot()) { + // [Optional] collectively gather state + if (rank == 0) + // Only root can save + instance.save_final_snapshot(Message(t_cur, Data::list(i, t_stop))); + } + } +} + + +int main(int argc, char * argv[]) { + mpi_micro(argc, argv); + return EXIT_SUCCESS; +} + diff --git a/libmuscle/cpp/src/libmuscle/tests/snapshot_components_test.cpp b/libmuscle/cpp/src/libmuscle/tests/snapshot_components_test.cpp new file mode 100644 index 00000000..72d0b65f --- /dev/null +++ b/libmuscle/cpp/src/libmuscle/tests/snapshot_components_test.cpp @@ -0,0 +1,220 @@ +/* This is a part of the integration test suite, and is run from a Python + * test in /integration_test. It is not a unit test. 
+ * + * This is a collection of actors used in test_snapshot_macro_micro.py + */ +#include +#include +#include + +#include +#include + +using namespace std::string_literals; + +using libmuscle::Data; +using libmuscle::DataConstRef; +using libmuscle::Instance; +using libmuscle::InstanceFlags; +using libmuscle::Message; +using ymmsl::Operator; + + +/** A simple snapshotting macro component used in + * integration_tests/test_snapshot_macro_micro.py + */ +void macro(int argc, char * argv[]) { + Instance instance( + argc, argv, { + {Operator::O_I, {"o_i"}}, + {Operator::S, {"s"}}}, + InstanceFlags::USES_CHECKPOINT_API); + + while (instance.reuse_instance()) { + double dt = instance.get_setting_as("dt"); + double t_max = instance.get_setting_as("t_max"); + double t_cur; + int i; + + if (instance.resuming()) { + auto msg = instance.load_snapshot(); + // load state from message + t_cur = msg.timestamp(); + i = msg.data().as(); + assert(i >= 1); + } + + if (instance.should_init()) { + t_cur = instance.get_setting_as("t0"); + i = 0; + } + + while (t_cur + dt <= t_max) { + Message msg(t_cur, i); + double t_next = t_cur + dt; + if (t_next + dt <= t_max) + msg.set_next_timestamp(t_next); + instance.send("o_i", msg); + + msg = instance.receive("s"); + assert(msg.data().as() == i); + + i ++; + t_cur += dt; + + if (instance.should_save_snapshot(t_cur)) + instance.save_snapshot(Message(t_cur, i)); + } + + if (instance.should_save_final_snapshot()) + instance.save_final_snapshot(Message(t_cur, i)); + } +} + + +void macro_vector(int argc, char * argv[]) { + Instance instance( + argc, argv, { + {Operator::O_I, {"o_i[]"}}, + {Operator::S, {"s[]"}}}, + InstanceFlags::USES_CHECKPOINT_API); + + while (instance.reuse_instance()) { + double dt = instance.get_setting_as("dt"); + double t_max = instance.get_setting_as("t_max"); + double t_cur; + int i; + + if (instance.resuming()) { + auto msg = instance.load_snapshot(); + // load state from message + t_cur = msg.timestamp(); + i = 
msg.data().as(); + assert(i >= 1); + } + + if (instance.should_init()) { + t_cur = instance.get_setting_as("t0"); + i = 0; + } + + while (t_cur + dt <= t_max) { + Message msg(t_cur, i); + double t_next = t_cur + dt; + if (t_next + dt <= t_max) + msg.set_next_timestamp(t_next); + for (int slot = 0; slot < instance.get_port_length("o_i"); ++slot) + instance.send("o_i", msg, slot); + + for (int slot = 0; slot < instance.get_port_length("s"); ++slot) { + msg = instance.receive("s", slot); + assert(msg.data().as() == i); + } + + i ++; + t_cur += dt; + + if (instance.should_save_snapshot(t_cur)) + instance.save_snapshot(Message(t_cur, i)); + } + + if (instance.should_save_final_snapshot()) + instance.save_final_snapshot(Message(t_cur, i)); + } +} + + +void micro(int argc, char * argv[]) { + Instance instance( + argc, argv, { + {Operator::F_INIT, {"f_i"}}, + {Operator::O_F, {"o_f"}}}, + InstanceFlags::USES_CHECKPOINT_API); + + while (instance.reuse_instance()) { + double dt = instance.get_setting_as("dt"); + double t_max = instance.get_setting_as("t_max"); + double t_cur, t_stop; + int i; + + if (instance.resuming()) { + auto msg = instance.load_snapshot(); + // load state from message + t_cur = msg.timestamp(); + i = msg.data()[0].as(); + t_stop = msg.data()[1].as(); + } + + if (instance.should_init()) { + auto msg = instance.receive("f_i"); + t_cur = msg.timestamp(); + i = msg.data().as(); + t_stop = t_cur + t_max; + } + + while (t_cur <= t_stop) { + // faux time-integration for testing snapshots + t_cur += dt; + + if (instance.should_save_snapshot(t_cur)) + instance.save_snapshot(Message(t_cur, Data::list(i, t_stop))); + } + + instance.send("o_f", Message(t_cur, i)); + + if (instance.should_save_final_snapshot()) + instance.save_final_snapshot(Message(t_cur, Data::list(i, t_stop))); + } +} + + +void stateless_micro(int argc, char * argv[]) { + Instance instance( + argc, argv, { + {Operator::F_INIT, {"f_i"}}, + {Operator::O_F, {"o_f"}}}, + 
InstanceFlags::KEEPS_NO_STATE_FOR_NEXT_USE); + + while (instance.reuse_instance()) { + double dt = instance.get_setting_as("dt"); + double t_max = instance.get_setting_as("t_max"); + + auto msg = instance.receive("f_i"); + auto t_cur = msg.timestamp(); + auto i = msg.data().as(); + auto t_stop = t_cur + t_max; + + while (t_cur <= t_stop) { + // faux time-integration for testing snapshots + t_cur += dt; + } + + instance.send("o_f", Message(t_cur, i)); + } +} + + +int main(int argc, char * argv[]) { + if (argc > 1) { + if (argv[1] == "macro"s) { + macro(argc, argv); + return EXIT_SUCCESS; + } else if (argv[1] == "macro_vector"s) { + macro_vector(argc, argv); + return EXIT_SUCCESS; + } else if (argv[1] == "micro"s) { + micro(argc, argv); + return EXIT_SUCCESS; + } else if (argv[1] == "stateless_micro"s) { + stateless_micro(argc, argv); + return EXIT_SUCCESS; + } + std::cerr << "Unknown component name: " << argv[1] << std::endl; + } else { + std::cerr << "No component name provided." << std::endl; + } + std::cerr << "Valid component names are: macro, macro_vector, micro"; + std::cerr << " and stateless_micro" << std::endl; + return EXIT_FAILURE; +} + From 1401d94d1747448c98755cbcce5797e19d0fcb9c Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 20 Feb 2023 12:59:43 +0100 Subject: [PATCH 065/188] Python 3.7 compatibility --- integration_test/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration_test/conftest.py b/integration_test/conftest.py index c9fde77c..b7c5a0d5 100644 --- a/integration_test/conftest.py +++ b/integration_test/conftest.py @@ -135,7 +135,7 @@ def run_manager_with_actors(ymmsl_text, tmpdir, actors): elif language == "fortran": executable = fortran_build_dir / actor else: - raise ValueError(f"Unknown {language=}") + raise ValueError(f"Unknown language: {language}") # start native code actor f_out = stack.enter_context( (tmpdir / f'{instance_name}_stdout.txt').open('w')) From 
8933c5049c9fcb57e944a108aeb140074e8e294e Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 20 Feb 2023 13:03:15 +0100 Subject: [PATCH 066/188] Skip cpp actors if_python_only --- integration_test/test_snapshot_macro_micro.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/integration_test/test_snapshot_macro_micro.py b/integration_test/test_snapshot_macro_micro.py index e7b36ea6..df500dfd 100644 --- a/integration_test/test_snapshot_macro_micro.py +++ b/integration_test/test_snapshot_macro_micro.py @@ -246,7 +246,9 @@ def test_snapshot_macro_micro(tmp_path, base_config, actors): @pytest.mark.parametrize('micro_actor', [ ('python', stateless_micro), - ('cpp', 'snapshot_components_test', 'stateless_micro'), + pytest.param( + ('cpp', 'snapshot_components_test', 'stateless_micro'), + marks=skip_if_python_only) ]) def test_snapshot_macro_stateless_micro(tmp_path, base_config, micro_actor): actors = {'macro': ('python', macro), 'micro': micro_actor} @@ -271,7 +273,9 @@ def test_snapshot_macro_stateless_micro(tmp_path, base_config, micro_actor): @pytest.mark.parametrize('macro_actor', [ ('python', macro_vector), - ('cpp', 'snapshot_components_test', 'macro_vector'), + pytest.param( + ('cpp', 'snapshot_components_test', 'macro_vector'), + marks=skip_if_python_only) ]) def test_snapshot_macro_vector_micro(tmp_path, base_config, macro_actor): base_config.model.components[1].multiplicity = [2] From 8181c98300b5cbd7b9b6c9e0c3e8d665362abfc9 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 20 Feb 2023 13:56:19 +0100 Subject: [PATCH 067/188] Remove unused code --- integration_test/test_snapshot_dispatch.py | 25 +--------------------- 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/integration_test/test_snapshot_dispatch.py b/integration_test/test_snapshot_dispatch.py index 9b7fcfed..8e6dcea7 100644 --- a/integration_test/test_snapshot_dispatch.py +++ b/integration_test/test_snapshot_dispatch.py @@ -1,8 +1,7 @@ import pytest 
from ymmsl import Operator, load, dump -from libmuscle import ( - Instance, Message, KEEPS_NO_STATE_FOR_NEXT_USE, USES_CHECKPOINT_API) +from libmuscle import Instance, Message, USES_CHECKPOINT_API from libmuscle.manager.run_dir import RunDir from .conftest import run_manager_with_actors, ls_snapshots @@ -44,28 +43,6 @@ def component(): instance.save_final_snapshot(Message(t_cur, data=[i, t_stop])) -def stateless_component(): - instance = Instance({ - Operator.F_INIT: ['f_i'], - Operator.O_F: ['o_f']}, - KEEPS_NO_STATE_FOR_NEXT_USE) - - while instance.reuse_instance(): - dt = instance.get_setting('dt', 'float') - t_max = instance.get_setting('t_max', 'float') - - msg = instance.receive('f_i', default=Message(0, data=0)) - t_cur = msg.timestamp - i = msg.data - t_stop = t_cur + t_max - - while t_cur < t_stop: - # faux time-integration for testing snapshots - t_cur += dt - - instance.send('o_f', Message(t_cur, data=i)) - - @pytest.fixture def dispatch_config(): return load(f"""ymmsl_version: v0.1 From 127bc53d57b43859472f235381de53fc5964418d Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 20 Feb 2023 16:36:28 +0100 Subject: [PATCH 068/188] Add MPI documentation to C++ API --- libmuscle/cpp/src/libmuscle/instance.hpp | 27 ++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/libmuscle/cpp/src/libmuscle/instance.hpp b/libmuscle/cpp/src/libmuscle/instance.hpp index dca27379..629474ad 100644 --- a/libmuscle/cpp/src/libmuscle/instance.hpp +++ b/libmuscle/cpp/src/libmuscle/instance.hpp @@ -640,6 +640,9 @@ class Instance { * snapshot, the submodel must load its state from the snapshot as returned * Instance::load_snapshot. * + * MPI-based components must call this function in all processes + * simultaneously. + * * @return true iff the submodel must resume from a snapshot. */ bool resuming(); @@ -652,6 +655,9 @@ class Instance { * execute the F_INIT phase of the submodel execution loop. 
Use this method * before attempting to receive data on F_INIT ports. * + * MPI-based components must call this function in all processes + * simultaneously. + * * @return true if the submodel must execute the F_INIT step. * @return false otherwise. */ @@ -661,6 +667,11 @@ class Instance { * * Must only be called when Instance::resuming returns True. * + * MPI-based components may only call this from the root process. An error + * is raised when attempting to call this method in any other process. It + * is therefore up to the model code to scatter or broadcast the snapshot + * state to the non-root processes, if necessary. + * * @return Message containing the state as saved in a previous run * through Instance::save_snapshot or Instance::save_final_snapshot. */ @@ -678,6 +689,9 @@ class Instance { * See also Instance::should_save_final_snapshot for the variant that must be * called at the end of the reuse loop. * + * MPI-based components must call this function in all processes + * simultaneously. + * * @param timestamp current timestamp of the submodel. * @return true iff a snapshot should be taken by the submodel according to the * checkpoint rules provided in the ymmsl configuration. @@ -692,6 +706,11 @@ class Instance { * use the same timestamp in the provided Message object as used to query * Instance::should_save_snapshot. * + * MPI-based components may only call this from the root process. An error + * is raised when attempting to call this method in any other process. It + * is therefore up to the model code to gather the necessary state from + * the non-root processes before saving the snapshot. + * * @param message Message object that is saved as snapshot. The message * timestamp attribute should be the same as passed to * Instance::should_save_snapshot. The data attribute can be used to @@ -710,6 +729,9 @@ class Instance { * See also Instance::should_save_snapshot for the variant that may be called * inside of a time-integration loop of the submodel. 
* + * MPI-based components must call this function in all processes + * simultaneously. + * * \note * This method will block until it can determine whether a final * snapshot should be taken. This means it must also determine if this @@ -729,6 +751,11 @@ class Instance { * See also Instance::save_snapshot for the variant that may be called after * each S Operator of the submodel. * + * MPI-based components may only call this from the root process. An error + * is raised when attempting to call this method in any other process. It + * is therefore up to the model code to gather the necessary state from + * the non-root processes before saving the snapshot. + * * @param message Message object that is saved as snapshot. The data * attribute can be used to store the internal state of the * submodel. From 8f2a3131d9d47e6fd1e5a2e0c6c40d20c0366b2c Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Mon, 20 Feb 2023 16:38:03 +0100 Subject: [PATCH 069/188] Checkpointing API in Fortran --- docs/source/fortran_api.rst | 141 +++++++++++++++ integration_test/test_snapshot_macro_micro.py | 14 +- .../cpp/build/libmuscle/libmuscle.version | 7 + .../cpp/build/libmuscle/libmuscle_mpi.version | 7 + .../bindings/libmuscle_fortran_c.cpp | 44 +++++ .../bindings/libmuscle_mpi_fortran_c.cpp | 44 +++++ libmuscle/fortran/src/libmuscle/libmuscle.f90 | 162 ++++++++++++++++++ .../fortran/src/libmuscle/libmuscle_mpi.f90 | 162 ++++++++++++++++++ .../tests/fortran_snapshot_macro_test.f90 | 86 ++++++++++ .../fortran_snapshot_macro_vector_test.f90 | 90 ++++++++++ .../tests/fortran_snapshot_micro_test.f90 | 98 +++++++++++ .../fortran_snapshot_stateless_micro_test.f90 | 54 ++++++ scripts/make_libmuscle_api.py | 9 +- 13 files changed, 915 insertions(+), 3 deletions(-) create mode 100644 libmuscle/fortran/src/libmuscle/tests/fortran_snapshot_macro_test.f90 create mode 100644 libmuscle/fortran/src/libmuscle/tests/fortran_snapshot_macro_vector_test.f90 create mode 100644 
libmuscle/fortran/src/libmuscle/tests/fortran_snapshot_micro_test.f90 create mode 100644 libmuscle/fortran/src/libmuscle/tests/fortran_snapshot_stateless_micro_test.f90 diff --git a/docs/source/fortran_api.rst b/docs/source/fortran_api.rst index 90bb433b..d7018122 100644 --- a/docs/source/fortran_api.rst +++ b/docs/source/fortran_api.rst @@ -2116,6 +2116,147 @@ LIBMUSCLE_Instance :r message: The received message. :rtype message: LIBMUSCLE_Message +.. f:function:: LIBMUSCLE_Instance_resuming() + + Check if this instance is resuming from a snapshot. + + Must be used by submodels that implement the checkpointing API. You'll + get a RuntimeError when not calling this method in an iteration of the + reuse loop. + + This method returns True for the first iteration of the reuse loop after + resuming from a previously taken snapshot. When resuming from a + snapshot, the submodel must load its state from the snapshot as returned + :f:func:`LIBMUSCLE_Instance_load_snapshot`. + + MPI-based components must call this function in all processes + simultaneously. + + :r resuming: ``.true.`` iff the submodel must resume from a snapshot. + :rtype resuming: logical + +.. f:function:: LIBMUSCLE_Instance_should_init() + + Check if this instance should initialize. + + Must be used by submodels that implement the checkpointing API. + + When resuming from a previous snapshot, instances need not always + execute the F_INIT phase of the submodel execution loop. Use this method + before attempting to receive data on F_INIT ports. + + MPI-based components must call this function in all processes + simultaneously. + + :r should_init: ``.true.`` iff the submodel must execute the F_INIT step. + :rtype should_init: logical + +.. f:function:: LIBMUSCLE_Instance_load_snapshot() + + Load a snapshot. + + Must only be called when Instance::resuming returns True. + + MPI-based components may only call this from the root process. 
An error + is raised when attempting to call this method in any other process. It + is therefore up to the model code to scatter or broadcast the snapshot + state to the non-root processes, if necessary. + + :r message: Message containing the state as saved in a previous run through + :f:func:`LIBMUSCLE_Instance_save_snapshot` or + :f:func:`LIBMUSCLE_Instance_save_final_snapshot`. + :rtype message: LIBMUSCLE_Message + +.. f:function:: LIBMUSCLE_Instance_should_save_snapshot(timestamp) + + Check if a snapshot should be saved after the S Operator of the submodel. + + This method checks if a snapshot should be saved right now, based on the + provided timestamp and passed wallclock time. + + When this method returns true, the submodel must also save a snapshot + through Instance::save_snapshot. A std::runtime_error will be generated when + not doing so. + + See also :f:func:`LIBMUSCLE_Instance_should_save_final_snapshot` for the + variant that must be called at the end of the reuse loop. + + MPI-based components must call this function in all processes + simultaneously. + + :p LIBMUSCLE_real8 timestamp: The current timestamp of the submodel. + :r should_save_snapshot: ``.true.`` iff a snapshot should be taken by the + submodel according to the checkpoint rules provided in the ymmsl + configuration. + :rtype should_save_snapshot: logical + +.. f:function:: LIBMUSCLE_Instance_save_snapshot(message) + + Save a snapshot after the S Operator of the submodel. + + Before saving a snapshot, you should check using + :f:func:`LIBMUSCLE_Instance_should_save_snapshot` if a snapshot should + be saved according to the checkpoint rules specified in the ymmsl + configuration. You should use the same timestamp in the provided Message + object as used to query :f:func:`LIBMUSCLE_Instance_should_save_snapshot`. + + MPI-based components may only call this from the root process. An error + is raised when attempting to call this method in any other process. 
It + is therefore up to the model code to gather the necessary state from + the non-root processes before saving the snapshot. + + :p LIBMUSCLE_Message message: Message object that is saved as snapshot. The message + timestamp attribute should be the same as passed to + :f:func:`LIBMUSCLE_Instance_should_save_snapshot`. The data attribute can + be used to store the internal state of the submodel. + +.. f:function:: LIBMUSCLE_Instance_should_save_final_snapshot() + + Check if a snapshot should be saved at the end of the reuse loop. + + This method checks if a snapshot should be saved now. + + When this method returns true, the submodel must also save a snapshot + through :f:func:`LIBMUSCLE_Instance_save_final_snapshot`. An error will be + generated when not doing so. + + See also :f:func:`LIBMUSCLE_Instance_should_save_snapshot` for the variant + that may be called inside of a time-integration loop of the submodel. + + MPI-based components must call this function in all processes + simultaneously. + + .. note:: + + This method will block until it can determine whether a final + snapshot should be taken. This means it must also determine if this + instance is reused. + + :r should_save_final_snapshot: ``.true.`` iff a final snapshot should be taken + by the submodel according to the checkpoint rules provided in the ymmsl + configuration. + :rtype should_save_final_snapshot: logical + +.. f:function:: LIBMUSCLE_Instance_save_final_snapshot(message) + + Save a snapshot at the end of the reuse loop. + + Before saving a snapshot, you should check using + :f:func:`LIBMUSCLE_Instance_should_save_final_snapshot` if a snapshot should + be saved according to the checkpoint rules specified in the ymmsl + configuration. + + See also :f:func:`LIBMUSCLE_Instance_save_snapshot` for the variant that may + be called after each S Operator of the submodel. + + MPI-based components may only call this from the root process. 
An error + is raised when attempting to call this method in any other process. It + is therefore up to the model code to gather the necessary state from + the non-root processes before saving the snapshot. + + :p LIBMUSCLE_Message message: Message object that is saved as snapshot. The data + attribute can be used to store the internal state of the submodel. + LIBMUSCLE_InstanceFlags ``````````````````````` diff --git a/integration_test/test_snapshot_macro_micro.py b/integration_test/test_snapshot_macro_micro.py index df500dfd..b619f0b9 100644 --- a/integration_test/test_snapshot_macro_micro.py +++ b/integration_test/test_snapshot_macro_micro.py @@ -204,6 +204,10 @@ def config_with_transformer(base_config): {'macro': ('cpp', 'snapshot_components_test', 'macro'), 'micro': ('cpp', 'snapshot_components_test', 'micro')}, marks=skip_if_python_only), + pytest.param( + {'macro': ('fortran', 'fortran_snapshot_macro_test'), + 'micro': ('fortran', 'fortran_snapshot_micro_test')}, + marks=skip_if_python_only), pytest.param( {'macro': ('python', macro), 'micro': ('cpp', 'mpi_snapshot_micro_test', '2')}, marks=[skip_if_python_only, skip_if_no_mpi_cpp]) @@ -248,7 +252,10 @@ def test_snapshot_macro_micro(tmp_path, base_config, actors): ('python', stateless_micro), pytest.param( ('cpp', 'snapshot_components_test', 'stateless_micro'), - marks=skip_if_python_only) + marks=skip_if_python_only), + pytest.param( + ('fortran', 'fortran_snapshot_stateless_micro_test'), + marks=skip_if_python_only), ]) def test_snapshot_macro_stateless_micro(tmp_path, base_config, micro_actor): actors = {'macro': ('python', macro), 'micro': micro_actor} @@ -275,7 +282,10 @@ def test_snapshot_macro_stateless_micro(tmp_path, base_config, micro_actor): ('python', macro_vector), pytest.param( ('cpp', 'snapshot_components_test', 'macro_vector'), - marks=skip_if_python_only) + marks=skip_if_python_only), + pytest.param( + ('fortran', 'fortran_snapshot_macro_vector_test'), + marks=skip_if_python_only), ]) def 
test_snapshot_macro_vector_micro(tmp_path, base_config, macro_actor): base_config.model.components[1].multiplicity = [2] diff --git a/libmuscle/cpp/build/libmuscle/libmuscle.version b/libmuscle/cpp/build/libmuscle/libmuscle.version index b86357c8..234c9d51 100644 --- a/libmuscle/cpp/build/libmuscle/libmuscle.version +++ b/libmuscle/cpp/build/libmuscle/libmuscle.version @@ -354,6 +354,13 @@ LIBMUSCLE_Instance_receive_with_settings_pd_; LIBMUSCLE_Instance_receive_with_settings_ps_; LIBMUSCLE_Instance_receive_with_settings_psd_; + LIBMUSCLE_Instance_resuming_; + LIBMUSCLE_Instance_should_init_; + LIBMUSCLE_Instance_load_snapshot_; + LIBMUSCLE_Instance_should_save_snapshot_; + LIBMUSCLE_Instance_save_snapshot_; + LIBMUSCLE_Instance_should_save_final_snapshot_; + LIBMUSCLE_Instance_save_final_snapshot_; LIBMUSCLE_InstanceFlags_to_int_; LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_create_; LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_free_; diff --git a/libmuscle/cpp/build/libmuscle/libmuscle_mpi.version b/libmuscle/cpp/build/libmuscle/libmuscle_mpi.version index b86357c8..234c9d51 100644 --- a/libmuscle/cpp/build/libmuscle/libmuscle_mpi.version +++ b/libmuscle/cpp/build/libmuscle/libmuscle_mpi.version @@ -354,6 +354,13 @@ LIBMUSCLE_Instance_receive_with_settings_pd_; LIBMUSCLE_Instance_receive_with_settings_ps_; LIBMUSCLE_Instance_receive_with_settings_psd_; + LIBMUSCLE_Instance_resuming_; + LIBMUSCLE_Instance_should_init_; + LIBMUSCLE_Instance_load_snapshot_; + LIBMUSCLE_Instance_should_save_snapshot_; + LIBMUSCLE_Instance_save_snapshot_; + LIBMUSCLE_Instance_should_save_final_snapshot_; + LIBMUSCLE_Instance_save_final_snapshot_; LIBMUSCLE_InstanceFlags_to_int_; LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_create_; LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_free_; diff --git a/libmuscle/cpp/src/libmuscle/bindings/libmuscle_fortran_c.cpp b/libmuscle/cpp/src/libmuscle/bindings/libmuscle_fortran_c.cpp index 34036c21..7b41a3d0 100644 --- a/libmuscle/cpp/src/libmuscle/bindings/libmuscle_fortran_c.cpp +++ 
b/libmuscle/cpp/src/libmuscle/bindings/libmuscle_fortran_c.cpp @@ -4701,6 +4701,50 @@ std::intptr_t LIBMUSCLE_Instance_receive_with_settings_psd_(std::intptr_t self, return 0; } +bool LIBMUSCLE_Instance_resuming_(std::intptr_t self) { + Instance * self_p = reinterpret_cast(self); + bool result = self_p->resuming(); + return result; +} + +bool LIBMUSCLE_Instance_should_init_(std::intptr_t self) { + Instance * self_p = reinterpret_cast(self); + bool result = self_p->should_init(); + return result; +} + +std::intptr_t LIBMUSCLE_Instance_load_snapshot_(std::intptr_t self) { + Instance * self_p = reinterpret_cast(self); + Message * result = new Message(self_p->load_snapshot()); + return reinterpret_cast(result); +} + +bool LIBMUSCLE_Instance_should_save_snapshot_(std::intptr_t self, double timestamp) { + Instance * self_p = reinterpret_cast(self); + bool result = self_p->should_save_snapshot(timestamp); + return result; +} + +void LIBMUSCLE_Instance_save_snapshot_(std::intptr_t self, std::intptr_t message) { + Instance * self_p = reinterpret_cast(self); + Message * message_p = reinterpret_cast(message); + self_p->save_snapshot(*message_p); + return; +} + +bool LIBMUSCLE_Instance_should_save_final_snapshot_(std::intptr_t self) { + Instance * self_p = reinterpret_cast(self); + bool result = self_p->should_save_final_snapshot(); + return result; +} + +void LIBMUSCLE_Instance_save_final_snapshot_(std::intptr_t self, std::intptr_t message) { + Instance * self_p = reinterpret_cast(self); + Message * message_p = reinterpret_cast(message); + self_p->save_final_snapshot(*message_p); + return; +} + std::intptr_t LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_create_(int count) { CmdLineArgs * result = new CmdLineArgs(count); return reinterpret_cast(result); diff --git a/libmuscle/cpp/src/libmuscle/bindings/libmuscle_mpi_fortran_c.cpp b/libmuscle/cpp/src/libmuscle/bindings/libmuscle_mpi_fortran_c.cpp index b6170f3f..e6d38bf1 100644 --- 
a/libmuscle/cpp/src/libmuscle/bindings/libmuscle_mpi_fortran_c.cpp +++ b/libmuscle/cpp/src/libmuscle/bindings/libmuscle_mpi_fortran_c.cpp @@ -4705,6 +4705,50 @@ std::intptr_t LIBMUSCLE_Instance_receive_with_settings_psd_(std::intptr_t self, return 0; } +bool LIBMUSCLE_Instance_resuming_(std::intptr_t self) { + Instance * self_p = reinterpret_cast(self); + bool result = self_p->resuming(); + return result; +} + +bool LIBMUSCLE_Instance_should_init_(std::intptr_t self) { + Instance * self_p = reinterpret_cast(self); + bool result = self_p->should_init(); + return result; +} + +std::intptr_t LIBMUSCLE_Instance_load_snapshot_(std::intptr_t self) { + Instance * self_p = reinterpret_cast(self); + Message * result = new Message(self_p->load_snapshot()); + return reinterpret_cast(result); +} + +bool LIBMUSCLE_Instance_should_save_snapshot_(std::intptr_t self, double timestamp) { + Instance * self_p = reinterpret_cast(self); + bool result = self_p->should_save_snapshot(timestamp); + return result; +} + +void LIBMUSCLE_Instance_save_snapshot_(std::intptr_t self, std::intptr_t message) { + Instance * self_p = reinterpret_cast(self); + Message * message_p = reinterpret_cast(message); + self_p->save_snapshot(*message_p); + return; +} + +bool LIBMUSCLE_Instance_should_save_final_snapshot_(std::intptr_t self) { + Instance * self_p = reinterpret_cast(self); + bool result = self_p->should_save_final_snapshot(); + return result; +} + +void LIBMUSCLE_Instance_save_final_snapshot_(std::intptr_t self, std::intptr_t message) { + Instance * self_p = reinterpret_cast(self); + Message * message_p = reinterpret_cast(message); + self_p->save_final_snapshot(*message_p); + return; +} + std::intptr_t LIBMUSCLE_IMPL_BINDINGS_CmdLineArgs_create_(int count) { CmdLineArgs * result = new CmdLineArgs(count); return reinterpret_cast(result); diff --git a/libmuscle/fortran/src/libmuscle/libmuscle.f90 b/libmuscle/fortran/src/libmuscle/libmuscle.f90 index 433e816a..f1799728 100644 --- 
a/libmuscle/fortran/src/libmuscle/libmuscle.f90 +++ b/libmuscle/fortran/src/libmuscle/libmuscle.f90 @@ -465,6 +465,13 @@ module libmuscle public :: LIBMUSCLE_Instance_receive_with_settings_ps public :: LIBMUSCLE_Instance_receive_with_settings_psd public :: LIBMUSCLE_Instance_receive_with_settings_on_slot + public :: LIBMUSCLE_Instance_resuming + public :: LIBMUSCLE_Instance_should_init + public :: LIBMUSCLE_Instance_load_snapshot + public :: LIBMUSCLE_Instance_should_save_snapshot + public :: LIBMUSCLE_Instance_save_snapshot + public :: LIBMUSCLE_Instance_should_save_final_snapshot + public :: LIBMUSCLE_Instance_save_final_snapshot public :: LIBMUSCLE_InstanceFlags type LIBMUSCLE_InstanceFlags logical :: DONT_APPLY_OVERLAY = .false. @@ -3484,6 +3491,64 @@ integer (c_intptr_t) function LIBMUSCLE_Instance_receive_with_settings_psd_( & integer (c_size_t), intent(out) :: err_msg_len end function LIBMUSCLE_Instance_receive_with_settings_psd_ + logical (c_bool) function LIBMUSCLE_Instance_resuming_(self) & + bind(C, name="LIBMUSCLE_Instance_resuming_") + + use iso_c_binding + integer (c_intptr_t), value, intent(in) :: self + end function LIBMUSCLE_Instance_resuming_ + + logical (c_bool) function LIBMUSCLE_Instance_should_init_(self) & + bind(C, name="LIBMUSCLE_Instance_should_init_") + + use iso_c_binding + integer (c_intptr_t), value, intent(in) :: self + end function LIBMUSCLE_Instance_should_init_ + + integer (c_intptr_t) function LIBMUSCLE_Instance_load_snapshot_(self) & + bind(C, name="LIBMUSCLE_Instance_load_snapshot_") + + use iso_c_binding + integer (c_intptr_t), value, intent(in) :: self + end function LIBMUSCLE_Instance_load_snapshot_ + + logical (c_bool) function LIBMUSCLE_Instance_should_save_snapshot_( & + self, & + timestamp) & + bind(C, name="LIBMUSCLE_Instance_should_save_snapshot_") + + use iso_c_binding + integer (c_intptr_t), value, intent(in) :: self + real (c_double), value, intent(in) :: timestamp + end function 
LIBMUSCLE_Instance_should_save_snapshot_ + + subroutine LIBMUSCLE_Instance_save_snapshot_( & + self, & + message) & + bind(C, name="LIBMUSCLE_Instance_save_snapshot_") + + use iso_c_binding + integer (c_intptr_t), value, intent(in) :: self + integer (c_intptr_t), value, intent(in) :: message + end subroutine LIBMUSCLE_Instance_save_snapshot_ + + logical (c_bool) function LIBMUSCLE_Instance_should_save_final_snapshot_(self) & + bind(C, name="LIBMUSCLE_Instance_should_save_final_snapshot_") + + use iso_c_binding + integer (c_intptr_t), value, intent(in) :: self + end function LIBMUSCLE_Instance_should_save_final_snapshot_ + + subroutine LIBMUSCLE_Instance_save_final_snapshot_( & + self, & + message) & + bind(C, name="LIBMUSCLE_Instance_save_final_snapshot_") + + use iso_c_binding + integer (c_intptr_t), value, intent(in) :: self + integer (c_intptr_t), value, intent(in) :: message + end subroutine LIBMUSCLE_Instance_save_final_snapshot_ + end interface interface LIBMUSCLE_DataConstRef_create @@ -17861,6 +17926,103 @@ function LIBMUSCLE_Instance_receive_with_settings_psd( & LIBMUSCLE_Instance_receive_with_settings_psd%ptr = ret_val end function LIBMUSCLE_Instance_receive_with_settings_psd + function LIBMUSCLE_Instance_resuming( & + self) + implicit none + type(LIBMUSCLE_Instance), intent(in) :: self + logical :: LIBMUSCLE_Instance_resuming + + logical (c_bool) :: ret_val + + ret_val = LIBMUSCLE_Instance_resuming_( & + self%ptr) + + LIBMUSCLE_Instance_resuming = ret_val + end function LIBMUSCLE_Instance_resuming + + function LIBMUSCLE_Instance_should_init( & + self) + implicit none + type(LIBMUSCLE_Instance), intent(in) :: self + logical :: LIBMUSCLE_Instance_should_init + + logical (c_bool) :: ret_val + + ret_val = LIBMUSCLE_Instance_should_init_( & + self%ptr) + + LIBMUSCLE_Instance_should_init = ret_val + end function LIBMUSCLE_Instance_should_init + + function LIBMUSCLE_Instance_load_snapshot( & + self) + implicit none + type(LIBMUSCLE_Instance), intent(in) :: self 
+ type(LIBMUSCLE_Message) :: LIBMUSCLE_Instance_load_snapshot + + integer (c_intptr_t) :: ret_val + + ret_val = LIBMUSCLE_Instance_load_snapshot_( & + self%ptr) + + LIBMUSCLE_Instance_load_snapshot%ptr = ret_val + end function LIBMUSCLE_Instance_load_snapshot + + function LIBMUSCLE_Instance_should_save_snapshot( & + self, & + timestamp) + implicit none + type(LIBMUSCLE_Instance), intent(in) :: self + real (LIBMUSCLE_real8), intent(in) :: timestamp + logical :: LIBMUSCLE_Instance_should_save_snapshot + + logical (c_bool) :: ret_val + + ret_val = LIBMUSCLE_Instance_should_save_snapshot_( & + self%ptr, & + timestamp) + + LIBMUSCLE_Instance_should_save_snapshot = ret_val + end function LIBMUSCLE_Instance_should_save_snapshot + + subroutine LIBMUSCLE_Instance_save_snapshot( & + self, & + message) + implicit none + type(LIBMUSCLE_Instance), intent(in) :: self + type(LIBMUSCLE_Message), intent(in) :: message + + call LIBMUSCLE_Instance_save_snapshot_( & + self%ptr, & + message%ptr) + end subroutine LIBMUSCLE_Instance_save_snapshot + + function LIBMUSCLE_Instance_should_save_final_snapshot( & + self) + implicit none + type(LIBMUSCLE_Instance), intent(in) :: self + logical :: LIBMUSCLE_Instance_should_save_final_snapshot + + logical (c_bool) :: ret_val + + ret_val = LIBMUSCLE_Instance_should_save_final_snapshot_( & + self%ptr) + + LIBMUSCLE_Instance_should_save_final_snapshot = ret_val + end function LIBMUSCLE_Instance_should_save_final_snapshot + + subroutine LIBMUSCLE_Instance_save_final_snapshot( & + self, & + message) + implicit none + type(LIBMUSCLE_Instance), intent(in) :: self + type(LIBMUSCLE_Message), intent(in) :: message + + call LIBMUSCLE_Instance_save_final_snapshot_( & + self%ptr, & + message%ptr) + end subroutine LIBMUSCLE_Instance_save_final_snapshot + integer function LIBMUSCLE_InstanceFlags_to_int_(flags) implicit none diff --git a/libmuscle/fortran/src/libmuscle/libmuscle_mpi.f90 b/libmuscle/fortran/src/libmuscle/libmuscle_mpi.f90 index 960620a6..0c24a3db 
100644 --- a/libmuscle/fortran/src/libmuscle/libmuscle_mpi.f90 +++ b/libmuscle/fortran/src/libmuscle/libmuscle_mpi.f90 @@ -466,6 +466,13 @@ module libmuscle_mpi public :: LIBMUSCLE_Instance_receive_with_settings_ps public :: LIBMUSCLE_Instance_receive_with_settings_psd public :: LIBMUSCLE_Instance_receive_with_settings_on_slot + public :: LIBMUSCLE_Instance_resuming + public :: LIBMUSCLE_Instance_should_init + public :: LIBMUSCLE_Instance_load_snapshot + public :: LIBMUSCLE_Instance_should_save_snapshot + public :: LIBMUSCLE_Instance_save_snapshot + public :: LIBMUSCLE_Instance_should_save_final_snapshot + public :: LIBMUSCLE_Instance_save_final_snapshot public :: LIBMUSCLE_InstanceFlags type LIBMUSCLE_InstanceFlags logical :: DONT_APPLY_OVERLAY = .false. @@ -3489,6 +3496,64 @@ integer (c_intptr_t) function LIBMUSCLE_Instance_receive_with_settings_psd_( & integer (c_size_t), intent(out) :: err_msg_len end function LIBMUSCLE_Instance_receive_with_settings_psd_ + logical (c_bool) function LIBMUSCLE_Instance_resuming_(self) & + bind(C, name="LIBMUSCLE_Instance_resuming_") + + use iso_c_binding + integer (c_intptr_t), value, intent(in) :: self + end function LIBMUSCLE_Instance_resuming_ + + logical (c_bool) function LIBMUSCLE_Instance_should_init_(self) & + bind(C, name="LIBMUSCLE_Instance_should_init_") + + use iso_c_binding + integer (c_intptr_t), value, intent(in) :: self + end function LIBMUSCLE_Instance_should_init_ + + integer (c_intptr_t) function LIBMUSCLE_Instance_load_snapshot_(self) & + bind(C, name="LIBMUSCLE_Instance_load_snapshot_") + + use iso_c_binding + integer (c_intptr_t), value, intent(in) :: self + end function LIBMUSCLE_Instance_load_snapshot_ + + logical (c_bool) function LIBMUSCLE_Instance_should_save_snapshot_( & + self, & + timestamp) & + bind(C, name="LIBMUSCLE_Instance_should_save_snapshot_") + + use iso_c_binding + integer (c_intptr_t), value, intent(in) :: self + real (c_double), value, intent(in) :: timestamp + end function 
LIBMUSCLE_Instance_should_save_snapshot_ + + subroutine LIBMUSCLE_Instance_save_snapshot_( & + self, & + message) & + bind(C, name="LIBMUSCLE_Instance_save_snapshot_") + + use iso_c_binding + integer (c_intptr_t), value, intent(in) :: self + integer (c_intptr_t), value, intent(in) :: message + end subroutine LIBMUSCLE_Instance_save_snapshot_ + + logical (c_bool) function LIBMUSCLE_Instance_should_save_final_snapshot_(self) & + bind(C, name="LIBMUSCLE_Instance_should_save_final_snapshot_") + + use iso_c_binding + integer (c_intptr_t), value, intent(in) :: self + end function LIBMUSCLE_Instance_should_save_final_snapshot_ + + subroutine LIBMUSCLE_Instance_save_final_snapshot_( & + self, & + message) & + bind(C, name="LIBMUSCLE_Instance_save_final_snapshot_") + + use iso_c_binding + integer (c_intptr_t), value, intent(in) :: self + integer (c_intptr_t), value, intent(in) :: message + end subroutine LIBMUSCLE_Instance_save_final_snapshot_ + end interface interface LIBMUSCLE_DataConstRef_create @@ -17873,6 +17938,103 @@ function LIBMUSCLE_Instance_receive_with_settings_psd( & LIBMUSCLE_Instance_receive_with_settings_psd%ptr = ret_val end function LIBMUSCLE_Instance_receive_with_settings_psd + function LIBMUSCLE_Instance_resuming( & + self) + implicit none + type(LIBMUSCLE_Instance), intent(in) :: self + logical :: LIBMUSCLE_Instance_resuming + + logical (c_bool) :: ret_val + + ret_val = LIBMUSCLE_Instance_resuming_( & + self%ptr) + + LIBMUSCLE_Instance_resuming = ret_val + end function LIBMUSCLE_Instance_resuming + + function LIBMUSCLE_Instance_should_init( & + self) + implicit none + type(LIBMUSCLE_Instance), intent(in) :: self + logical :: LIBMUSCLE_Instance_should_init + + logical (c_bool) :: ret_val + + ret_val = LIBMUSCLE_Instance_should_init_( & + self%ptr) + + LIBMUSCLE_Instance_should_init = ret_val + end function LIBMUSCLE_Instance_should_init + + function LIBMUSCLE_Instance_load_snapshot( & + self) + implicit none + type(LIBMUSCLE_Instance), intent(in) :: self 
+ type(LIBMUSCLE_Message) :: LIBMUSCLE_Instance_load_snapshot + + integer (c_intptr_t) :: ret_val + + ret_val = LIBMUSCLE_Instance_load_snapshot_( & + self%ptr) + + LIBMUSCLE_Instance_load_snapshot%ptr = ret_val + end function LIBMUSCLE_Instance_load_snapshot + + function LIBMUSCLE_Instance_should_save_snapshot( & + self, & + timestamp) + implicit none + type(LIBMUSCLE_Instance), intent(in) :: self + real (LIBMUSCLE_real8), intent(in) :: timestamp + logical :: LIBMUSCLE_Instance_should_save_snapshot + + logical (c_bool) :: ret_val + + ret_val = LIBMUSCLE_Instance_should_save_snapshot_( & + self%ptr, & + timestamp) + + LIBMUSCLE_Instance_should_save_snapshot = ret_val + end function LIBMUSCLE_Instance_should_save_snapshot + + subroutine LIBMUSCLE_Instance_save_snapshot( & + self, & + message) + implicit none + type(LIBMUSCLE_Instance), intent(in) :: self + type(LIBMUSCLE_Message), intent(in) :: message + + call LIBMUSCLE_Instance_save_snapshot_( & + self%ptr, & + message%ptr) + end subroutine LIBMUSCLE_Instance_save_snapshot + + function LIBMUSCLE_Instance_should_save_final_snapshot( & + self) + implicit none + type(LIBMUSCLE_Instance), intent(in) :: self + logical :: LIBMUSCLE_Instance_should_save_final_snapshot + + logical (c_bool) :: ret_val + + ret_val = LIBMUSCLE_Instance_should_save_final_snapshot_( & + self%ptr) + + LIBMUSCLE_Instance_should_save_final_snapshot = ret_val + end function LIBMUSCLE_Instance_should_save_final_snapshot + + subroutine LIBMUSCLE_Instance_save_final_snapshot( & + self, & + message) + implicit none + type(LIBMUSCLE_Instance), intent(in) :: self + type(LIBMUSCLE_Message), intent(in) :: message + + call LIBMUSCLE_Instance_save_final_snapshot_( & + self%ptr, & + message%ptr) + end subroutine LIBMUSCLE_Instance_save_final_snapshot + integer function LIBMUSCLE_InstanceFlags_to_int_(flags) implicit none diff --git a/libmuscle/fortran/src/libmuscle/tests/fortran_snapshot_macro_test.f90 
b/libmuscle/fortran/src/libmuscle/tests/fortran_snapshot_macro_test.f90 new file mode 100644 index 00000000..38a5c821 --- /dev/null +++ b/libmuscle/fortran/src/libmuscle/tests/fortran_snapshot_macro_test.f90 @@ -0,0 +1,86 @@ +! This is a part of the integration test suite, and is run from a Python +! test in /integration_test. It is not a unit test. + +program snapshot_macro + use assert + use ymmsl + use libmuscle + implicit none + + type(LIBMUSCLE_PortsDescription) :: ports + type(LIBMUSCLE_Instance) :: instance + type(LIBMUSCLE_Message) :: msg + type(LIBMUSCLE_DataConstRef) :: rdata + type(LIBMUSCLE_Message) :: message + type(LIBMUSCLE_Data) :: sdata + integer :: i + real (LIBMUSCLE_real8) :: dt, t_max, t_cur, t_next + + ports = LIBMUSCLE_PortsDescription_create() + call LIBMUSCLE_PortsDescription_add(ports, YMMSL_Operator_O_I, 'o_i') + call LIBMUSCLE_PortsDescription_add(ports, YMMSL_Operator_S, 's') + instance = LIBMUSCLE_Instance_create( & + ports, LIBMUSCLE_InstanceFlags(USES_CHECKPOINT_API=.true.)) + call LIBMUSCLE_PortsDescription_free(ports) + + do while (LIBMUSCLE_Instance_reuse_instance(instance)) + dt = LIBMUSCLE_Instance_get_setting_as_real8(instance, 'dt') + t_max = LIBMUSCLE_Instance_get_setting_as_real8(instance, 't_max') + + if (LIBMUSCLE_Instance_resuming(instance)) then + msg = LIBMUSCLE_Instance_load_snapshot(instance) + ! 
load state from message + t_cur = LIBMUSCLE_Message_timestamp(msg) + rdata = LIBMUSCLE_Message_get_data(msg) + i = LIBMUSCLE_DataConstRef_as_int(rdata) + + call LIBMUSCLE_DataConstRef_free(rdata) + call LIBMUSCLE_Message_free(msg) + end if + + if (LIBMUSCLE_Instance_should_init(instance)) then + t_cur = LIBMUSCLE_Instance_get_setting_as_real8(instance, 't0') + i = 0 + end if + + do while (t_cur + dt <= t_max) + sdata = LIBMUSCLE_Data_create(i) + msg = LIBMUSCLE_Message_create(t_cur, sdata) + t_next = t_cur + dt + if (t_next + dt <= t_max) then + call LIBMUSCLE_Message_set_next_timestamp(msg, t_next) + end if + call LIBMUSCLE_Instance_send(instance, 'o_i', msg) + call LIBMUSCLE_Message_free(msg) + call LIBMUSCLE_Data_free(sdata) + + msg = LIBMUSCLE_Instance_receive(instance, 's') + rdata = LIBMUSCLE_Message_get_data(msg) + call assert_eq_integer(LIBMUSCLE_DataConstRef_as_int(rdata), i) + call LIBMUSCLE_DataConstRef_free(rdata) + call LIBMUSCLE_Message_free(msg) + + i = i + 1 + t_cur = t_cur + dt + + if (LIBMUSCLE_Instance_should_save_snapshot(instance, t_cur)) then + sdata = LIBMUSCLE_Data_create(i) + msg = LIBMUSCLE_Message_create(t_cur, sdata) + call LIBMUSCLE_Instance_save_snapshot(instance, msg) + call LIBMUSCLE_Message_free(msg) + call LIBMUSCLE_Data_free(sdata) + end if + end do + + if (LIBMUSCLE_Instance_should_save_final_snapshot(instance)) then + sdata = LIBMUSCLE_Data_create(i) + msg = LIBMUSCLE_Message_create(t_cur, sdata) + call LIBMUSCLE_Instance_save_final_snapshot(instance, msg) + call LIBMUSCLE_Message_free(msg) + call LIBMUSCLE_Data_free(sdata) + end if + end do + + call LIBMUSCLE_Instance_free(instance) + +end program snapshot_macro diff --git a/libmuscle/fortran/src/libmuscle/tests/fortran_snapshot_macro_vector_test.f90 b/libmuscle/fortran/src/libmuscle/tests/fortran_snapshot_macro_vector_test.f90 new file mode 100644 index 00000000..987242f8 --- /dev/null +++ b/libmuscle/fortran/src/libmuscle/tests/fortran_snapshot_macro_vector_test.f90 @@ -0,0 
+1,90 @@ +! This is a part of the integration test suite, and is run from a Python +! test in /integration_test. It is not a unit test. + +program snapshot_macro_vector + use assert + use ymmsl + use libmuscle + implicit none + + type(LIBMUSCLE_PortsDescription) :: ports + type(LIBMUSCLE_Instance) :: instance + type(LIBMUSCLE_Message) :: msg + type(LIBMUSCLE_DataConstRef) :: rdata + type(LIBMUSCLE_Message) :: message + type(LIBMUSCLE_Data) :: sdata + integer :: i, slot + real (LIBMUSCLE_real8) :: dt, t_max, t_cur, t_next + + ports = LIBMUSCLE_PortsDescription_create() + call LIBMUSCLE_PortsDescription_add(ports, YMMSL_Operator_O_I, 'o_i[]') + call LIBMUSCLE_PortsDescription_add(ports, YMMSL_Operator_S, 's[]') + instance = LIBMUSCLE_Instance_create( & + ports, LIBMUSCLE_InstanceFlags(USES_CHECKPOINT_API=.true.)) + call LIBMUSCLE_PortsDescription_free(ports) + + do while (LIBMUSCLE_Instance_reuse_instance(instance)) + dt = LIBMUSCLE_Instance_get_setting_as_real8(instance, 'dt') + t_max = LIBMUSCLE_Instance_get_setting_as_real8(instance, 't_max') + + if (LIBMUSCLE_Instance_resuming(instance)) then + msg = LIBMUSCLE_Instance_load_snapshot(instance) + ! 
load state from message + t_cur = LIBMUSCLE_Message_timestamp(msg) + rdata = LIBMUSCLE_Message_get_data(msg) + i = LIBMUSCLE_DataConstRef_as_int(rdata) + + call LIBMUSCLE_DataConstRef_free(rdata) + call LIBMUSCLE_Message_free(msg) + end if + + if (LIBMUSCLE_Instance_should_init(instance)) then + t_cur = LIBMUSCLE_Instance_get_setting_as_real8(instance, 't0') + i = 0 + end if + + do while (t_cur + dt <= t_max) + sdata = LIBMUSCLE_Data_create(i) + msg = LIBMUSCLE_Message_create(t_cur, sdata) + t_next = t_cur + dt + if (t_next + dt <= t_max) then + call LIBMUSCLE_Message_set_next_timestamp(msg, t_next) + end if + do slot = 1, LIBMUSCLE_Instance_get_port_length(instance, 'o_i') + call LIBMUSCLE_Instance_send(instance, 'o_i', msg, slot - 1) + end do + call LIBMUSCLE_Message_free(msg) + call LIBMUSCLE_Data_free(sdata) + + do slot = 1, LIBMUSCLE_Instance_get_port_length(instance, 's') + msg = LIBMUSCLE_Instance_receive_on_slot(instance, 's', slot - 1) + rdata = LIBMUSCLE_Message_get_data(msg) + call assert_eq_integer(LIBMUSCLE_DataConstRef_as_int(rdata), i) + call LIBMUSCLE_DataConstRef_free(rdata) + call LIBMUSCLE_Message_free(msg) + end do + + i = i + 1 + t_cur = t_cur + dt + + if (LIBMUSCLE_Instance_should_save_snapshot(instance, t_cur)) then + sdata = LIBMUSCLE_Data_create(i) + msg = LIBMUSCLE_Message_create(t_cur, sdata) + call LIBMUSCLE_Instance_save_snapshot(instance, msg) + call LIBMUSCLE_Message_free(msg) + call LIBMUSCLE_Data_free(sdata) + end if + end do + + if (LIBMUSCLE_Instance_should_save_final_snapshot(instance)) then + sdata = LIBMUSCLE_Data_create(i) + msg = LIBMUSCLE_Message_create(t_cur, sdata) + call LIBMUSCLE_Instance_save_final_snapshot(instance, msg) + call LIBMUSCLE_Message_free(msg) + call LIBMUSCLE_Data_free(sdata) + end if + end do + + call LIBMUSCLE_Instance_free(instance) + +end program snapshot_macro_vector diff --git a/libmuscle/fortran/src/libmuscle/tests/fortran_snapshot_micro_test.f90 
b/libmuscle/fortran/src/libmuscle/tests/fortran_snapshot_micro_test.f90 new file mode 100644 index 00000000..c8ea51ec --- /dev/null +++ b/libmuscle/fortran/src/libmuscle/tests/fortran_snapshot_micro_test.f90 @@ -0,0 +1,98 @@ +! This is a part of the integration test suite, and is run from a Python +! test in /integration_test. It is not a unit test. + +program snapshot_micro + use assert + use ymmsl + use libmuscle + implicit none + + type(LIBMUSCLE_PortsDescription) :: ports + type(LIBMUSCLE_Instance) :: instance + type(LIBMUSCLE_Message) :: msg + type(LIBMUSCLE_DataConstRef) :: rdata, rdata2 + type(LIBMUSCLE_Message) :: message + type(LIBMUSCLE_Data) :: sdata + integer :: i + real (LIBMUSCLE_real8) :: dt, t_max, t_cur, t_next, t_stop + + ports = LIBMUSCLE_PortsDescription_create() + call LIBMUSCLE_PortsDescription_add(ports, YMMSL_Operator_F_INIT, 'f_i') + call LIBMUSCLE_PortsDescription_add(ports, YMMSL_Operator_O_F, 'o_f') + instance = LIBMUSCLE_Instance_create( & + ports, LIBMUSCLE_InstanceFlags(USES_CHECKPOINT_API=.true.)) + call LIBMUSCLE_PortsDescription_free(ports) + + do while (LIBMUSCLE_Instance_reuse_instance(instance)) + dt = LIBMUSCLE_Instance_get_setting_as_real8(instance, 'dt') + t_max = LIBMUSCLE_Instance_get_setting_as_real8(instance, 't_max') + + if (LIBMUSCLE_Instance_resuming(instance)) then + msg = LIBMUSCLE_Instance_load_snapshot(instance) + ! 
load state from message + t_cur = LIBMUSCLE_Message_timestamp(msg) + rdata = LIBMUSCLE_Message_get_data(msg) + + rdata2 = LIBMUSCLE_DataConstRef_get_item(rdata, 1_LIBMUSCLE_size) + i = LIBMUSCLE_DataConstRef_as_int(rdata2) + call LIBMUSCLE_DataConstRef_free(rdata2) + + rdata2 = LIBMUSCLE_DataConstRef_get_item(rdata, 2_LIBMUSCLE_size) + t_stop = LIBMUSCLE_DataConstRef_as_real8(rdata2) + call LIBMUSCLE_DataConstRef_free(rdata2) + + call LIBMUSCLE_DataConstRef_free(rdata) + call LIBMUSCLE_Message_free(msg) + end if + + if (LIBMUSCLE_Instance_should_init(instance)) then + msg = LIBMUSCLE_Instance_receive(instance, 'f_i') + t_cur = LIBMUSCLE_Message_timestamp(msg) + + rdata = LIBMUSCLE_Message_get_data(msg) + i = LIBMUSCLE_DataConstRef_as_int(rdata) + t_stop = t_cur + t_max + + call LIBMUSCLE_DataConstRef_free(rdata) + call LIBMUSCLE_Message_free(msg) + end if + + do while (t_cur <= t_stop) + ! faux time-integration for testing snapshots + t_cur = t_cur + dt + + if (LIBMUSCLE_Instance_should_save_snapshot(instance, t_cur)) then + sdata = LIBMUSCLE_Data_create_nils(2_LIBMUSCLE_size) + call LIBMUSCLE_Data_set_item(sdata, 1_LIBMUSCLE_size, i) + call LIBMUSCLE_Data_set_item(sdata, 2_LIBMUSCLE_size, t_stop) + + msg = LIBMUSCLE_Message_create(t_cur, sdata) + call LIBMUSCLE_Instance_save_snapshot(instance, msg) + + call LIBMUSCLE_Message_free(msg) + call LIBMUSCLE_Data_free(sdata) + end if + end do + + sdata = LIBMUSCLE_Data_create(i) + msg = LIBMUSCLE_Message_create(t_cur, sdata) + call LIBMUSCLE_Instance_send(instance, 'o_f', msg) + call LIBMUSCLE_Data_free(sdata) + call LIBMUSCLE_Message_free(msg) + + if (LIBMUSCLE_Instance_should_save_final_snapshot(instance)) then + sdata = LIBMUSCLE_Data_create_nils(2_LIBMUSCLE_size) + call LIBMUSCLE_Data_set_item(sdata, 1_LIBMUSCLE_size, i) + call LIBMUSCLE_Data_set_item(sdata, 2_LIBMUSCLE_size, t_stop) + + msg = LIBMUSCLE_Message_create(t_cur, sdata) + call LIBMUSCLE_Instance_save_final_snapshot(instance, msg) + + call 
LIBMUSCLE_Message_free(msg) + call LIBMUSCLE_Data_free(sdata) + end if + end do + + call LIBMUSCLE_Instance_free(instance) + +end program snapshot_micro diff --git a/libmuscle/fortran/src/libmuscle/tests/fortran_snapshot_stateless_micro_test.f90 b/libmuscle/fortran/src/libmuscle/tests/fortran_snapshot_stateless_micro_test.f90 new file mode 100644 index 00000000..47106894 --- /dev/null +++ b/libmuscle/fortran/src/libmuscle/tests/fortran_snapshot_stateless_micro_test.f90 @@ -0,0 +1,54 @@ +! This is a part of the integration test suite, and is run from a Python +! test in /integration_test. It is not a unit test. + +program snapshot_micro + use assert + use ymmsl + use libmuscle + implicit none + + type(LIBMUSCLE_PortsDescription) :: ports + type(LIBMUSCLE_Instance) :: instance + type(LIBMUSCLE_Message) :: msg + type(LIBMUSCLE_DataConstRef) :: rdata, rdata2 + type(LIBMUSCLE_Message) :: message + type(LIBMUSCLE_Data) :: sdata + integer :: i + real (LIBMUSCLE_real8) :: dt, t_max, t_cur, t_next, t_stop + + ports = LIBMUSCLE_PortsDescription_create() + call LIBMUSCLE_PortsDescription_add(ports, YMMSL_Operator_F_INIT, 'f_i') + call LIBMUSCLE_PortsDescription_add(ports, YMMSL_Operator_O_F, 'o_f') + instance = LIBMUSCLE_Instance_create( & + ports, LIBMUSCLE_InstanceFlags(KEEPS_NO_STATE_FOR_NEXT_USE=.true.)) + call LIBMUSCLE_PortsDescription_free(ports) + + do while(LIBMUSCLE_Instance_reuse_instance(instance)) + dt = LIBMUSCLE_Instance_get_setting_as_real8(instance, 'dt') + t_max = LIBMUSCLE_Instance_get_setting_as_real8(instance, 't_max') + + msg = LIBMUSCLE_Instance_receive(instance, 'f_i') + t_cur = LIBMUSCLE_Message_timestamp(msg) + + rdata = LIBMUSCLE_Message_get_data(msg) + i = LIBMUSCLE_DataConstRef_as_int(rdata) + t_stop = t_cur + t_max + + call LIBMUSCLE_DataConstRef_free(rdata) + call LIBMUSCLE_Message_free(msg) + + do while (t_cur <= t_stop) + ! 
faux time-integration for testing snapshots + t_cur = t_cur + dt + end do + + sdata = LIBMUSCLE_Data_create(i) + msg = LIBMUSCLE_Message_create(t_cur, sdata) + call LIBMUSCLE_Instance_send(instance, 'o_f', msg) + call LIBMUSCLE_Data_free(sdata) + call LIBMUSCLE_Message_free(msg) + end do + + call LIBMUSCLE_Instance_free(instance) + +end program snapshot_micro diff --git a/scripts/make_libmuscle_api.py b/scripts/make_libmuscle_api.py index 933f1330..3c84cb09 100755 --- a/scripts/make_libmuscle_api.py +++ b/scripts/make_libmuscle_api.py @@ -1008,7 +1008,14 @@ def __copy__(self) -> 'Elements': 'self_p->receive_with_settings({})'.format( kwargs['cpp_args']))), OverloadSet('receive_with_settings_on_slot', - ['receive_with_settings_ps', 'receive_with_settings_psd']) + ['receive_with_settings_ps', 'receive_with_settings_psd']), + MemFun(Bool(), 'resuming'), + MemFun(Bool(), 'should_init'), + MemFun(Obj('Message'), 'load_snapshot'), + MemFun(Bool(), 'should_save_snapshot', [Double('timestamp')]), + MemFun(Void(), 'save_snapshot', [Obj('Message', 'message')]), + MemFun(Bool(), 'should_save_final_snapshot'), + MemFun(Void(), 'save_final_snapshot', [Obj('Message', 'message')]), ] From e04627cd4aa9ad078728346fc80f5d0c3547a9a2 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 21 Feb 2023 10:59:44 +0100 Subject: [PATCH 070/188] Update version scripts for C++ public API --- libmuscle/cpp/build/libmuscle/libmuscle.version | 7 +++++++ libmuscle/cpp/build/libmuscle/libmuscle.version.in | 7 +++++++ libmuscle/cpp/build/libmuscle/libmuscle_mpi.version | 7 +++++++ 3 files changed, 21 insertions(+) diff --git a/libmuscle/cpp/build/libmuscle/libmuscle.version b/libmuscle/cpp/build/libmuscle/libmuscle.version index 234c9d51..48de674d 100644 --- a/libmuscle/cpp/build/libmuscle/libmuscle.version +++ b/libmuscle/cpp/build/libmuscle/libmuscle.version @@ -33,6 +33,13 @@ libmuscle::impl::Instance::receive_with_settings*; libmuscle::impl::Instance::receive*; 
libmuscle::impl::Instance::send*; + libmuscle::impl::Instance::resuming*; + libmuscle::impl::Instance::load_snapshot*; + libmuscle::impl::Instance::should_init*; + libmuscle::impl::Instance::should_save_snapshot*; + libmuscle::impl::Instance::save_snapshot*; + libmuscle::impl::Instance::should_save_final_snapshot*; + libmuscle::impl::Instance::save_final_snapshot*; libmuscle::impl::Message::*; diff --git a/libmuscle/cpp/build/libmuscle/libmuscle.version.in b/libmuscle/cpp/build/libmuscle/libmuscle.version.in index f7a0c318..8eb49544 100644 --- a/libmuscle/cpp/build/libmuscle/libmuscle.version.in +++ b/libmuscle/cpp/build/libmuscle/libmuscle.version.in @@ -33,6 +33,13 @@ libmuscle::impl::Instance::receive_with_settings*; libmuscle::impl::Instance::receive*; libmuscle::impl::Instance::send*; + libmuscle::impl::Instance::resuming*; + libmuscle::impl::Instance::load_snapshot*; + libmuscle::impl::Instance::should_init*; + libmuscle::impl::Instance::should_save_snapshot*; + libmuscle::impl::Instance::save_snapshot*; + libmuscle::impl::Instance::should_save_final_snapshot*; + libmuscle::impl::Instance::save_final_snapshot*; libmuscle::impl::Message::*; diff --git a/libmuscle/cpp/build/libmuscle/libmuscle_mpi.version b/libmuscle/cpp/build/libmuscle/libmuscle_mpi.version index 234c9d51..48de674d 100644 --- a/libmuscle/cpp/build/libmuscle/libmuscle_mpi.version +++ b/libmuscle/cpp/build/libmuscle/libmuscle_mpi.version @@ -33,6 +33,13 @@ libmuscle::impl::Instance::receive_with_settings*; libmuscle::impl::Instance::receive*; libmuscle::impl::Instance::send*; + libmuscle::impl::Instance::resuming*; + libmuscle::impl::Instance::load_snapshot*; + libmuscle::impl::Instance::should_init*; + libmuscle::impl::Instance::should_save_snapshot*; + libmuscle::impl::Instance::save_snapshot*; + libmuscle::impl::Instance::should_save_final_snapshot*; + libmuscle::impl::Instance::save_final_snapshot*; libmuscle::impl::Message::*; From d9c1413015f4651fa3273c6e84ac1e2fe32159d4 Mon Sep 17 
00:00:00 2001 From: Maarten Sebregts Date: Tue, 21 Feb 2023 12:49:44 +0100 Subject: [PATCH 071/188] Add C++ and Fortran checkpointing examples --- docs/source/examples/Makefile | 4 +- docs/source/examples/cpp/build/Makefile | 2 +- .../examples/cpp/checkpointing_diffusion.cpp | 142 +++++++++++++++ .../examples/cpp/checkpointing_reaction.cpp | 84 +++++++++ docs/source/examples/fortran/build/Makefile | 2 +- .../fortran/checkpointing_diffusion.f90 | 163 ++++++++++++++++++ .../fortran/checkpointing_reaction.f90 | 104 +++++++++++ docs/source/examples/rd_checkpoints_cpp.ymmsl | 33 ++++ .../examples/rd_checkpoints_fortran.ymmsl | 33 ++++ ...ints.ymmsl => rd_checkpoints_python.ymmsl} | 0 .../examples/rd_implementations.ymmsl.in | 20 +++ 11 files changed, 584 insertions(+), 3 deletions(-) create mode 100644 docs/source/examples/cpp/checkpointing_diffusion.cpp create mode 100644 docs/source/examples/cpp/checkpointing_reaction.cpp create mode 100644 docs/source/examples/fortran/checkpointing_diffusion.f90 create mode 100644 docs/source/examples/fortran/checkpointing_reaction.f90 create mode 100644 docs/source/examples/rd_checkpoints_cpp.ymmsl create mode 100644 docs/source/examples/rd_checkpoints_fortran.ymmsl rename docs/source/examples/{rd_checkpoints.ymmsl => rd_checkpoints_python.ymmsl} (100%) diff --git a/docs/source/examples/Makefile b/docs/source/examples/Makefile index 8507eb52..e12686fc 100644 --- a/docs/source/examples/Makefile +++ b/docs/source/examples/Makefile @@ -109,12 +109,13 @@ clean: .PHONY: test_python test_python: base . python/build/venv/bin/activate && DONTPLOT=1 muscle_manager --start-all rd_implementations.ymmsl rd_python.ymmsl rd_settings.ymmsl - . python/build/venv/bin/activate && DONTPLOT=1 muscle_manager --start-all rd_implementations.ymmsl rd_checkpoints.ymmsl rd_settings.ymmsl + . 
python/build/venv/bin/activate && DONTPLOT=1 muscle_manager --start-all rd_implementations.ymmsl rd_checkpoints_python.ymmsl rd_settings.ymmsl make -C python test .PHONY: test_cpp test_cpp: base cpp . python/build/venv/bin/activate && muscle_manager --start-all rd_implementations.ymmsl rd_cpp.ymmsl rd_settings.ymmsl + . python/build/venv/bin/activate && muscle_manager --start-all rd_implementations.ymmsl rd_checkpoints_cpp.ymmsl rd_settings.ymmsl . python/build/venv/bin/activate && muscle_manager --start-all rd_implementations.ymmsl rd_python_cpp.ymmsl rd_settings.ymmsl . python/build/venv/bin/activate && muscle_manager --start-all rd_implementations.ymmsl rdmc_cpp.ymmsl rdmc_settings.ymmsl @@ -125,6 +126,7 @@ test_cpp_mpi: base cpp_mpi .PHONY: test_fortran test_fortran: base fortran . python/build/venv/bin/activate && muscle_manager --start-all rd_implementations.ymmsl rd_fortran.ymmsl rd_settings.ymmsl + . python/build/venv/bin/activate && muscle_manager --start-all rd_implementations.ymmsl rd_checkpoints_fortran.ymmsl rd_settings.ymmsl . python/build/venv/bin/activate && muscle_manager --start-all rd_implementations.ymmsl rd_python_fortran.ymmsl rd_settings.ymmsl . 
python/build/venv/bin/activate && muscle_manager --start-all rd_implementations.ymmsl rdmc_fortran.ymmsl rdmc_settings.ymmsl diff --git a/docs/source/examples/cpp/build/Makefile b/docs/source/examples/cpp/build/Makefile index c5c46c9c..f706b875 100644 --- a/docs/source/examples/cpp/build/Makefile +++ b/docs/source/examples/cpp/build/Makefile @@ -5,7 +5,7 @@ MPI_CXXFLAGS := -std=c++14 -g $(shell pkg-config --cflags libmuscle_mpi ymmsl) MPI_LDFLAGS := $(shell pkg-config --libs libmuscle_mpi ymmsl) -binaries := reaction diffusion mc_driver load_balancer +binaries := reaction diffusion mc_driver load_balancer checkpointing_reaction checkpointing_diffusion mpi_binaries := reaction_mpi diff --git a/docs/source/examples/cpp/checkpointing_diffusion.cpp b/docs/source/examples/cpp/checkpointing_diffusion.cpp new file mode 100644 index 00000000..755ea890 --- /dev/null +++ b/docs/source/examples/cpp/checkpointing_diffusion.cpp @@ -0,0 +1,142 @@ +#include +#include +#include +#include +#include + +#include +#include + +using libmuscle::Data; +using libmuscle::Instance; +using libmuscle::InstanceFlags; +using libmuscle::Message; +using ymmsl::Operator; + + +/* Calculates the Laplacian of vector Z. + * + * @param Z A vector representing a series of samples along a line. + * @param dx The spacing between the samples. + */ +std::vector laplacian(std::vector const & Z, double dx) { + std::vector result(Z.size() - 2); + for (std::size_t i = 0u; i < result.size(); ++i) + result[i] = (Z[i] + Z[i+2] - 2.0 * Z[i+1]) / (dx * dx); + return result; +} + + +/** A simple diffusion model on a 1d grid. + * + * The state of this model is a 1D grid of concentrations. It sends out the + * state on each timestep on 'state_out', and can receive an updated state + * on 'state_in' at each state update. 
+ */ +void diffusion(int argc, char * argv[]) { + Instance instance(argc, argv, { + {Operator::O_I, {"state_out"}}, + {Operator::S, {"state_in"}}, + {Operator::O_F, {"final_state_out"}}}, + InstanceFlags::USES_CHECKPOINT_API); + + while (instance.reuse_instance()) { + // F_INIT + double t_max = instance.get_setting_as("t_max"); + double dt = instance.get_setting_as("dt"); + double x_max = instance.get_setting_as("x_max"); + double dx = instance.get_setting_as("dx"); + double d = instance.get_setting_as("d"); + + double t_cur; + std::vector> Us; + std::vector U(lrint(x_max / dx), 1e-20); + + if (instance.resuming()) { + auto msg = instance.load_snapshot(); + for (int i = 0; i < msg.data().size(); ++i) { + if (msg.data()[i].shape().size() != 1u || msg.data()[i].size() != U.size()) { + auto err_msg = "Received state of incorrect shape or size!"; + instance.error_shutdown(err_msg); + throw std::runtime_error(err_msg); + } + std::copy_n(msg.data()[i].elements(), msg.data()[i].size(), U.begin()); + Us.push_back(U); + } + t_cur = msg.timestamp(); + } + + if (instance.should_init()) { + U = std::vector(lrint(x_max / dx), 1e-20); + U[25] = 2.0; + U[50] = 2.0; + U[75] = 2.0; + + Us.push_back(U); + + t_cur = 0.0; + } + + while (t_cur + dt <= t_max) { + std::cerr << "t_cur: " << t_cur << ", t_max: " << t_max << std::endl; + // O_I + auto data = Data::grid(U.data(), {U.size()}, {"x"}); + Message cur_state_msg(t_cur, data); + double t_next = t_cur + dt; + if (t_next + dt <= t_max) + cur_state_msg.set_next_timestamp(t_next); + instance.send("state_out", cur_state_msg); + + // S + auto msg = instance.receive("state_in", cur_state_msg); + if (msg.data().shape().size() != 1u || msg.data().size() != U.size()) { + auto msg = "Received state of incorrect shape or size!"; + instance.error_shutdown(msg); + throw std::runtime_error(msg); + } + std::copy_n(msg.data().elements(), msg.data().size(), U.begin()); + + std::vector dU(U.size()); + auto lpl = laplacian(U, dx); + for (std::size_t i 
= 1u; i < dU.size() - 1; ++i) + dU[i] = d * lpl[i-1] * dt; + dU[0] = dU[1]; + dU[dU.size() - 1] = dU[dU.size() - 2]; + + for (std::size_t i = 0u; i < dU.size(); ++i) + U[i] += dU[i]; + + Us.push_back(U); + t_cur += dt; + + if (instance.should_save_snapshot(t_cur)) { + Data data = Data::nils(Us.size()); + for (uint i = 0; i < data.size(); ++i) + data[i] = Data::grid(Us[i].data(), {Us[i].size()}); + Message msg(t_cur, data); + instance.save_snapshot(msg); + } + } + + // O_F + auto data = Data::grid(U.data(), {U.size()}, {"x"}); + instance.send("final_state_out", Message(t_cur, data)); + std::cerr << "All done" << std::endl; + + + if (instance.should_save_final_snapshot()) { + Data data = Data::nils(Us.size()); + for (uint i = 0; i < data.size(); ++i) + data[i] = Data::grid(Us[i].data(), {Us[i].size()}); + Message msg(t_cur, data); + instance.save_final_snapshot(msg); + } + } +} + + +int main(int argc, char * argv[]) { + diffusion(argc, argv); + return EXIT_SUCCESS; +} + diff --git a/docs/source/examples/cpp/checkpointing_reaction.cpp b/docs/source/examples/cpp/checkpointing_reaction.cpp new file mode 100644 index 00000000..d1783f77 --- /dev/null +++ b/docs/source/examples/cpp/checkpointing_reaction.cpp @@ -0,0 +1,84 @@ +#include +#include + +#include +#include + + +using libmuscle::Data; +using libmuscle::DataConstRef; +using libmuscle::Instance; +using libmuscle::InstanceFlags; +using libmuscle::Message; +using ymmsl::Operator; + + +/** A simple exponential reaction model on a 1D grid. 
+ */ +void reaction(int argc, char * argv[]) { + Instance instance(argc, argv, { + {Operator::F_INIT, {"initial_state"}}, // 1D Grid + {Operator::O_F, {"final_state"}}}, // 1D Grid + InstanceFlags::USES_CHECKPOINT_API); + + while (instance.reuse_instance()) { + + // F_INIT + double t_max = instance.get_setting_as("t_max"); + double dt = instance.get_setting_as("dt"); + double k = instance.get_setting_as("k"); + double t_stop, t_cur; + std::vector U; + + if (instance.resuming()) { + auto msg = instance.load_snapshot(); + if (!msg.data().is_nil()) { + // A final snapshot does not have data in it, but that's fine: we + // will do the F_INIT step inside `should_init()` below. + auto data_ptr = msg.data()[0].elements(); + U = std::vector(data_ptr, data_ptr + msg.data()[0].size()); + t_cur = msg.timestamp(); + t_stop = msg.data()[1].as(); + } + } + + if (instance.should_init()) { + auto msg = instance.receive("initial_state"); + auto data_ptr = msg.data().elements(); + U = std::vector(data_ptr, data_ptr + msg.data().size()); + t_cur = msg.timestamp(); + t_stop = msg.timestamp() + t_max; + } + + while (t_cur + dt < t_stop) { + // O_I + + // S + for (double & u : U) + u += k * u * dt; + t_cur += dt; + + if (instance.should_save_snapshot(t_cur)) { + Message msg(t_cur, + Data::list(Data::grid(U.data(), {U.size()}, {"x"}), t_stop)); + instance.save_snapshot(msg); + } + } + + // O_F + auto result = Data::grid(U.data(), {U.size()}, {"x"}); + instance.send("final_state", Message(t_cur, result)); + + if (instance.should_save_final_snapshot()) { + Message msg(t_cur); + instance.save_final_snapshot(msg); + } + } +} + + +int main(int argc, char * argv[]) { + reaction(argc, argv); + return EXIT_SUCCESS; +} + diff --git a/docs/source/examples/fortran/build/Makefile b/docs/source/examples/fortran/build/Makefile index 2d859c3f..02855262 100644 --- a/docs/source/examples/fortran/build/Makefile +++ b/docs/source/examples/fortran/build/Makefile @@ -17,7 +17,7 @@ else endif -binaries := 
reaction diffusion mc_driver load_balancer +binaries := reaction diffusion mc_driver load_balancer checkpointing_reaction checkpointing_diffusion mpi_binaries := reaction_mpi diff --git a/docs/source/examples/fortran/checkpointing_diffusion.f90 b/docs/source/examples/fortran/checkpointing_diffusion.f90 new file mode 100644 index 00000000..4d204c49 --- /dev/null +++ b/docs/source/examples/fortran/checkpointing_diffusion.f90 @@ -0,0 +1,163 @@ +! A simple diffusion model on a 1D grid. +! +! The state of this model is a 1D grid of concentrations. It sends out the +! state on each timestep on 'state_out', and can receive an updated state +! on 'state_in' at each state update. + +program diffusion + use ymmsl + use libmuscle + implicit none + + type(LIBMUSCLE_PortsDescription) :: ports + type(LIBMUSCLE_Instance) :: instance + + type(LIBMUSCLE_Message) :: rmsg + type(LIBMUSCLE_DataConstRef) :: rdata, item + integer (LIBMUSCLE_size), dimension(2) :: shp + + type(LIBMUSCLE_Message) :: smsg + type(LIBMUSCLE_Data) :: sdata + + real (selected_real_kind(15)) :: t_cur, t_next, t_max, dt, x_max, dx, d + integer (LIBMUSCLE_size) :: U_size, n_steps, iteration + real (selected_real_kind(15)), dimension(:), allocatable :: U, dU + real (selected_real_kind(15)), dimension(:, :), allocatable :: Us + + + ports = LIBMUSCLE_PortsDescription_create() + call LIBMUSCLE_PortsDescription_add(ports, YMMSL_Operator_O_I, 'state_out') + call LIBMUSCLE_PortsDescription_add(ports, YMMSL_Operator_S, 'state_in') + call LIBMUSCLE_PortsDescription_add(ports, YMMSL_Operator_O_F, 'final_state_out') + instance = LIBMUSCLE_Instance_create(ports, & + LIBMUSCLE_InstanceFlags(USES_CHECKPOINT_API=.true.)) + call LIBMUSCLE_PortsDescription_free(ports) + + do while (LIBMUSCLE_Instance_reuse_instance(instance)) + ! 
F_INIT + t_max = LIBMUSCLE_Instance_get_setting_as_real8(instance, 't_max') + dt = LIBMUSCLE_Instance_get_setting_as_real8(instance, 'dt') + x_max = LIBMUSCLE_Instance_get_setting_as_real8(instance, 'x_max') + dx = LIBMUSCLE_Instance_get_setting_as_real8(instance, 'dx') + d = LIBMUSCLE_Instance_get_setting_as_real8(instance, 'd') + + if (LIBMUSCLE_Instance_resuming(instance)) then + rmsg = LIBMUSCLE_Instance_load_snapshot(instance) + rdata = LIBMUSCLE_Message_get_data(rmsg) + + item = LIBMUSCLE_DataConstRef_get_item(rdata, 1_LIBMUSCLE_size) + call LIBMUSCLE_DataConstRef_shape(item, shp) + U_size = shp(1) + n_steps = shp(2) + call LIBMUSCLE_DataConstRef_elements(item, Us) + call LIBMUSCLE_DataConstRef_free(item) + + item = LIBMUSCLE_DataConstRef_get_item(rdata, 1_LIBMUSCLE_size) + iteration = LIBMUSCLE_DataConstRef_as_int(item) + call LIBMUSCLE_DataConstRef_free(item) + + U = Us(:, iteration) + t_cur = LIBMUSCLE_Message_timestamp(rmsg) + + call LIBMUSCLE_DataConstRef_free(rdata) + call LIBMUSCLE_Message_free(rmsg) + end if + + if (LIBMUSCLE_Instance_should_init(instance)) then + U_size = nint(x_max / dx) + allocate (U(U_size), dU(U_size)) + U = 1e-20 + U(26) = 2.0 + U(51) = 2.0 + U(76) = 2.0 + + n_steps = int(t_max / dt) + allocate (Us(U_size, n_steps)) + + iteration = 1 + Us(:, iteration) = U + + t_cur = 0.0 + end if + + do while (t_cur + dt < t_max) + print *, 't_cur: ', t_cur, 't_max: ', t_max + ! O_I + sdata = LIBMUSCLE_Data_create_grid(U, 'x') + smsg = LIBMUSCLE_Message_create(t_cur, sdata) + call LIBMUSCLE_Data_free(sdata) + t_next = t_cur + dt + if (t_next + dt <= t_max) then + call LIBMUSCLE_Message_set_next_timestamp(smsg, t_next) + end if + call LIBMUSCLE_Instance_send(instance, 'state_out', smsg) + + ! 
S + rmsg = LIBMUSCLE_Instance_receive(instance, 'state_in', smsg) + rdata = LIBMUSCLE_Message_get_data(rmsg) + call LIBMUSCLE_DataConstRef_elements(rdata, U) + call LIBMUSCLE_DataConstRef_free(rdata) + call LIBMUSCLE_Message_free(rmsg) + call LIBMUSCLE_Message_free(smsg) + + dU(2:U_size-1) = d * laplacian(U, dx) * dt + dU(1) = dU(2) + dU(U_size) = dU(U_size - 1) + + U = U + dU + iteration = iteration + 1 + Us(:, iteration) = U + + t_cur = t_cur + dt + + if (LIBMUSCLE_Instance_should_save_snapshot(instance, t_cur)) then + sdata = LIBMUSCLE_Data_create_grid(Us) + smsg = LIBMUSCLE_Message_create(t_cur, sdata) + call LIBMUSCLE_Instance_save_snapshot(instance, smsg) + call LIBMUSCLE_Message_free(smsg) + call LIBMUSCLE_Data_free(sdata) + end if + end do + + ! O_F + sdata = LIBMUSCLE_Data_create_grid(U, 'x') + smsg = LIBMUSCLE_Message_create(t_cur, sdata) + call LIBMUSCLE_Instance_send(instance, 'final_state_out', smsg) + call LIBMUSCLE_Message_free(smsg) + call LIBMUSCLE_Data_free(sdata) + + if (LIBMUSCLE_Instance_should_save_final_snapshot(instance)) then + sdata = LIBMUSCLE_Data_create_grid(Us) + smsg = LIBMUSCLE_Message_create(t_cur, sdata) + call LIBMUSCLE_Instance_save_final_snapshot(instance, smsg) + call LIBMUSCLE_Message_free(smsg) + call LIBMUSCLE_Data_free(sdata) + end if + + deallocate (U, dU, Us) + print *, 'All done' + end do + + call LIBMUSCLE_Instance_free(instance) + +contains + + ! Calculates the Laplacian of vector Z. + ! + ! @param Z A vector representing a series of samples along a line. + ! @param dx The spacing between the samples. 
+ + function laplacian(Z, dx) + real (selected_real_kind(15)), dimension(:), intent(in) :: Z + real (selected_real_kind(15)), intent(in) :: dx + real (selected_real_kind(15)), allocatable, dimension(:) :: laplacian + integer :: n + + n = size(Z) + allocate(laplacian(size(Z) - 2)) + laplacian = (Z(1:n-2) + Z(3:n) - 2.0d0 * Z(2:n-1)) / (dx * dx) + + end function laplacian + +end program diffusion + diff --git a/docs/source/examples/fortran/checkpointing_reaction.f90 b/docs/source/examples/fortran/checkpointing_reaction.f90 new file mode 100644 index 00000000..32749bd3 --- /dev/null +++ b/docs/source/examples/fortran/checkpointing_reaction.f90 @@ -0,0 +1,104 @@ +program reaction + use ymmsl + use libmuscle + implicit none + + type(LIBMUSCLE_PortsDescription) :: ports + type(LIBMUSCLE_Instance) :: instance + + type(LIBMUSCLE_Message) :: rmsg + type(LIBMUSCLE_DataConstRef) :: rdata, item + + + type(LIBMUSCLE_Message) :: smsg + type(LIBMUSCLE_Data) :: sdata, sitem + + real (selected_real_kind(15)) :: t_cur, t_max, dt, k + real (selected_real_kind(15)), dimension(:), allocatable :: U + + + ports = LIBMUSCLE_PortsDescription_create() + call LIBMUSCLE_PortsDescription_add(ports, YMMSL_Operator_F_INIT, 'initial_state') + call LIBMUSCLE_PortsDescription_add(ports, YMMSL_Operator_O_F, 'final_state') + instance = LIBMUSCLE_Instance_create(ports, & + LIBMUSCLE_InstanceFlags(USES_CHECKPOINT_API=.true.)) + call LIBMUSCLE_PortsDescription_free(ports) + + do while (LIBMUSCLE_Instance_reuse_instance(instance)) + ! F_INIT + t_max = LIBMUSCLE_Instance_get_setting_as_real8(instance, 't_max') + dt = LIBMUSCLE_Instance_get_setting_as_real8(instance, 'dt') + k = LIBMUSCLE_Instance_get_setting_as_real8(instance, 'k') + + if (LIBMUSCLE_Instance_resuming(instance)) then + rmsg = LIBMUSCLE_Instance_load_snapshot(instance) + rdata = LIBMUSCLE_Message_get_data(rmsg) + if (.not. LIBMUSCLE_DataConstRef_is_nil(rdata)) then + ! A final snapshot does not have data in it, but that's fine: we + ! 
will do the F_INIT step inside `should_init` below. + item = LIBMUSCLE_DataConstRef_get_item(rdata, 1_LIBMUSCLE_size) + allocate (U(LIBMUSCLE_DataConstRef_size(item))) + call LIBMUSCLE_DataConstRef_elements(item, U) + call LIBMUSCLE_DataConstRef_free(item) + + t_cur = LIBMUSCLE_Message_timestamp(rmsg) + + item = LIBMUSCLE_DataConstRef_get_item(rdata, 2_LIBMUSCLE_size) + t_max = LIBMUSCLE_DataConstRef_as_real8(item) + call LIBMUSCLE_DataConstRef_free(item) + end if + call LIBMUSCLE_DataConstRef_free(rdata) + call LIBMUSCLE_Message_free(rmsg) + end if + + if (LIBMUSCLE_Instance_should_init(instance)) then + rmsg = LIBMUSCLE_Instance_receive(instance, 'initial_state') + rdata = LIBMUSCLE_Message_get_data(rmsg) + allocate (U(LIBMUSCLE_DataConstRef_size(rdata))) + call LIBMUSCLE_DataConstRef_elements(rdata, U) + call LIBMUSCLE_DataConstRef_free(rdata) + + t_cur = LIBMUSCLE_Message_timestamp(rmsg) + t_max = LIBMUSCLE_Message_timestamp(rmsg) + t_max + call LIBMUSCLE_Message_free(rmsg) + end if + + do while (t_cur + dt < t_max) + ! O_I + + ! S + U = k * U * dt + t_cur = t_cur + dt + + if (LIBMUSCLE_Instance_should_save_snapshot(instance, t_cur)) then + sdata = LIBMUSCLE_Data_create_nils(2_LIBMUSCLE_size) + sitem = LIBMUSCLE_Data_create_grid(U, 'x') + call LIBMUSCLE_Data_set_item(sdata, 1_LIBMUSCLE_size, sitem) + call LIBMUSCLE_Data_set_item(sdata, 2_LIBMUSCLE_size, t_max) + smsg = LIBMUSCLE_Message_create(t_cur, sdata) + call LIBMUSCLE_Instance_save_snapshot(instance, smsg) + call LIBMUSCLE_Message_free(smsg) + call LIBMUSCLE_Data_free(sitem) + call LIBMUSCLE_Data_free(sdata) + end if + end do + + ! 
O_F + sdata = LIBMUSCLE_Data_create_grid(U, 'x') + smsg = LIBMUSCLE_Message_create(t_cur, sdata) + call LIBMUSCLE_Instance_send(instance, 'final_state', smsg) + call LIBMUSCLE_Message_free(smsg) + call LIBMUSCLE_Data_free(sdata) + deallocate (U) + + if (LIBMUSCLE_Instance_should_save_final_snapshot(instance)) then + smsg = LIBMUSCLE_Message_create(t_cur) + call LIBMUSCLE_Instance_save_final_snapshot(instance, smsg) + call LIBMUSCLE_Message_free(smsg) + end if + end do + + call LIBMUSCLE_Instance_free(instance) + +end program reaction + diff --git a/docs/source/examples/rd_checkpoints_cpp.ymmsl b/docs/source/examples/rd_checkpoints_cpp.ymmsl new file mode 100644 index 00000000..b8157a90 --- /dev/null +++ b/docs/source/examples/rd_checkpoints_cpp.ymmsl @@ -0,0 +1,33 @@ +ymmsl_version: v0.1 + +model: + name: checkpointing_reaction_diffusion_cpp + + components: + macro: + implementation: checkpointing_diffusion_cpp + ports: + o_i: state_out + s: state_in + + micro: + implementation: checkpointing_reaction_cpp + ports: + f_init: initial_state + o_f: final_state + + conduits: + macro.state_out: micro.initial_state + micro.final_state: macro.state_in + +resources: + macro: + threads: 1 + micro: + threads: 1 + +# Note: below three lines are explicitly mentioned in checkpointing.rst. Do not +# forget to update that literalinclude when the line numbers change! 
+checkpoints: + simulation_time: + - every: 2.0e-05 diff --git a/docs/source/examples/rd_checkpoints_fortran.ymmsl b/docs/source/examples/rd_checkpoints_fortran.ymmsl new file mode 100644 index 00000000..ff3ad62e --- /dev/null +++ b/docs/source/examples/rd_checkpoints_fortran.ymmsl @@ -0,0 +1,33 @@ +ymmsl_version: v0.1 + +model: + name: checkpointing_reaction_diffusion_fortran + + components: + macro: + implementation: checkpointing_diffusion_fortran + ports: + o_i: state_out + s: state_in + + micro: + implementation: checkpointing_reaction_fortran + ports: + f_init: initial_state + o_f: final_state + + conduits: + macro.state_out: micro.initial_state + micro.final_state: macro.state_in + +resources: + macro: + threads: 1 + micro: + threads: 1 + +# Note: below three lines are explicitly mentioned in checkpointing.rst. Do not +# forget to update that literalinclude when the line numbers change! +checkpoints: + simulation_time: + - every: 2.0e-05 diff --git a/docs/source/examples/rd_checkpoints.ymmsl b/docs/source/examples/rd_checkpoints_python.ymmsl similarity index 100% rename from docs/source/examples/rd_checkpoints.ymmsl rename to docs/source/examples/rd_checkpoints_python.ymmsl diff --git a/docs/source/examples/rd_implementations.ymmsl.in b/docs/source/examples/rd_implementations.ymmsl.in index 92cc2b8d..05822d52 100644 --- a/docs/source/examples/rd_implementations.ymmsl.in +++ b/docs/source/examples/rd_implementations.ymmsl.in @@ -68,7 +68,27 @@ implementations: executable: python args: MUSCLE3_EXAMPLES/python/checkpointing_reaction.py + checkpointing_reaction_cpp: + env: + +LD_LIBRARY_PATH: :MUSCLE3_HOME/lib + executable: MUSCLE3_EXAMPLES/cpp/build/checkpointing_reaction + + checkpointing_reaction_fortran: + env: + +LD_LIBRARY_PATH: :MUSCLE3_HOME/lib + executable: MUSCLE3_EXAMPLES/fortran/build/checkpointing_reaction + checkpointing_diffusion_python: virtual_env: MUSCLE3_EXAMPLES/python/build/venv executable: python args: 
MUSCLE3_EXAMPLES/python/checkpointing_diffusion.py + + checkpointing_diffusion_cpp: + env: + +LD_LIBRARY_PATH: :MUSCLE3_HOME/lib + executable: MUSCLE3_EXAMPLES/cpp/build/checkpointing_diffusion + + checkpointing_diffusion_fortran: + env: + +LD_LIBRARY_PATH: :MUSCLE3_HOME/lib + executable: MUSCLE3_EXAMPLES/fortran/build/checkpointing_diffusion From ffb2e38bf334d5b9ad1d2677dc7a49edacc93cb5 Mon Sep 17 00:00:00 2001 From: Maarten Sebregts Date: Tue, 21 Feb 2023 13:52:24 +0100 Subject: [PATCH 072/188] C++ and Fortran checkpointing documentation --- docs/source/checkpointing.rst | 295 +++++++++++------- .../cpp/reaction_no_state_for_next_use.cpp | 56 ++++ .../reaction_no_state_for_next_use.f90 | 63 ++++ .../python/reaction_no_state_for_next_use.py | 2 +- .../templates/checkpointing_instance.cpp | 77 +++++ .../templates/checkpointing_instance.f90 | 92 ++++++ .../checkpointing_diffusion_partial.cpp | 122 ++++++++ .../checkpointing_diffusion_partial.f90 | 138 ++++++++ .../checkpointing_instance_partial.cpp | 70 +++++ .../checkpointing_instance_partial.f90 | 83 +++++ .../checkpointing_reaction_partial.cpp | 68 ++++ .../checkpointing_reaction_partial.f90 | 81 +++++ 12 files changed, 1029 insertions(+), 118 deletions(-) create mode 100644 docs/source/examples/cpp/reaction_no_state_for_next_use.cpp create mode 100644 docs/source/examples/fortran/reaction_no_state_for_next_use.f90 create mode 100644 docs/source/templates/checkpointing_instance.cpp create mode 100644 docs/source/templates/checkpointing_instance.f90 create mode 100644 docs/source/tutorial_code/checkpointing_diffusion_partial.cpp create mode 100644 docs/source/tutorial_code/checkpointing_diffusion_partial.f90 create mode 100644 docs/source/tutorial_code/checkpointing_instance_partial.cpp create mode 100644 docs/source/tutorial_code/checkpointing_instance_partial.f90 create mode 100644 docs/source/tutorial_code/checkpointing_reaction_partial.cpp create mode 100644 
docs/source/tutorial_code/checkpointing_reaction_partial.f90 diff --git a/docs/source/checkpointing.rst b/docs/source/checkpointing.rst index 3e1546ba..bbac8932 100644 --- a/docs/source/checkpointing.rst +++ b/docs/source/checkpointing.rst @@ -13,14 +13,6 @@ comes with built-in checkpointing support. This page describes in detail how to use the MUSCLE3 checkpointing API, how to specify checkpoints in the workflow configuration and how to resume a workflow. -.. warning:: - - Checkpointing in MUSCLE3 version 0.6.0 is still in development: the API may - change in a future MUSCLE3 release. - - Checkpointing is only available in the Python API. C++ and Fortran support - is planned for version 0.7.0. - In the :ref:`user tutorial`, you can read about the checkpointing concepts and how to use the API when running and resuming MUSCLE3 simulations. This is followed by a :ref:`developer tutorial`, which explains how to add checkpointing @@ -359,7 +351,7 @@ repository. Then execute the following command: .. code-block:: bash $ mkdir run_rd_example - $ muscle_manager --start-all --run-dir run_rd_example rd_implementations.ymmsl rd_checkpoints.ymmsl rd_settings.ymmsl + $ muscle_manager --start-all --run-dir run_rd_example rd_implementations.ymmsl rd_checkpoints_python.ymmsl rd_settings.ymmsl .. note:: @@ -372,11 +364,11 @@ repository. Then execute the following command: $ make test_examples The above command runs the ``muscle_manager`` and starts all components (the -reaction model and the diffusion model). The ``rd_checkpoints.ymmsl`` file +reaction model and the diffusion model). The ``rd_checkpoints_python.ymmsl`` file contains the checkpoint definitions used in this example: -.. literalinclude:: examples/rd_checkpoints.ymmsl - :caption: ``docs/source/examples/rd_checkpoints.ymmsl, lines 31-33`` +.. 
literalinclude:: examples/rd_checkpoints_python.ymmsl + :caption: ``docs/source/examples/rd_checkpoints_python.ymmsl, lines 31-33`` :lines: 31-33 :language: yaml @@ -428,7 +420,7 @@ point to the snapshot you want to resume from. :caption: Resume from an earlier snapshot. Replace ```` and ``