Merge pull request #237 from Perfexionists/command-import

Add perun import command
Perfexionists · Jul 22, 2024 · c1e1484 · c1e1484
2 parents 2aa98d6 + 25f323d
commit c1e1484
Show file tree

Hide file tree

Showing 13 changed files with 3,772 additions and 2 deletions.
diff --git a/perun/cli.py b/perun/cli.py
@@ -50,7 +50,7 @@
 import click
 
 # Perun Imports
-from perun.cli_groups import check_cli, config_cli, run_cli, utils_cli
+from perun.cli_groups import check_cli, config_cli, run_cli, utils_cli, import_cli
 from perun.logic import commands, pcs, config as perun_config
 from perun.utils import exceptions, log as perun_log
 from perun.utils.common import cli_kit, common_kit
@@ -1292,6 +1292,7 @@ def init_unit_commands(lazy_init: bool = True) -> None:
 cli.add_command(config_cli.config)
 cli.add_command(run_cli.run)
 cli.add_command(utils_cli.utils_group)
+cli.add_command(import_cli.import_group)
 
 
 def launch_cli_in_dev_mode() -> None:

diff --git a/perun/cli_groups/config_cli.py b/perun/cli_groups/config_cli.py
@@ -13,7 +13,6 @@
 from perun.utils import log as perun_log
 from perun.utils.common import cli_kit
 from perun.utils.exceptions import (
-    NotPerunRepositoryException,
     MissingConfigSectionException,
     ExternalEditorErrorException,
 )

diff --git a/perun/cli_groups/import_cli.py b/perun/cli_groups/import_cli.py
@@ -0,0 +1,128 @@
+"""Group of CLI commands used for importing profiles"""
+
+from __future__ import annotations
+
+# Standard Imports
+from typing import Any
+
+# Third-Party Imports
+import click
+
+# Perun Imports
+from perun.logic import commands
+from perun.profile import imports
+from perun.utils.common import cli_kit
+
+
+@click.group("import")
+# NOTE: the short flag is `-i`, since `-m` is already taken by `--minor-version` below; declaring
+# `-m` for both options would make one of them unreachable through its short form.
+@click.option(
+    "--machine-info",
+    "-i",
+    type=click.Path(resolve_path=True, readable=True),
+    help=(
+        "Imports machine info from file in JSON format (by default, machine info is loaded from"
+        " the current host). You can use `utils/generate_machine_info.sh` script to generate the"
+        " machine info file."
+    ),
+)
+@click.option(
+    "--minor-version",
+    "-m",
+    "minor_version_list",
+    nargs=1,
+    multiple=True,
+    callback=cli_kit.minor_version_list_callback,
+    default=["HEAD"],
+    help="Specifies the head minor version, for which the profiles will be imported.",
+)
+@click.option(
+    "--cmd",
+    "-c",
+    nargs=1,
+    required=False,
+    multiple=True,
+    default=[""],
+    help=(
+        "Command that was being profiled. Either corresponds to some"
+        " script, binary or command, e.g. ``./mybin`` or ``perun``."
+    ),
+)
+@click.option(
+    "--workload",
+    "-w",
+    nargs=1,
+    required=False,
+    multiple=True,
+    default=[""],
+    help="Inputs for <cmd>. E.g. ``./subdir`` is possible workload for ``ls`` command.",
+)
+@click.option(
+    "--save-to-index",
+    "-s",
+    is_flag=True,
+    help="Saves the imported profile to index.",
+    default=False,
+)
+@click.pass_context
+def import_group(ctx: click.Context, **kwargs: Any) -> None:
+    """Imports Perun profiles from different formats"""
+    # NOTE(review): `try_init` presumably makes sure a Perun instance exists before anything is
+    # imported -- confirm against `perun.logic.commands`.
+    commands.try_init()
+    # The parsed group options are kept in the context object; nested groups and commands read
+    # them from there and forward them to the import functions.
+    ctx.obj = kwargs
+
+
+@import_group.group("perf")
+@click.option(
+    "--warmup",
+    "-w",
+    default=[0],
+    multiple=True,
+    help="Sets [INT] warm up iterations of ith profiled command.",
+)
+@click.option(
+    "--repeat",
+    "-r",
+    default=[1],
+    multiple=True,
+    help="Sets [INT] samplings of the ith profiled command.",
+)
+@click.pass_context
+def perf_group(ctx: click.Context, **kwargs: Any) -> None:
+    """Imports Perun profiles from perf results
+
+    This supports either profiles collected in:
+
+      1. Binary format: e.g., `collected.data` files, that are results of `perf record`
+      2. Text format: result of `perf script` that parses the binary into user-friendly and parsing-friendly text format
+    """
+    # Add the perf-specific options (`warmup`, `repeat`) to the context object in place, so the
+    # concrete import commands see them next to the options already stored there.
+    for option_name, option_value in kwargs.items():
+        ctx.obj[option_name] = option_value
+
+
+@perf_group.command("record")
+@click.argument("imported", nargs=-1, required=True)
+@click.pass_context
+@click.option(
+    "--with-sudo",
+    "-s",
+    default=False,
+    is_flag=True,
+    help="Runs the conversion of the data in sudo mode.",
+)
+def from_binary(ctx: click.Context, imported: list[str], **kwargs: Any) -> None:
+    """Imports Perun profiles from binary generated by `perf record` command"""
+    # Command-level options come first; values stored in the context object override them on
+    # a key clash, exactly as `kwargs.update(ctx.obj)` would.
+    import_options = {**kwargs, **ctx.obj}
+    imports.import_perf_from_record(imported, **import_options)
+
+
+@perf_group.command("script")
+@click.argument(
+    "imported",
+    nargs=-1,
+    required=True,
+    type=click.Path(resolve_path=True),
+)
+@click.pass_context
+def from_text(ctx: click.Context, imported: list[str], **kwargs: Any) -> None:
+    """Import Perun profiles from output generated by `perf script` command"""
+    # Values stored in the context object take precedence over command-level ones.
+    import_options = {**kwargs, **ctx.obj}
+    imports.import_perf_from_script(imported, **import_options)
+
+
+@perf_group.command("stack")
+@click.argument(
+    "imported",
+    nargs=-1,
+    required=True,
+    type=click.Path(resolve_path=True),
+)
+@click.pass_context
+def from_stacks(ctx: click.Context, imported: list[str], **kwargs: Any) -> None:
+    """Import Perun profiles from output generated by `perf script | stackcollapse-perf.pl` command"""
+    # Values stored in the context object take precedence over command-level ones.
+    import_options = {**kwargs, **ctx.obj}
+    imports.import_perf_from_stack(imported, **import_options)
diff --git a/perun/cli_groups/meson.build b/perun/cli_groups/meson.build
@@ -4,6 +4,7 @@ perun_cli_groups_files = files(
     '__init__.py',
     'check_cli.py',
     'config_cli.py',
+    'import_cli.py',
     'run_cli.py',
     'utils_cli.py',
 )

diff --git a/perun/profile/imports.py b/perun/profile/imports.py
@@ -0,0 +1,204 @@
+"""Functions for importing Profile from different formats"""
+
+from __future__ import annotations
+
+# Standard Imports
+from typing import Any, Optional
+import json
+import os
+import shlex
+import subprocess
+
+# Third-Party Imports
+
+# Perun Imports
+from perun.collect.kperf import parser
+from perun.profile import helpers as profile_helpers
+from perun.logic import commands, index, pcs
+from perun.utils import log, streams
+from perun.utils.common import script_kit
+from perun.utils.external import commands as external_commands, environment
+from perun.utils.structs import MinorVersion
+from perun.profile.factory import Profile
+
+
+def get_machine_info(machine_info: Optional[str] = None) -> dict[str, Any]:
+    """Returns machine info either from input file or constructs it from environment
+
+    :param machine_info: path to a file in JSON format, which contains machine specification;
+        when None, the specification is obtained from `environment.get_machine_specification()`
+    :return: parsed dictionary format of machine specification
+    """
+    if machine_info is None:
+        return environment.get_machine_specification()
+    # Read explicitly as UTF-8, so the result does not depend on the locale's default encoding
+    with open(machine_info, "r", encoding="utf-8") as machine_handle:
+        return json.load(machine_handle)
+
+
+def get_param(cfg: dict[str, Any], param: str, index: int) -> Any:
+    """Helper function for retrieving parameter from the dictionary of lists.
+
+    This assumes, that dictionary contains list of parameters under certain keys.
+    It retrieves the list under the key and then returns the value at the given index.
+    The function fails, when the index is out of bounds.
+
+    :param cfg: dictionary, which maps names of parameters to lists of their values
+    :param param: key in `cfg`, under which the list of values is stored
+    :param index: index from which we are retrieving
+    :return: value of the param
+    :raises AssertionError: when the list under `param` has no value at `index`
+    """
+    values = cfg[param]
+    # Raised explicitly instead of using a bare `assert`, so the check is not stripped when
+    # Python runs with `-O`; the exception type is unchanged for existing callers.
+    if not index < len(values):
+        raise AssertionError(f"Not enough values set up for the '{param}' command.")
+    return values[index]
+
+
+def import_from_string(
+    out: str,
+    minor_version: MinorVersion,
+    prof_index: int,
+    machine_info: Optional[str] = None,
+    with_sudo: bool = False,
+    save_to_index: bool = False,
+    **kwargs: Any,
+) -> None:
+    """Creates a profile out of textual data, stores it and registers it in Perun
+
+    The lines of `out` are handed to `parser.parse_events` and the result becomes the
+    resources of the new profile. The profile is stored in the job directory and then
+    either added to the index of `minor_version` or registered in the pending index.
+
+    :param out: textual data handed, line by line, to the kperf parser
+    :param minor_version: minor version, whose checksum is set as the origin of the profile
+    :param prof_index: position of the profile, used to select the matching values of `cmd`,
+        `workload`, `warmup` and `repeat` from `kwargs`
+    :param machine_info: file in json format with machine specification (or None)
+    :param with_sudo: value recorded in the parameters of the collector
+    :param save_to_index: if True, the profile is added to the index of `minor_version`;
+        otherwise it is registered in the pending index
+    :param kwargs: must contain lists under the keys `cmd`, `workload`, `warmup` and `repeat`
+    """
+    parsed_resources = parser.parse_events(out.split("\n"))
+    prof = Profile({"global": {"time": "???", "resources": parsed_resources}})
+    prof.update({"origin": minor_version.checksum})
+    prof.update({"machine": get_machine_info(machine_info)})
+
+    header = {
+        "type": "time",
+        "cmd": get_param(kwargs, "cmd", prof_index),
+        "workload": get_param(kwargs, "workload", prof_index),
+        "units": {"time": "sample"},
+    }
+    prof.update({"header": header})
+
+    collector_params = {
+        "with_sudo": with_sudo,
+        "warmup": get_param(kwargs, "warmup", prof_index),
+        "repeat": get_param(kwargs, "repeat", prof_index),
+    }
+    prof.update({"collector_info": {"name": "kperf", "params": collector_params}})
+    prof.update({"postprocessors": []})
+
+    # The name is generated from the finished profile, hence only after all the updates
+    profile_name = profile_helpers.generate_profile_name(prof)
+    job_directory = pcs.get_job_directory()
+    target_path = os.path.join(job_directory, profile_name)
+
+    streams.store_json(prof.serialize(), target_path)
+    log.minor_status(
+        "stored generated profile ",
+        status=f"{log.path_style(os.path.relpath(target_path))}",
+    )
+
+    if not save_to_index:
+        # Profiles that are not saved to index are registered in pending index
+        index.register_in_pending_index(target_path, prof)
+        return
+    commands.add([target_path], minor_version.checksum, keep_profile=False)
+
+
+def import_perf_from_record(
+    imported: list[str],
+    machine_info: Optional[str],
+    minor_version_list: list[MinorVersion],
+    with_sudo: bool = False,
+    save_to_index: bool = False,
+    **kwargs: Any,
+) -> None:
+    """Imports profile collected by `perf record`
+
+    Each imported file is converted by `perf script` piped to `stackcollapse-perf.pl`
+    and the output of the pipeline is imported as a single profile.
+
+    :param imported: list of files with binary data collected by `perf record`
+    :param machine_info: file in json format with machine specification (or None)
+    :param minor_version_list: list with the single minor version the profiles belong to
+    :param with_sudo: if True, `perf script` is run through `sudo`
+    :param save_to_index: if True, the imported profiles are saved to index
+    :param kwargs: rest of the parameters passed to `import_from_string`
+    """
+    assert (
+        len(minor_version_list) == 1
+    ), f"One can import profile for single version only (got {len(minor_version_list)} instead)"
+
+    parse_script = script_kit.get_script("stackcollapse-perf.pl")
+    sudo_prefix = "sudo " if with_sudo else ""
+
+    for i, imported_file in enumerate(imported):
+        # The path is quoted, so names with spaces or shell metacharacters stay a single argument
+        perf_script_command = (
+            f"{sudo_prefix}perf script -i {shlex.quote(imported_file)} | {parse_script}"
+        )
+        try:
+            out, _ = external_commands.run_safely_external_command(perf_script_command)
+        except subprocess.CalledProcessError as err:
+            log.minor_fail(f"Raw data from {log.path_style(imported_file)}", "not collected")
+            log.error(f"Cannot load data due to: {err}")
+            # NOTE(review): `log.error` may terminate the run -- confirm. If it returns, skip
+            # the file, so we never import empty data or the data of the previous file.
+            continue
+        log.minor_success(f"Raw data from {log.path_style(imported_file)}", "collected")
+        import_from_string(
+            out.decode("utf-8"),
+            minor_version_list[0],
+            i,
+            machine_info,
+            with_sudo=with_sudo,
+            save_to_index=save_to_index,
+            **kwargs,
+        )
+        log.minor_success(log.path_style(imported_file), "imported")
+
+
+def import_perf_from_script(
+    imported: list[str],
+    machine_info: Optional[str],
+    minor_version_list: list[MinorVersion],
+    save_to_index: bool = False,
+    **kwargs: Any,
+) -> None:
+    """Imports profile collected by `perf record; perf script`
+
+    Each imported file is piped to `stackcollapse-perf.pl` and the output of the
+    pipeline is imported as a single profile.
+
+    :param imported: list of files with the textual output of `perf script`
+    :param machine_info: file in json format with machine specification (or None)
+    :param minor_version_list: list with the single minor version the profiles belong to
+    :param save_to_index: if True, the imported profiles are saved to index
+    :param kwargs: rest of the parameters passed to `import_from_string`
+    """
+    assert (
+        len(minor_version_list) == 1
+    ), f"One can import profile for single version only (got {len(minor_version_list)} instead)"
+
+    parse_script = script_kit.get_script("stackcollapse-perf.pl")
+
+    for i, imported_file in enumerate(imported):
+        # The path is quoted, so names with spaces or shell metacharacters stay a single argument
+        perf_script_command = f"cat {shlex.quote(imported_file)} | {parse_script}"
+        out, _ = external_commands.run_safely_external_command(perf_script_command)
+        log.minor_success(f"Raw data from {log.path_style(imported_file)}", "collected")
+        import_from_string(
+            out.decode("utf-8"),
+            minor_version_list[0],
+            i,
+            machine_info,
+            save_to_index=save_to_index,
+            **kwargs,
+        )
+        log.minor_success(log.path_style(imported_file), "imported")
+
+
+def import_perf_from_stack(
+    imported: list[str],
+    machine_info: Optional[str],
+    minor_version_list: list[MinorVersion],
+    save_to_index: bool = False,
+    **kwargs: Any,
+) -> None:
+    """Imports profile collected by `perf record; perf script | stackcollapse-perf.pl`
+
+    The content of each imported file is read as UTF-8 text and imported as a single profile.
+
+    :param imported: list of files, whose whole content is imported
+    :param machine_info: file in json format with machine specification (or None)
+    :param minor_version_list: list with the single minor version the profiles belong to
+    :param save_to_index: if True, the imported profiles are saved to index
+    :param kwargs: rest of the parameters passed to `import_from_string`
+    """
+    version_count = len(minor_version_list)
+    assert (
+        version_count == 1
+    ), f"One can import profile for single version only (got {len(minor_version_list)} instead)"
+    target_version = minor_version_list[0]
+
+    for position, stack_file in enumerate(imported):
+        with open(stack_file, "r", encoding="utf-8") as stack_handle:
+            collapsed_stacks = stack_handle.read()
+        import_from_string(
+            collapsed_stacks,
+            target_version,
+            position,
+            machine_info,
+            save_to_index=save_to_index,
+            **kwargs,
+        )
+        log.minor_success(log.path_style(stack_file), "imported")
diff --git a/perun/profile/meson.build b/perun/profile/meson.build
@@ -5,6 +5,7 @@ perun_profile_files = files(
     'convert.py',
     'factory.py',
     'helpers.py',
+    'imports.py',
     'query.py',
 )
 

diff --git a/perun/utils/external/environment.py b/perun/utils/external/environment.py
@@ -167,5 +167,6 @@ def get_machine_specification() -> dict[str, Any]:
                     for (key, value) in [line.split(":") for line in cpu_line.split("\n") if line]
                 }
                 for cpu_line in cpuinfo_handle.read().split("\n\n")
+                if cpu_line
             ]
     return machine_info
diff --git a/tests/sources/imports/import.data b/tests/sources/imports/import.data