From dbb4d994995089867c507e3d3045af0e5b5c912c Mon Sep 17 00:00:00 2001 From: Dan Birman Date: Mon, 21 Oct 2024 10:21:46 -0700 Subject: [PATCH] Support multi session quality control by tracking metric provenance (#1106) * feat: adding provenance * chore: lint * feat: adding multi-asset stage and typo * chore: examples * refactor: adding validator and renaming tests are still todo * tests: adding tests, minor fixes * tests: examples and line length * fix: multi_session->multi_asset * fix: session suffix to asset * fix: typo * fix: more typos --- examples/quality_control.json | 56 +++++++------ src/aind_data_schema/core/quality_control.py | 34 +++++++- tests/test_metadata.py | 26 +++--- tests/test_quality_control.py | 85 ++++++++++++++++++++ 4 files changed, 162 insertions(+), 39 deletions(-) diff --git a/examples/quality_control.json b/examples/quality_control.json index 7f0f9b29..fdb50d08 100644 --- a/examples/quality_control.json +++ b/examples/quality_control.json @@ -27,15 +27,16 @@ ], "type": "dropdown" }, - "description": null, - "reference": "ecephys-drift-map", "status_history": [ { "evaluator": "", "status": "Pending", "timestamp": "2022-11-22T00:00:00Z" } - ] + ], + "description": null, + "reference": "ecephys-drift-map", + "evaluated_assets": null }, { "name": "Probe B drift", @@ -53,28 +54,30 @@ ], "type": "checkbox" }, - "description": null, - "reference": "ecephys-drift-map", "status_history": [ { "evaluator": "", "status": "Pending", "timestamp": "2022-11-22T00:00:00Z" } - ] + ], + "description": null, + "reference": "ecephys-drift-map", + "evaluated_assets": null }, { "name": "Probe C drift", "value": "Low", - "description": null, - "reference": "ecephys-drift-map", "status_history": [ { "evaluator": "Automated", "status": "Pass", "timestamp": "2022-11-22T00:00:00Z" } - ] + ], + "description": null, + "reference": "ecephys-drift-map", + "evaluated_assets": null } ], "notes": "", @@ -92,28 +95,30 @@ { "name": "video_1_num_frames", "value": 662, - "description": null, - "reference": null, "status_history": [ { "evaluator": "Automated", "status": "Pass", "timestamp": "2022-11-22T00:00:00Z" } - ] + ], + "description": null, + "reference": null, + "evaluated_assets": null }, { "name": "video_2_num_frames", "value": 662, - "description": null, - "reference": null, "status_history": [ { "evaluator": "Automated", "status": "Pass", "timestamp": "2022-11-22T00:00:00Z" } - ] + ], + "description": null, + "reference": null, + "evaluated_assets": null } ], "notes": "Pass when video_1_num_frames==video_2_num_frames", @@ -131,41 +136,44 @@ { "name": "ProbeA_success", "value": true, - "description": null, - "reference": null, "status_history": [ { "evaluator": "Automated", "status": "Pass", "timestamp": "2022-11-22T00:00:00Z" } - ] + ], + "description": null, + "reference": null, + "evaluated_assets": null }, { "name": "ProbeB_success", "value": true, - "description": null, - "reference": null, "status_history": [ { "evaluator": "Automated", "status": "Pass", "timestamp": "2022-11-22T00:00:00Z" } - ] + ], + "description": null, + "reference": null, + "evaluated_assets": null }, { "name": "ProbeC_success", "value": true, - "description": null, - "reference": null, "status_history": [ { "evaluator": "Automated", "status": "Pass", "timestamp": "2022-11-22T00:00:00Z" } - ] + ], + "description": null, + "reference": null, + "evaluated_assets": null } ], "notes": null, diff --git a/src/aind_data_schema/core/quality_control.py b/src/aind_data_schema/core/quality_control.py index ad91e380..04661f84 100644 --- a/src/aind_data_schema/core/quality_control.py +++ b/src/aind_data_schema/core/quality_control.py @@ -4,7 +4,7 @@ from typing import Any, List, Literal, Optional from aind_data_schema_models.modalities import Modality -from pydantic import BaseModel, Field, field_validator +from pydantic import BaseModel, Field, field_validator, model_validator from aind_data_schema.base import AindCoreModel, AindModel, AwareDatetimeWithDefault @@ -26,6 +26,7 @@ class Stage(str, Enum): RAW = "Raw data" PROCESSING = "Processing" ANALYSIS = "Analysis" + MULTI_ASSET = "Multi-asset" class QCStatus(BaseModel): @@ -41,9 +42,14 @@ class QCMetric(BaseModel): name: str = Field(..., title="Metric name") value: Any = Field(..., title="Metric value") + status_history: List[QCStatus] = Field(default=[], title="Metric status history") description: Optional[str] = Field(default=None, title="Metric description") reference: Optional[str] = Field(default=None, title="Metric reference image URL or plot type") - status_history: List[QCStatus] = Field(default=[], title="Metric status history") + evaluated_assets: Optional[List[str]] = Field( + default=None, + title="List of asset names that this metric depends on", + description="Set to None except when a metric's calculation required data coming from a different data asset.", + ) @property def status(self) -> QCStatus: @@ -125,6 +131,30 @@ def failed_metrics(self) -> Optional[List[QCMetric]]: return failing_metrics + @model_validator(mode="after") + def validate_multi_asset(cls, v): + """Ensure that the evaluated_assets field in any attached metrics is set correctly""" + stage = v.stage + metrics = v.metrics + + if stage == Stage.MULTI_ASSET: + for metric in metrics: + if not metric.evaluated_assets or len(metric.evaluated_assets) == 0: + raise ValueError( + f"Metric '{metric.name}' is in a multi-asset QCEvaluation and must have evaluated_assets set." + ) + else: + # make sure all evaluated assets are None + for metric in metrics: + if metric.evaluated_assets: + raise ValueError( + ( + f"Metric '{metric.name}' is in a single-asset QCEvaluation" + " and should not have evaluated_assets" + ) + ) + return v + class QualityControl(AindCoreModel): """Description of quality metrics for a data asset""" diff --git a/tests/test_metadata.py b/tests/test_metadata.py index 37954238..941a7952 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -285,19 +285,19 @@ def test_validate_underscore_modality(self): # Tests missing metadata surgery1 = Surgery.model_construct(procedures=[nano_inj, ionto_inj]) m = Metadata( - name="ecephys_655019_2023-04-03_18-17-09", - location="bucket", - data_description=DataDescription.model_construct( - label="some label", - platform=Platform.ECEPHYS, - creation_time=time(12, 12, 12), - modality=[Modality.BEHAVIOR_VIDEOS], - ), - subject=Subject.model_construct(), - procedures=Procedures.model_construct(subject_procedures=[surgery1]), - rig=rig, - session=session, - ) + name="ecephys_655019_2023-04-03_18-17-09", + location="bucket", + data_description=DataDescription.model_construct( + label="some label", + platform=Platform.ECEPHYS, + creation_time=time(12, 12, 12), + modality=[Modality.BEHAVIOR_VIDEOS], + ), + subject=Subject.model_construct(), + procedures=Procedures.model_construct(subject_procedures=[surgery1]), + rig=rig, + session=session, + ) self.assertIsNotNone(m) def test_validate_rig_session_compatibility(self): diff --git a/tests/test_quality_control.py b/tests/test_quality_control.py index 1b287f05..4da9186a 100644 --- a/tests/test_quality_control.py +++ b/tests/test_quality_control.py @@ -282,6 +282,91 @@ def test_metric_status(self): self.assertTrue(expected_exception in repr(context.exception)) + def test_multi_session(self): + """Ensure that the multi-asset QCEvaluation validator checks for evaluated_assets""" + # Check for non-multi-session that all evaluated_assets are None + t0 = datetime.fromisoformat("2020-10-10") + + evaluation = QCEvaluation( + name="Drift map", + modality=Modality.ECEPHYS, + stage=Stage.PROCESSING, + metrics=[ + QCMetric( + name="Multiple values example", + value={"stuff": "in_a_dict"}, + status_history=[ + QCStatus(evaluator="Automated", timestamp=t0, status=Status.PASS), + ], + ), + ], + ) + + self.assertTrue(evaluation.stage != Stage.MULTI_ASSET) + self.assertIsNone(evaluation.metrics[0].evaluated_assets) + + # Check that single-asset QC with evaluated_assets throws a validation error + with self.assertRaises(ValidationError) as context: + QCEvaluation( + name="Drift map", + modality=Modality.ECEPHYS, + stage=Stage.PROCESSING, + metrics=[ + QCMetric( + name="Multiple values example", + value={"stuff": "in_a_dict"}, + status_history=[ + QCStatus(evaluator="Automated", timestamp=t0, status=Status.PASS), + ], + evaluated_assets=["asset0", "asset1"], + ), + ], + ) + + print(context.exception) + self.assertTrue( + "is in a single-asset QCEvaluation and should not have evaluated_assets" in repr(context.exception) + ) + + # Check that multi-asset with empty evaluated_assets raises a validation error + with self.assertRaises(ValidationError) as context: + QCEvaluation( + name="Drift map", + modality=Modality.ECEPHYS, + stage=Stage.MULTI_ASSET, + metrics=[ + QCMetric( + name="Multiple values example", + value={"stuff": "in_a_dict"}, + status_history=[ + QCStatus(evaluator="Automated", timestamp=t0, status=Status.PASS), + ], + evaluated_assets=[], + ), + ], + ) + + self.assertTrue("is in a multi-asset QCEvaluation and must have evaluated_assets" in repr(context.exception)) + + # Check that multi-asset with missing evaluated_assets raises a validation error + with self.assertRaises(ValidationError) as context: + QCEvaluation( + name="Drift map", + modality=Modality.ECEPHYS, + stage=Stage.MULTI_ASSET, + metrics=[ + QCMetric( + name="Multiple values example", + value={"stuff": "in_a_dict"}, + status_history=[ + QCStatus(evaluator="Automated", timestamp=t0, status=Status.PASS), + ], + ), + ], + ) + + self.assertTrue("is in a multi-asset QCEvaluation and must have evaluated_assets" in repr(context.exception)) + if __name__ == "__main__": unittest.main()