Skip to content

Commit

Permalink
Support multi-session quality control by tracking metric provenance (#…
Browse files Browse the repository at this point in the history
…1106)

* feat: adding provenance

* chore: lint

* feat: adding multi-asset stage and typo

* chore: examples

* refactor: adding validator and renaming

tests are still todo

* tests: adding tests, minor fixes

* tests: examples and line length

* fix: multi_session->multi_asset

* fix: session suffix to asset

* fix: typo

* fix: more typos
  • Loading branch information
dbirman authored Oct 21, 2024
1 parent d080f14 commit dbb4d99
Show file tree
Hide file tree
Showing 4 changed files with 162 additions and 39 deletions.
56 changes: 32 additions & 24 deletions examples/quality_control.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,16 @@
],
"type": "dropdown"
},
"description": null,
"reference": "ecephys-drift-map",
"status_history": [
{
"evaluator": "",
"status": "Pending",
"timestamp": "2022-11-22T00:00:00Z"
}
]
],
"description": null,
"reference": "ecephys-drift-map",
"evaluated_assets": null
},
{
"name": "Probe B drift",
Expand All @@ -53,28 +54,30 @@
],
"type": "checkbox"
},
"description": null,
"reference": "ecephys-drift-map",
"status_history": [
{
"evaluator": "",
"status": "Pending",
"timestamp": "2022-11-22T00:00:00Z"
}
]
],
"description": null,
"reference": "ecephys-drift-map",
"evaluated_assets": null
},
{
"name": "Probe C drift",
"value": "Low",
"description": null,
"reference": "ecephys-drift-map",
"status_history": [
{
"evaluator": "Automated",
"status": "Pass",
"timestamp": "2022-11-22T00:00:00Z"
}
]
],
"description": null,
"reference": "ecephys-drift-map",
"evaluated_assets": null
}
],
"notes": "",
Expand All @@ -92,28 +95,30 @@
{
"name": "video_1_num_frames",
"value": 662,
"description": null,
"reference": null,
"status_history": [
{
"evaluator": "Automated",
"status": "Pass",
"timestamp": "2022-11-22T00:00:00Z"
}
]
],
"description": null,
"reference": null,
"evaluated_assets": null
},
{
"name": "video_2_num_frames",
"value": 662,
"description": null,
"reference": null,
"status_history": [
{
"evaluator": "Automated",
"status": "Pass",
"timestamp": "2022-11-22T00:00:00Z"
}
]
],
"description": null,
"reference": null,
"evaluated_assets": null
}
],
"notes": "Pass when video_1_num_frames==video_2_num_frames",
Expand All @@ -131,41 +136,44 @@
{
"name": "ProbeA_success",
"value": true,
"description": null,
"reference": null,
"status_history": [
{
"evaluator": "Automated",
"status": "Pass",
"timestamp": "2022-11-22T00:00:00Z"
}
]
],
"description": null,
"reference": null,
"evaluated_assets": null
},
{
"name": "ProbeB_success",
"value": true,
"description": null,
"reference": null,
"status_history": [
{
"evaluator": "Automated",
"status": "Pass",
"timestamp": "2022-11-22T00:00:00Z"
}
]
],
"description": null,
"reference": null,
"evaluated_assets": null
},
{
"name": "ProbeC_success",
"value": true,
"description": null,
"reference": null,
"status_history": [
{
"evaluator": "Automated",
"status": "Pass",
"timestamp": "2022-11-22T00:00:00Z"
}
]
],
"description": null,
"reference": null,
"evaluated_assets": null
}
],
"notes": null,
Expand Down
34 changes: 32 additions & 2 deletions src/aind_data_schema/core/quality_control.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from typing import Any, List, Literal, Optional

from aind_data_schema_models.modalities import Modality
from pydantic import BaseModel, Field, field_validator
from pydantic import BaseModel, Field, field_validator, model_validator

from aind_data_schema.base import AindCoreModel, AindModel, AwareDatetimeWithDefault

Expand All @@ -26,6 +26,7 @@ class Stage(str, Enum):
RAW = "Raw data"
PROCESSING = "Processing"
ANALYSIS = "Analysis"
MULTI_ASSET = "Multi-asset"


class QCStatus(BaseModel):
Expand All @@ -41,9 +42,14 @@ class QCMetric(BaseModel):

name: str = Field(..., title="Metric name")
value: Any = Field(..., title="Metric value")
status_history: List[QCStatus] = Field(default=[], title="Metric status history")
description: Optional[str] = Field(default=None, title="Metric description")
reference: Optional[str] = Field(default=None, title="Metric reference image URL or plot type")
status_history: List[QCStatus] = Field(default=[], title="Metric status history")
evaluated_assets: Optional[List[str]] = Field(
default=None,
title="List of asset names that this metric depends on",
description="Set to None except when a metric's calculation required data coming from a different data asset.",
)

@property
def status(self) -> QCStatus:
Expand Down Expand Up @@ -125,6 +131,30 @@ def failed_metrics(self) -> Optional[List[QCMetric]]:

return failing_metrics

@model_validator(mode="after")
def validate_multi_asset(cls, v):
    """Check that each metric's evaluated_assets agrees with the evaluation's stage.

    A multi-asset evaluation requires every metric to carry a non-empty
    evaluated_assets list; any other stage requires it to be unset.

    Raises:
        ValueError: if a metric's evaluated_assets is inconsistent with the stage.
    """
    is_multi_asset = v.stage == Stage.MULTI_ASSET

    for metric in v.metrics:
        # bool() covers both None and an empty list in one check
        has_assets = bool(metric.evaluated_assets)

        if is_multi_asset and not has_assets:
            raise ValueError(
                f"Metric '{metric.name}' is in a multi-asset QCEvaluation and must have evaluated_assets set."
            )
        if not is_multi_asset and has_assets:
            raise ValueError(
                (
                    f"Metric '{metric.name}' is in a single-asset QCEvaluation"
                    " and should not have evaluated_assets"
                )
            )
    return v


class QualityControl(AindCoreModel):
"""Description of quality metrics for a data asset"""
Expand Down
26 changes: 13 additions & 13 deletions tests/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,19 +285,19 @@ def test_validate_underscore_modality(self):
# Tests missing metadata
surgery1 = Surgery.model_construct(procedures=[nano_inj, ionto_inj])
m = Metadata(
name="ecephys_655019_2023-04-03_18-17-09",
location="bucket",
data_description=DataDescription.model_construct(
label="some label",
platform=Platform.ECEPHYS,
creation_time=time(12, 12, 12),
modality=[Modality.BEHAVIOR_VIDEOS],
),
subject=Subject.model_construct(),
procedures=Procedures.model_construct(subject_procedures=[surgery1]),
rig=rig,
session=session,
)
name="ecephys_655019_2023-04-03_18-17-09",
location="bucket",
data_description=DataDescription.model_construct(
label="some label",
platform=Platform.ECEPHYS,
creation_time=time(12, 12, 12),
modality=[Modality.BEHAVIOR_VIDEOS],
),
subject=Subject.model_construct(),
procedures=Procedures.model_construct(subject_procedures=[surgery1]),
rig=rig,
session=session,
)
self.assertIsNotNone(m)

def test_validate_rig_session_compatibility(self):
Expand Down
85 changes: 85 additions & 0 deletions tests/test_quality_control.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,91 @@ def test_metric_status(self):

self.assertTrue(expected_exception in repr(context.exception))

def test_multi_session(self):
    """Ensure that the multi-asset QCEvaluation validator checks for evaluated_assets.

    Covers four cases: single-asset with no evaluated_assets (valid),
    single-asset with evaluated_assets (invalid), multi-asset with an empty
    list (invalid), and multi-asset with evaluated_assets missing (invalid).
    """
    t0 = datetime.fromisoformat("2020-10-10")

    # Single-asset stage: evaluated_assets defaults to None and validates cleanly
    evaluation = QCEvaluation(
        name="Drift map",
        modality=Modality.ECEPHYS,
        stage=Stage.PROCESSING,
        metrics=[
            QCMetric(
                name="Multiple values example",
                value={"stuff": "in_a_dict"},
                status_history=[
                    QCStatus(evaluator="Automated", timestamp=t0, status=Status.PASS),
                ],
            ),
        ],
    )

    self.assertTrue(evaluation.stage != Stage.MULTI_ASSET)
    self.assertIsNone(evaluation.metrics[0].evaluated_assets)

    # Check that single-asset QC with evaluated_assets throws a validation error
    with self.assertRaises(ValidationError) as context:
        QCEvaluation(
            name="Drift map",
            modality=Modality.ECEPHYS,
            stage=Stage.PROCESSING,
            metrics=[
                QCMetric(
                    name="Multiple values example",
                    value={"stuff": "in_a_dict"},
                    status_history=[
                        QCStatus(evaluator="Automated", timestamp=t0, status=Status.PASS),
                    ],
                    evaluated_assets=["asset0", "asset1"],
                ),
            ],
        )

    self.assertTrue(
        "is in a single-asset QCEvaluation and should not have evaluated_assets" in repr(context.exception)
    )

    # Check that multi-asset with empty evaluated_assets raises a validation error
    with self.assertRaises(ValidationError) as context:
        QCEvaluation(
            name="Drift map",
            modality=Modality.ECEPHYS,
            stage=Stage.MULTI_ASSET,
            metrics=[
                QCMetric(
                    name="Multiple values example",
                    value={"stuff": "in_a_dict"},
                    status_history=[
                        QCStatus(evaluator="Automated", timestamp=t0, status=Status.PASS),
                    ],
                    evaluated_assets=[],
                ),
            ],
        )

    self.assertTrue("is in a multi-asset QCEvaluation and must have evaluated_assets" in repr(context.exception))

    # Check that multi-asset with missing evaluated_assets raises a validation error
    with self.assertRaises(ValidationError) as context:
        QCEvaluation(
            name="Drift map",
            modality=Modality.ECEPHYS,
            stage=Stage.MULTI_ASSET,
            metrics=[
                QCMetric(
                    name="Multiple values example",
                    value={"stuff": "in_a_dict"},
                    status_history=[
                        QCStatus(evaluator="Automated", timestamp=t0, status=Status.PASS),
                    ],
                ),
            ],
        )

    self.assertTrue("is in a multi-asset QCEvaluation and must have evaluated_assets" in repr(context.exception))


if __name__ == "__main__":
unittest.main()

0 comments on commit dbb4d99

Please sign in to comment.