diff --git a/README.md b/README.md index 9d206ed..aaaad64 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,8 @@ pip install aind_session datetime.datetime(2023, 12, 13, 13, 43, 40) >>> len(session.data_assets) # doctest: +SKIP 42 +>>> session.is_uploaded +True >>> session.raw_data_asset.id '16d46411-540a-4122-b47f-8cb2a15d593a' >>> session.raw_data_dir.as_posix() @@ -72,6 +74,8 @@ datetime.datetime(2023, 12, 13, 13, 43, 40) # Additional functionality in namespace extensions: >>> session.metadata.subject['genotype'] 'Pvalb-IRES-Cre/wt;Ai32(RCL-ChR2(H134R)_EYFP)/wt' +>>> session.ecephys.is_sorted +True >>> session.ecephys.sorted_data_asset.name 'ecephys_676909_2023-12-13_13-43-40_sorted_2024-03-01_16-02-45' diff --git a/src/aind_session/extensions/ecephys.py b/src/aind_session/extensions/ecephys.py index 22e2f18..b063b8e 100644 --- a/src/aind_session/extensions/ecephys.py +++ b/src/aind_session/extensions/ecephys.py @@ -1,6 +1,7 @@ from __future__ import annotations import datetime +import functools import logging from typing import ClassVar, Literal @@ -36,7 +37,7 @@ class Ecephys(aind_session.extension.ExtensionBaseClass): SORTING_PIPELINE_ID: ClassVar[str] = "1f8f159a-7670-47a9-baf1-078905fc9c2e" TRIGGER_CAPSULE_ID: ClassVar[str] = "eb5a26e4-a391-4d79-9da5-1ab65b71253f" - @npc_io.cached_property + @property def sorted_data_assets(self) -> tuple[codeocean.data_asset.DataAsset, ...]: """All sorted data assets associated with the session (may be empty). 
@@ -50,7 +51,7 @@ def sorted_data_assets(self) -> tuple[codeocean.data_asset.DataAsset, ...]: >>> session.ecephys.sorted_data_assets[0].created 1702783011 - Empty + Empty if no sorted data assets are found: >>> session = aind_session.Session('ecephys_676909_2023-12-13_13-43-39') >>> session.ecephys.sorted_data_assets () @@ -99,12 +100,28 @@ def is_sorted_data_asset(asset_id: str | codeocean.data_asset.DataAsset) -> bool ) return False - @npc_io.cached_property + @property + def is_sorted(self) -> bool: + """Check if a sorted data asset exists, and is not in an error state. + + Examples + -------- + >>> session = aind_session.Session('ecephys_676909_2023-12-13_13-43-40') + >>> session.ecephys.is_sorted + True + """ + if not self.sorted_data_assets: + return False + if self.is_sorted_asset_error(self.sorted_data_asset): + return False + return True + + @property def sorted_data_asset(self) -> codeocean.data_asset.DataAsset: """Latest sorted data asset associated with the session. - Raises `LookupError` if no sorted data assets are found. - + Raises `AttributeError` if no sorted data assets are found. + Examples -------- >>> session = aind_session.Session('ecephys_676909_2023-12-13_13-43-40') @@ -125,20 +142,21 @@ def sorted_data_asset(self) -> codeocean.data_asset.DataAsset: f"Found {len(self.sorted_data_assets)} sorted data assets for {self._session.id}: most recent asset will be used ({created=})" ) else: - raise LookupError( - f"No sorted data asset found for {self._session.id}. Has session data been uploaded?" + raise AttributeError( + f"No sorted data asset found for {self._session.id}:" + f"{' raw data has not been uploaded yet.' 
if not self._session.is_uploaded else ' try session.ecephys.run_sorting()'}" ) logger.debug(f"Using {asset.id=} for {self._session.id} sorted data asset") return asset - @npc_io.cached_property + @property def sorted_data_dir(self) -> upath.UPath: """Path to the dir containing the latest sorted data associated with the session, likely in an S3 bucket. - uses latest sorted data asset to get path (existence is checked) - if no sorted data asset is found, checks for a data dir in S3 - - raises `FileNotFoundError` if no sorted data assets are available to link + - raises `AttributeError` if no sorted data assets are available to link to the session Examples @@ -149,8 +167,8 @@ def sorted_data_dir(self) -> upath.UPath: """ try: _ = self.sorted_data_asset - except LookupError: - raise FileNotFoundError( + except AttributeError: + raise AttributeError( f"No sorted data asset found in CodeOcean for {self._session.id}. Has the session been sorted?" ) from None else: @@ -167,7 +185,7 @@ def sorted_data_dir(self) -> upath.UPath: ) return sorted_data_dir - @npc_io.cached_property + @property def clipped_dir(self) -> upath.UPath: """Path to the dir containing original Open Ephys recording data, with truncated `continuous.dat` files. 
@@ -186,12 +204,12 @@ def clipped_dir(self) -> upath.UPath: self._session.raw_data_asset.id )[0] ) is None: - raise FileNotFoundError( + raise AttributeError( f"No 'clipped' dir found in uploaded raw data for {self._session.id} (checked in root dir and modality subdirectory)" ) return path - @npc_io.cached_property + @property def compressed_dir(self) -> upath.UPath: """ Path to the dir containing compressed zarr format versions of Open Ephys @@ -211,7 +229,7 @@ def compressed_dir(self) -> upath.UPath: self._session.raw_data_asset.id )[1] ) is None: - raise FileNotFoundError( + raise AttributeError( f"No 'compressed' dir found in uploaded raw data for {self._session.id} (checked in root dir and modality subdirectory)" ) return path @@ -256,7 +274,7 @@ def get_clipped_and_compressed_dirs( assert len(return_paths) == 2 return return_paths[0], return_paths[1] - @npc_io.cached_property + @property def sorted_probes(self) -> tuple[str, ...]: """Names of probes that reached the final stage of the sorting pipeline. @@ -328,7 +346,7 @@ def get_sorted_probe_names( logger.debug(f"Found {len(probes)} probes in {parent_dir.as_posix()}: {probes}") return tuple(sorted(probes)) - @npc_io.cached_property + @property def is_sorting_fail(self) -> bool: """Check if the latest sorted data asset indicates that the sorting pipeline failed. diff --git a/src/aind_session/session.py b/src/aind_session/session.py index 6cf24d8..1906c63 100644 --- a/src/aind_session/session.py +++ b/src/aind_session/session.py @@ -60,7 +60,7 @@ class Session: >>> session.raw_data_dir.as_posix() Traceback (most recent call last): ... 
- FileNotFoundError: No raw data asset in CodeOcean and no dir in known data buckets on S3 for SmartSPIM_698260_2024-07-20_21-47-21 + AttributeError: No raw data asset in CodeOcean and no dir in known data buckets on S3 for SmartSPIM_698260_2024-07-20_21-47-21 Additional functionality for modalities added by extensions: >>> session = Session('ecephys_676909_2023-12-13_13-43-40') @@ -129,7 +129,7 @@ def __lt__(self, other: Session) -> bool: """ return self.id < other.id - @npc_io.cached_property + @property def data_assets(self) -> tuple[codeocean.data_asset.DataAsset, ...]: """All data assets associated with the session. @@ -143,13 +143,42 @@ def data_assets(self) -> tuple[codeocean.data_asset.DataAsset, ...]: >>> session.data_assets[0].name 'ecephys_676909_2023-12-13_13-43-40' """ - return aind_session.utils.get_session_data_assets(self.id) + return aind_session.utils.get_session_data_assets( + session_id=self.id, + ttl_hash=aind_session.utils.get_ttl_hash(), + ) + + @property + def is_uploaded(self) -> bool: + """Check if the session's raw data has been uploaded. + + - returns `True` if any raw data assets exist, or raw data dir found in S3 + - returns `False` otherwise - @npc_io.cached_property + Examples + -------- + >>> session = aind_session.Session('ecephys_676909_2023-12-13_13-43-40') + >>> session.is_uploaded + True + """ + if getattr(self, "raw_data_asset", None) is not None: + return True + try: + _ = aind_session.utils.get_source_dir_by_name( + name=self.id, + ttl_hash=aind_session.utils.get_ttl_hash(), + ) + except FileNotFoundError: + return False + else: + return True + + @property def raw_data_asset(self) -> codeocean.data_asset.DataAsset: """Latest raw data asset associated with the session. 
- - raises `LookupError` if no raw data assets are found + - raises `AttributeError` if no raw data assets are found, so `getattr()` + can be used to lookup the attribute without raising an exception Examples -------- @@ -175,20 +204,28 @@ def raw_data_asset(self) -> codeocean.data_asset.DataAsset: f"Found {len(assets)} raw data assets for {self.id}: latest asset will be used ({created=})" ) else: - raise LookupError( - f"No raw data asset found for {self.id}. Has session data been uploaded?" - ) + msg = f"No raw data assets found for {self.id}." + try: + path = aind_session.utils.get_source_dir_by_name( + name=self.id, + ttl_hash=aind_session.utils.get_ttl_hash(), + ) + except FileNotFoundError: + msg += " The session has likely not been uploaded." + else: + msg += f" Raw data found in {path.as_posix()}: a raw data asset needs to be created." + raise AttributeError(msg) logger.debug(f"Using {asset.id=} for {self.id} raw data asset") return asset - @npc_io.cached_property + @property def raw_data_dir(self) -> upath.UPath: """Path to the dir containing raw data associated with the session, likely in an S3 bucket. 
- uses latest raw data asset to get path (existence is checked) - if no raw data asset is found, checks for a data dir in S3 - - raises `FileNotFoundError` if no raw data assets are available to link + - raises `AttributeError` if no raw data assets are available to link to the session Examples @@ -197,29 +234,32 @@ def raw_data_dir(self) -> upath.UPath: >>> session.raw_data_dir.as_posix() 's3://aind-ephys-data/ecephys_676909_2023-12-13_13-43-40' """ - try: - _ = self.raw_data_asset - except LookupError: - with contextlib.suppress(FileNotFoundError): - path = aind_session.utils.get_source_dir_by_name(self.id) - logger.debug( - f"No raw data asset uploaded for {self.id}, but data dir found: {path}" - ) - return path - raise FileNotFoundError( - f"No raw data asset in CodeOcean and no dir in known data buckets on S3 for {self.id}" - ) from None - else: + if getattr(self, "raw_data_asset", None): logger.debug( f"Using asset {self.raw_data_asset.id} to find raw data path for {self.id}" ) raw_data_dir = aind_session.utils.get_data_asset_source_dir( asset_id=self.raw_data_asset.id, + ttl_hash=aind_session.utils.get_ttl_hash() ) logger.debug(f"Raw data dir found for {self.id}: {raw_data_dir}") return raw_data_dir + try: + path = aind_session.utils.get_source_dir_by_name( + name=self.id, + ttl_hash=aind_session.utils.get_ttl_hash(), + ) + except FileNotFoundError: + raise AttributeError( + f"No raw data asset in CodeOcean and no dir in known data buckets on S3 for {self.id}" + ) from None + else: + logger.warning( + f"No raw data asset exists for {self.id}, but uploaded data dir found: {path}" + ) + return path - @npc_io.cached_property + @property def modalities(self) -> tuple[str, ...]: """Names of modalities available in the session's raw data dir. 
@@ -234,6 +274,9 @@ def modalities(self) -> tuple[str, ...]: >>> session.modalities ('behavior', 'behavior_videos', 'ecephys') """ + if not self.is_uploaded: + logger.warning(f"Raw data has not been uploaded for {self.id}: no modalities available yet") + return () dir_names: set[str] = { d.name for d in self.raw_data_dir.iterdir() if d.is_dir() } @@ -279,7 +322,7 @@ def get_sessions( - `datetime.date` and `datetime.datetime` objects are also accepted - raises `ValueError` if any of the provided filtering arguments are invalid - - raises `LookupError` if no sessions are found matching the criteria + - returns an empty tuple if no sessions are found matching the criteria - note on performance and CodeOcean API calls: all assets associated with a subject are fetched once and cached, so subsequent calls to this function @@ -336,7 +379,7 @@ def get_sessions( continue sessions.add(session) if not sessions: - raise LookupError(f"No sessions found matching {parameters=}") + logger.info(f"No sessions found matching {parameters=}") return tuple(sorted(sessions)) diff --git a/src/aind_session/utils/misc_utils.py b/src/aind_session/utils/misc_utils.py index 824c528..87670ad 100644 --- a/src/aind_session/utils/misc_utils.py +++ b/src/aind_session/utils/misc_utils.py @@ -6,7 +6,7 @@ logger = logging.getLogger(__name__) -def get_ttl_hash(seconds: float = 2 * 60) -> int: +def get_ttl_hash(seconds: float = 10 * 60) -> int: """Return the same value within `seconds` time period. - used to cache function results for a limited period of time