Skip to content

Commit

Permalink
Revise properties
Browse files Browse the repository at this point in the history
- raise AttributeError to allow getattr usage
- add properties to check is_uploaded/is_sorted
- replace cached properties with plain properties that call base functions cached with a time-to-live
- set default ttl 10 minutes
  • Loading branch information
bjhardcastle committed Aug 26, 2024
1 parent 3e4f391 commit dd24b01
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 44 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ pip install aind_session
datetime.datetime(2023, 12, 13, 13, 43, 40)
>>> len(session.data_assets) # doctest: +SKIP
42
>>> session.is_uploaded
True
>>> session.raw_data_asset.id
'16d46411-540a-4122-b47f-8cb2a15d593a'
>>> session.raw_data_dir.as_posix()
Expand All @@ -72,6 +74,8 @@ datetime.datetime(2023, 12, 13, 13, 43, 40)
# Additional functionality in namespace extensions:
>>> session.metadata.subject['genotype']
'Pvalb-IRES-Cre/wt;Ai32(RCL-ChR2(H134R)_EYFP)/wt'
>>> session.ecephys.is_sorted
True
>>> session.ecephys.sorted_data_asset.name
'ecephys_676909_2023-12-13_13-43-40_sorted_2024-03-01_16-02-45'

Expand Down
52 changes: 35 additions & 17 deletions src/aind_session/extensions/ecephys.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import datetime
import functools
import logging
from typing import ClassVar, Literal

Expand Down Expand Up @@ -36,7 +37,7 @@ class Ecephys(aind_session.extension.ExtensionBaseClass):
SORTING_PIPELINE_ID: ClassVar[str] = "1f8f159a-7670-47a9-baf1-078905fc9c2e"
TRIGGER_CAPSULE_ID: ClassVar[str] = "eb5a26e4-a391-4d79-9da5-1ab65b71253f"

@npc_io.cached_property
@property
def sorted_data_assets(self) -> tuple[codeocean.data_asset.DataAsset, ...]:
"""All sorted data assets associated with the session (may be empty).
Expand All @@ -50,7 +51,7 @@ def sorted_data_assets(self) -> tuple[codeocean.data_asset.DataAsset, ...]:
>>> session.ecephys.sorted_data_assets[0].created
1702783011
Empty
Empty if no sorted data assets are found:
>>> session = aind_session.Session('ecephys_676909_2023-12-13_13-43-39')
>>> session.ecephys.sorted_data_assets
()
Expand Down Expand Up @@ -99,12 +100,28 @@ def is_sorted_data_asset(asset_id: str | codeocean.data_asset.DataAsset) -> bool
)
return False

@npc_io.cached_property
@property
def is_sorted(self) -> bool:
"""Check if a sorted data asset exists, and is not in an error state.

Returns `False` when the session has no sorted data assets, or when the
latest sorted data asset is flagged by `is_sorted_asset_error()`;
returns `True` otherwise.

Examples
--------
>>> session = aind_session.Session('ecephys_676909_2023-12-13_13-43-40')
>>> session.ecephys.is_sorted
True
"""
# Check the (possibly empty) tuple first: `sorted_data_asset` raises when
# there are no sorted assets, so it must not be touched in that case.
if not self.sorted_data_assets:
return False
# Only the latest sorted asset is inspected for an error state.
if self.is_sorted_asset_error(self.sorted_data_asset):
return False
return True

@property
def sorted_data_asset(self) -> codeocean.data_asset.DataAsset:
"""Latest sorted data asset associated with the session.
Raises `LookupError` if no sorted data assets are found.
Raises `AttributeError` if no sorted data assets are found.
Examples
--------
>>> session = aind_session.Session('ecephys_676909_2023-12-13_13-43-40')
Expand All @@ -125,20 +142,21 @@ def sorted_data_asset(self) -> codeocean.data_asset.DataAsset:
f"Found {len(self.sorted_data_assets)} sorted data assets for {self._session.id}: most recent asset will be used ({created=})"
)
else:
raise LookupError(
f"No sorted data asset found for {self._session.id}. Has session data been uploaded?"
# Build a single message string. Passing the two fragments as separate
# positional arguments (comma instead of concatenation) would make
# `str(exc)` render as a tuple rather than a readable sentence.
raise AttributeError(
    f"No sorted data asset found for {self._session.id}:"
    + (
        " raw data has not been uploaded yet."
        if not self._session.is_uploaded
        else " try session.ecephys.run_sorting()"
    )
)
logger.debug(f"Using {asset.id=} for {self._session.id} sorted data asset")
return asset

@npc_io.cached_property
@property
def sorted_data_dir(self) -> upath.UPath:
"""Path to the dir containing the latest sorted data associated with the
session, likely in an S3 bucket.
- uses latest sorted data asset to get path (existence is checked)
- if no sorted data asset is found, checks for a data dir in S3
- raises `FileNotFoundError` if no sorted data assets are available to link
- raises `AttributeError` if no sorted data assets are available to link
to the session
Examples
Expand All @@ -149,8 +167,8 @@ def sorted_data_dir(self) -> upath.UPath:
"""
try:
_ = self.sorted_data_asset
except LookupError:
raise FileNotFoundError(
except AttributeError:
raise AttributeError(
f"No sorted data asset found in CodeOcean for {self._session.id}. Has the session been sorted?"
) from None
else:
Expand All @@ -167,7 +185,7 @@ def sorted_data_dir(self) -> upath.UPath:
)
return sorted_data_dir

@npc_io.cached_property
@property
def clipped_dir(self) -> upath.UPath:
"""Path to the dir containing original Open Ephys recording data, with
truncated `continuous.dat` files.
Expand All @@ -186,12 +204,12 @@ def clipped_dir(self) -> upath.UPath:
self._session.raw_data_asset.id
)[0]
) is None:
raise FileNotFoundError(
raise AttributeError(
f"No 'clipped' dir found in uploaded raw data for {self._session.id} (checked in root dir and modality subdirectory)"
)
return path

@npc_io.cached_property
@property
def compressed_dir(self) -> upath.UPath:
"""
Path to the dir containing compressed zarr format versions of Open Ephys
Expand All @@ -211,7 +229,7 @@ def compressed_dir(self) -> upath.UPath:
self._session.raw_data_asset.id
)[1]
) is None:
raise FileNotFoundError(
raise AttributeError(
f"No 'compressed' dir found in uploaded raw data for {self._session.id} (checked in root dir and modality subdirectory)"
)
return path
Expand Down Expand Up @@ -256,7 +274,7 @@ def get_clipped_and_compressed_dirs(
assert len(return_paths) == 2
return return_paths[0], return_paths[1]

@npc_io.cached_property
@property
def sorted_probes(self) -> tuple[str, ...]:
"""Names of probes that reached the final stage of the sorting pipeline.
Expand Down Expand Up @@ -328,7 +346,7 @@ def get_sorted_probe_names(
logger.debug(f"Found {len(probes)} probes in {parent_dir.as_posix()}: {probes}")
return tuple(sorted(probes))

@npc_io.cached_property
@property
def is_sorting_fail(self) -> bool:
"""Check if the latest sorted data asset indicates that the sorting pipeline failed.
Expand Down
95 changes: 69 additions & 26 deletions src/aind_session/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class Session:
>>> session.raw_data_dir.as_posix()
Traceback (most recent call last):
...
FileNotFoundError: No raw data asset in CodeOcean and no dir in known data buckets on S3 for SmartSPIM_698260_2024-07-20_21-47-21
AttributeError: No raw data asset in CodeOcean and no dir in known data buckets on S3 for SmartSPIM_698260_2024-07-20_21-47-21
Additional functionality for modalities added by extensions:
>>> session = Session('ecephys_676909_2023-12-13_13-43-40')
Expand Down Expand Up @@ -129,7 +129,7 @@ def __lt__(self, other: Session) -> bool:
"""
return self.id < other.id

@npc_io.cached_property
@property
def data_assets(self) -> tuple[codeocean.data_asset.DataAsset, ...]:
"""All data assets associated with the session.
Expand All @@ -143,13 +143,42 @@ def data_assets(self) -> tuple[codeocean.data_asset.DataAsset, ...]:
>>> session.data_assets[0].name
'ecephys_676909_2023-12-13_13-43-40'
"""
return aind_session.utils.get_session_data_assets(self.id)
return aind_session.utils.get_session_data_assets(
session_id=self.id,
ttl_hash=aind_session.utils.get_ttl_hash(),
)

@property
def is_uploaded(self) -> bool:
"""Check if the session's raw data has been uploaded.
- returns `True` if any raw data assets exist, or raw data dir found in S3
- returns `False` otherwise
@npc_io.cached_property
Examples
--------
>>> session = aind_session.Session('ecephys_676909_2023-12-13_13-43-40')
>>> session.is_uploaded
True
"""
# `raw_data_asset` raises AttributeError when no raw asset exists, so
# getattr() with a default yields None here instead of propagating.
if getattr(self, "raw_data_asset", None) is not None:
return True
# No raw data asset: fall back to looking up a source dir by session id.
# The result itself is discarded; only whether the lookup raises matters.
# `ttl_hash` comes from `get_ttl_hash()`, which returns the same value within
# a time window — presumably so the lookup is cached for that period.
try:
_ = aind_session.utils.get_source_dir_by_name(
name=self.id,
ttl_hash=aind_session.utils.get_ttl_hash(),
)
except FileNotFoundError:
return False
else:
return True

@property
def raw_data_asset(self) -> codeocean.data_asset.DataAsset:
"""Latest raw data asset associated with the session.
- raises `LookupError` if no raw data assets are found
- raises `AttributeError` if no raw data assets are found, so `getattr()`
can be used to lookup the attribute without raising an exception
Examples
--------
Expand All @@ -175,20 +204,28 @@ def raw_data_asset(self) -> codeocean.data_asset.DataAsset:
f"Found {len(assets)} raw data assets for {self.id}: latest asset will be used ({created=})"
)
else:
raise LookupError(
f"No raw data asset found for {self.id}. Has session data been uploaded?"
)
msg = f"No raw data assets found for {self.id}."
try:
path = aind_session.utils.get_source_dir_by_name(
name=self.id,
ttl_hash=aind_session.utils.get_ttl_hash(),
)
except FileNotFoundError:
msg += " The session has likely not been uploaded."
else:
msg += f" Raw data found in {path.as_posix()}: a raw data asset needs to be created."
raise AttributeError(msg)
logger.debug(f"Using {asset.id=} for {self.id} raw data asset")
return asset

@npc_io.cached_property
@property
def raw_data_dir(self) -> upath.UPath:
"""Path to the dir containing raw data associated with the session, likely
in an S3 bucket.
- uses latest raw data asset to get path (existence is checked)
- if no raw data asset is found, checks for a data dir in S3
- raises `FileNotFoundError` if no raw data assets are available to link
- raises `AttributeError` if no raw data assets are available to link
to the session
Examples
Expand All @@ -197,29 +234,32 @@ def raw_data_dir(self) -> upath.UPath:
>>> session.raw_data_dir.as_posix()
's3://aind-ephys-data/ecephys_676909_2023-12-13_13-43-40'
"""
try:
_ = self.raw_data_asset
except LookupError:
with contextlib.suppress(FileNotFoundError):
path = aind_session.utils.get_source_dir_by_name(self.id)
logger.debug(
f"No raw data asset uploaded for {self.id}, but data dir found: {path}"
)
return path
raise FileNotFoundError(
f"No raw data asset in CodeOcean and no dir in known data buckets on S3 for {self.id}"
) from None
else:
if getattr(self, "raw_data_asset", None):
logger.debug(
f"Using asset {self.raw_data_asset.id} to find raw data path for {self.id}"
)
raw_data_dir = aind_session.utils.get_data_asset_source_dir(
asset_id=self.raw_data_asset.id,
ttl_hash=aind_session.utils.get_ttl_hash()
)
logger.debug(f"Raw data dir found for {self.id}: {raw_data_dir}")
return raw_data_dir
try:
path = aind_session.utils.get_source_dir_by_name(
name=self.id,
ttl_hash=aind_session.utils.get_ttl_hash(),
)
except FileNotFoundError:
raise AttributeError(
f"No raw data asset in CodeOcean and no dir in known data buckets on S3 for {self.id}"
) from None
else:
logger.warning(
f"No raw data asset exists for {self.id}, but uploaded data dir found: {path}"
)
return path

@npc_io.cached_property
@property
def modalities(self) -> tuple[str, ...]:
"""Names of modalities available in the session's raw data dir.
Expand All @@ -234,6 +274,9 @@ def modalities(self) -> tuple[str, ...]:
>>> session.modalities
('behavior', 'behavior_videos', 'ecephys')
"""
if not self.is_uploaded:
logger.warning(f"Raw data has not been uploaded for {self.id}: no modalities available yet")
return ()
dir_names: set[str] = {
d.name for d in self.raw_data_dir.iterdir() if d.is_dir()
}
Expand Down Expand Up @@ -279,7 +322,7 @@ def get_sessions(
- `datetime.date` and `datetime.datetime` objects are also accepted
- raises `ValueError` if any of the provided filtering arguments are invalid
- raises `LookupError` if no sessions are found matching the criteria
- returns an empty tuple if no sessions are found matching the criteria
- note on performance and CodeOcean API calls: all assets associated with a
subject are fetched once and cached, so subsequent calls to this function
Expand Down Expand Up @@ -336,7 +379,7 @@ def get_sessions(
continue
sessions.add(session)
if not sessions:
raise LookupError(f"No sessions found matching {parameters=}")
logger.info(f"No sessions found matching {parameters=}")
return tuple(sorted(sessions))


Expand Down
2 changes: 1 addition & 1 deletion src/aind_session/utils/misc_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
logger = logging.getLogger(__name__)


def get_ttl_hash(seconds: float = 2 * 60) -> int:
def get_ttl_hash(seconds: float = 10 * 60) -> int:
"""Return the same value within `seconds` time period.
- used to cache function results for a limited period of time
Expand Down

0 comments on commit dd24b01

Please sign in to comment.