Skip to content

Commit

Permalink
Update docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
bjhardcastle committed Aug 16, 2024
1 parent b6f5a80 commit 0c1c31e
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 30 deletions.
9 changes: 3 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Please check this out and make feature requests, but don't rely on the API to re


# Aim
This package is meant to provide easy access to session-related stuff required for common tasks in CodeOcean and beyond.
This package is meant to provide easy access to session information needed for common tasks, in CodeOcean and beyond.

- when interacting with the CodeOcean API, it uses and returns objects from the [official Python library](https://github.com/codeocean/codeocean-sdk-python) - we will avoid duplicating functionality provided by that package, except to make convenience functions with assumptions baked-in (for example, getting a client with environment variables and a default domain; finding all the assets for a particular session)
- the core `Session` class should have a minimal set of methods and attributes that are common to sessions from all platforms - it should be fast to initialize and not do unnecessary work
Expand Down Expand Up @@ -74,13 +74,10 @@ datetime.datetime(2023, 12, 13, 13, 43, 40)
# Objects refer to the original session, regardless of how they were created:
>>> a = aind_session.Session('ecephys_676909_2023-12-13_13-43-40')
>>> b = aind_session.Session('ecephys_676909_2023-12-13_13-43-40_sorted_2024-03-01_16-02-45')
>>> a == b and a is not b
True
>>> assert len(set((a, b))) == 1, "Session objects must be hashable, based on session ID"

>>> assert a == b, "Objects are equal if they refer to the same session ID"
# ...objects are also hashable and sortable (using their IDs)
```


When working in a capsule, the `Session` object can be used to find or verify attached data assets:
```python
>>> import os
Expand Down
8 changes: 4 additions & 4 deletions src/aind_session/extensions/ecephys.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

@aind_session.extension.register_namespace("ecephys")
class Ecephys(aind_session.extension.ExtensionBaseClass):
"""Extension for the ecephys modality for handling sorted data assets, etc."""
"""Extension providing an ecephys modality namespace, for handling sorted data assets etc."""

@property
def sorted_data_asset(self) -> codeocean.data_asset.DataAsset:
Expand Down Expand Up @@ -49,8 +49,8 @@ def sorted_data_asset(self) -> codeocean.data_asset.DataAsset:

@npc_io.cached_property
def sorted_data_dir(self) -> upath.UPath:
"""Path to the sorted data associated with the session, likely in an S3
bucket.
"""Path to the dir containing the latest sorted data associated with the
session, likely in an S3 bucket.
- uses latest sorted data asset to get path (existence is checked)
- if no sorted data asset is found, checks for a data dir in S3
Expand Down Expand Up @@ -87,7 +87,7 @@ def sorted_data_dir(self) -> upath.UPath:
def is_sorted_data_asset(asset_id: str | codeocean.data_asset.DataAsset) -> bool:
"""Check if the asset is a sorted data asset.
- assumed sorted asset to be named <session-id>_sorted<unknown-suffix>
- assumes sorted asset to be named <session-id>_sorted<unknown-suffix>
- does not assume platform to be `ecephys`
Examples:
Expand Down
32 changes: 17 additions & 15 deletions src/aind_session/extensions/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,24 +16,26 @@

@aind_session.extension.register_namespace("metadata")
class Metadata(aind_session.extension.ExtensionBaseClass):
"""Extension for metadata, currently fetched from jsons in raw data
folder.
"""Extension providing metadata on Session object.
- currently fetches json files from raw data folder (if data has been uploaded)
- provides contents of json as a dict
Note: files with a '.' in the name are not supported via attribute access
(e.g. 'metadata.nd.json'), but can be accessed via `getattr()`
"""
def __getattr__(self, name: str) -> dict[str, Any]:
"""Fetch metadata from the raw data folder.
Examples:
>>> from aind_session import Session
>>> session = Session('ecephys_676909_2023-12-13_13-43-40')
>>> session.metadata.subject['genotype']
'Pvalb-IRES-Cre/wt;Ai32(RCL-ChR2(H134R)_EYFP)/wt'
Examples:
>>> from aind_session import Session
>>> session = Session('ecephys_676909_2023-12-13_13-43-40')
>>> session.metadata.subject['genotype']
'Pvalb-IRES-Cre/wt;Ai32(RCL-ChR2(H134R)_EYFP)/wt'
# Files with a '.' in the name must be accessed via getattr:
>>> content = getattr(session.metadata, 'metadata.nd')
"""

# Files with a '.' in the name must be accessed via getattr:
>>> content = getattr(session.metadata, 'metadata.nd')
def __getattr__(self, name: str) -> dict[str, Any]:
"""Return contents of metadata json file from raw data folder.
"""
try:
_ = self.json_files
Expand All @@ -53,14 +55,14 @@ def __getattr__(self, name: str) -> dict[str, Any]:

@property
def json_dir(self) -> upath.UPath:
"""Parent dir containing metadata json files"""
"""Path of dir containing metadata json files"""
path = self._session.raw_data_dir # may raise FileNotFoundError
logger.debug(f"Using {path.as_posix()} as parent dir for metadata json files")
return path

@npc_io.cached_property
def json_files(self) -> tuple[upath.UPath, ...]:
"""All available metadata jsons in the raw data folder.
"""Paths of all available metadata jsons in the session's metadata dir.
Examples:
>>> from aind_session import Session
Expand Down
22 changes: 17 additions & 5 deletions src/aind_session/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,15 @@ class Session:
- makes use of, and returns, objects from `https://github.com/codeocean/codeocean-sdk-python`
Examples:
```python
>>> session = Session('ecephys_676909_2023-12-13_13-43-40')
# the same session ID would be extracted from a path:
>>> session = Session('/root/capsule/aind_session/ecephys_676909_2023-12-13_13-43-40')
# the same session ID would be extracted from a longer string:
>>> session = Session('ecephys_676909_2023-12-13_13-43-40_sorted_2024-03-01_16-02-45')
# Common attributes available for all sessions:
>>> session = Session('ecephys_676909_2023-12-13_13-43-40')
>>> session.platform
Expand Down Expand Up @@ -53,6 +62,7 @@ class Session:
# Additional functionality for modalities added by extensions:
>>> session = Session('ecephys_676909_2023-12-13_13-43-40')
>>> session.ecephys.sorted_data_asset.id # doctest: +SKIP
```
"""

def __init__(self, session_id: str) -> None:
Expand Down Expand Up @@ -113,8 +123,10 @@ def __lt__(self, other: Session) -> bool:

@npc_io.cached_property
def assets(self) -> tuple[codeocean.data_asset.DataAsset, ...]:
"""All data assets associated with the session - may be empty.
"""All data assets associated with the session.
- objects are instances of `codeocean.data_asset.DataAsset`
- may be empty
- sorted by ascending creation date
"""
return aind_session.utils.get_session_data_assets(self.id)
Expand All @@ -123,7 +135,7 @@ def assets(self) -> tuple[codeocean.data_asset.DataAsset, ...]:
def raw_data_asset(self) -> codeocean.data_asset.DataAsset:
"""Latest raw data asset associated with the session.
Raises `LookupError` if no raw data assets are found.
- raises `LookupError` if no raw data assets are found
"""
assets = tuple(
asset
Expand All @@ -146,8 +158,8 @@ def raw_data_asset(self) -> codeocean.data_asset.DataAsset:

@npc_io.cached_property
def raw_data_dir(self) -> upath.UPath:
"""Path to the raw data associated with the session, likely in an S3
bucket.
"""Path to the dir containing raw data associated with the session, likely
in an S3 bucket.
- uses latest raw data asset to get path (existence is checked)
- if no raw data asset is found, checks for a data dir in S3
Expand Down
4 changes: 4 additions & 0 deletions src/aind_session/utils/s3_utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
from __future__ import annotations

import functools
import logging

import upath

logger = logging.getLogger(__name__)

S3_DATA_BUCKET_NAMES = (
"codeocean-s3datasetsbucket-1u41qdg42ur9",
"aind-private-data-prod-o5171v",
Expand All @@ -30,6 +33,7 @@ def get_source_dir_by_name(name: str, ttl_hash: int | None = None) -> upath.UPat
for s3_bucket in S3_DATA_BUCKET_NAMES:
path = upath.UPath(f"s3://{s3_bucket}/{name}")
if path.exists():
logger.debug(f"Found dir matching {name!r} in {path.parent.as_posix()}")
return path
raise FileNotFoundError(f"No dir named {name!r} found in known data buckets on S3")

Expand Down

0 comments on commit 0c1c31e

Please sign in to comment.