Skip to content

Commit

Permalink
Establish consistent path names
Browse files Browse the repository at this point in the history
  • Loading branch information
bjhardcastle authored Aug 15, 2024
1 parent fa7dcf2 commit 50456d5
Show file tree
Hide file tree
Showing 7 changed files with 50 additions and 50 deletions.
10 changes: 5 additions & 5 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
"python.analysis.typeCheckingMode": "off",
"workbench.colorCustomizations": {
"commandCenter.border": "#e7e7e799",
"panel.border": "#d56eb0",
"sash.hoverBorder": "#d56eb0",
"titleBar.activeBackground": "#ca469a",
"panel.border": "#ee2590",
"sash.hoverBorder": "#ee2590",
"titleBar.activeBackground": "#d01076",
"titleBar.activeForeground": "#e7e7e7",
"titleBar.inactiveBackground": "#ca469a99",
"titleBar.inactiveBackground": "#d0107699",
"titleBar.inactiveForeground": "#e7e7e799"
},
"peacock.color": "#ca469a"
"peacock.color": "#d01076"
}
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ pip install aind_session
datetime.datetime(2023, 12, 13, 13, 43, 40)
>>> session.raw_data_asset.id
'16d46411-540a-4122-b47f-8cb2a15d593a'
>>> session.raw_data_folder.as_posix()
>>> session.raw_data_dir.as_posix()
's3://aind-ephys-data/ecephys_676909_2023-12-13_13-43-40'

# Additional functionality in namespace extensions:
Expand Down
14 changes: 7 additions & 7 deletions src/aind_session/extensions/ecephys.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,19 +48,19 @@ def sorted_data_asset(self) -> codeocean.data_asset.DataAsset:
return asset

@npc_io.cached_property
def sorted_data_folder(self) -> upath.UPath:
def sorted_data_dir(self) -> upath.UPath:
"""Path to the sorted data associated with the session, likely in an S3
bucket.
- uses latest sorted data asset to get path (existence is checked)
- if no sorted data asset is found, checks for a data folder in S3
- if no sorted data asset is found, checks for a data dir in S3
- raises `FileNotFoundError` if no sorted data assets are available to link
to the session
Examples:
>>> from aind_session import Session
>>> session = Session('ecephys_676909_2023-12-13_13-43-40')
>>> session.ecephys.sorted_data_folder.as_posix()
>>> session.ecephys.sorted_data_dir.as_posix()
's3://codeocean-s3datasetsbucket-1u41qdg42ur9/a2a54575-b5ca-4cf0-acd0-2933e18bcb2d'
"""
try:
Expand All @@ -73,15 +73,15 @@ def sorted_data_folder(self) -> upath.UPath:
logger.debug(
f"Using asset {self.sorted_data_asset.id} to find sorted data path for {self._session.id}"
)
sorted_data_folder = (
aind_session.utils.codeocean_utils.get_data_asset_source_folder(
sorted_data_dir = (
aind_session.utils.codeocean_utils.get_data_asset_source_dir(
self.sorted_data_asset
)
)
logger.debug(
f"Sorted data path found for {self._session.id}: {sorted_data_folder}"
f"Sorted data path found for {self._session.id}: {sorted_data_dir}"
)
return sorted_data_folder
return sorted_data_dir

@staticmethod
def is_sorted_data_asset(asset_id: str | codeocean.data_asset.DataAsset) -> bool:
Expand Down
22 changes: 11 additions & 11 deletions src/aind_session/extensions/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,43 +36,43 @@ def __getattr__(self, name: str) -> dict[str, Any]:
>>> content = getattr(session.metadata, 'metadata.nd')
"""
try:
_ = self.json_paths
_ = self.json_files
except FileNotFoundError:
raise AttributeError(
f"No raw data folder found for {self._session.id}"
f"No raw data dir found for {self._session.id}"
) from None
try:
path = next(p for p in self.json_paths if p.stem == str(name))
path = next(p for p in self.json_files if p.stem == str(name))
except StopIteration:
raise AttributeError(
f"No {name}.json found in cached view of {self.json_folder.as_posix()}. Available files: {[p.name for p in self.json_paths]}"
f"No {name}.json found in cached view of {self.json_dir.as_posix()}. Available files: {[p.name for p in self.json_files]}"
) from None
else:
logger.debug(f"Using contents of metadata json at {path.as_posix()}")
return json.loads(path.read_text())

@property
def json_folder(self) -> upath.UPath:
"""Parent folder containing metadata json files"""
path = self._session.raw_data_folder # may raise FileNotFoundError
def json_dir(self) -> upath.UPath:
"""Parent dir containing metadata json files"""
path = self._session.raw_data_dir # may raise FileNotFoundError
logger.debug(
f"Using {path.as_posix()} as parent folder for metadata json files"
f"Using {path.as_posix()} as parent dir for metadata json files"
)
return path

@npc_io.cached_property
def json_paths(self) -> tuple[upath.UPath, ...]:
def json_files(self) -> tuple[upath.UPath, ...]:
"""All available metadata jsons in the raw data dir.
Examples:
>>> from aind_session import Session
>>> session = Session('ecephys_676909_2023-12-13_13-43-40')
>>> [path.name for path in session.metadata.json_paths]
>>> [path.name for path in session.metadata.json_files]
['data_description.json', 'metadata.nd.json', 'procedures.json', 'processing.json', 'rig.json', 'session.json', 'subject.json']
"""
return tuple(
sorted(
(path for path in self.json_folder.iterdir() if path.suffix == ".json"),
(path for path in self.json_dir.iterdir() if path.suffix == ".json"),
key=lambda p: p.name,
)
)
Expand Down
26 changes: 13 additions & 13 deletions src/aind_session/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,23 +32,23 @@ class Session:
datetime.datetime(2023, 12, 13, 13, 43, 40)
>>> session.raw_data_asset.id
'16d46411-540a-4122-b47f-8cb2a15d593a'
>>> session.raw_data_folder.as_posix()
>>> session.raw_data_dir.as_posix()
's3://aind-ephys-data/ecephys_676909_2023-12-13_13-43-40'
# Should be able to handle all platforms:
>>> session = Session('multiplane-ophys_741863_2024-08-13_09-26-41')
>>> session.raw_data_folder.as_posix()
>>> session.raw_data_dir.as_posix()
's3://aind-private-data-prod-o5171v/multiplane-ophys_741863_2024-08-13_09-26-41'
>>> session = Session('behavior_717121_2024-06-16_11-39-34')
>>> session.raw_data_folder.as_posix()
>>> session.raw_data_dir.as_posix()
's3://aind-private-data-prod-o5171v/behavior_717121_2024-06-16_11-39-34'
>>> session = Session('SmartSPIM_698260_2024-07-20_21-47-21')
>>> session.raw_data_folder.as_posix()
>>> session.raw_data_dir.as_posix()
Traceback (most recent call last):
...
FileNotFoundError: No raw data asset in CodeOcean and no folder in known data buckets on S3 for SmartSPIM_698260_2024-07-20_21-47-21
FileNotFoundError: No raw data asset in CodeOcean and no dir in known data buckets on S3 for SmartSPIM_698260_2024-07-20_21-47-21
# Additional functionality for modalities added by extensions:
>>> session = Session('ecephys_676909_2023-12-13_13-43-40')
Expand Down Expand Up @@ -109,36 +109,36 @@ def raw_data_asset(self) -> codeocean.data_asset.DataAsset:
return asset

@npc_io.cached_property
def raw_data_folder(self) -> upath.UPath:
def raw_data_dir(self) -> upath.UPath:
"""Path to the raw data associated with the session, likely in an S3
bucket.
- uses latest raw data asset to get path (existence is checked)
- if no raw data asset is found, checks for a data folder in S3
- if no raw data asset is found, checks for a data dir in S3
- raises `FileNotFoundError` if no raw data assets are available to link
to the session
"""
try:
_ = self.raw_data_asset
except LookupError:
with contextlib.suppress(FileNotFoundError):
path = aind_session.utils.get_source_folder_by_name(self.id)
path = aind_session.utils.get_source_dir_by_name(self.id)
logger.debug(
f"No raw data asset uploaded for {self.id}, but data folder found: {path}"
f"No raw data asset uploaded for {self.id}, but data dir found: {path}"
)
return path
raise FileNotFoundError(
f"No raw data asset in CodeOcean and no folder in known data buckets on S3 for {self.id}"
f"No raw data asset in CodeOcean and no dir in known data buckets on S3 for {self.id}"
) from None
else:
logger.debug(
f"Using asset {self.raw_data_asset.id} to find raw data path for {self.id}"
)
raw_data_folder = aind_session.utils.get_data_asset_source_folder(
raw_data_dir = aind_session.utils.get_data_asset_source_dir(
self.raw_data_asset
)
logger.debug(f"Raw data folder found for {self.id}: {raw_data_folder}")
return raw_data_folder
logger.debug(f"Raw data dir found for {self.id}: {raw_data_dir}")
return raw_data_dir


if __name__ == "__main__":
Expand Down
16 changes: 8 additions & 8 deletions src/aind_session/utils/codeocean_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,34 +127,34 @@ def is_raw_data_asset(asset: str | uuid.UUID | codeocean.data_asset.DataAsset) -
return False


def get_data_asset_source_folder(
def get_data_asset_source_dir(
asset: str | uuid.UUID | codeocean.data_asset.DataAsset,
) -> upath.UPath:
"""Get the source folder for a data asset.
"""Get the source dir for a data asset.
- the path is constructed from the asset's `source_bucket` metadata
- otherwise, the path is constructed from the asset's ID and known S3
buckets, and existence is checked
- otherwise, the path is constructed from the asset's name and known S3
buckets, and existence is checked
- raises `FileNotFoundError` if a folder is not found
- raises `FileNotFoundError` if a dir is not found
Examples:
>>> get_data_asset_source_folder('83636983-f80d-42d6-a075-09b60c6abd5e').as_posix()
>>> get_data_asset_source_dir('83636983-f80d-42d6-a075-09b60c6abd5e').as_posix()
's3://aind-ephys-data/ecephys_668759_2023-07-11_13-07-32'
"""

def get_folder_from_known_s3_locations(
def get_dir_from_known_s3_locations(
asset: codeocean.data_asset.DataAsset,
) -> upath.UPath:
for key in (asset.id, asset.name):
with contextlib.suppress(FileNotFoundError):
return aind_session.utils.get_source_folder_by_name(
return aind_session.utils.get_source_dir_by_name(
key, ttl_hash=aind_session.utils.get_ttl_hash(10 * 60)
)
raise FileNotFoundError(
f"No source folder found for {asset.id=} or {asset.name=} in known S3 buckets"
f"No source dir found for {asset.id=} or {asset.name=} in known S3 buckets"
)

asset = get_data_asset(asset)
Expand Down Expand Up @@ -182,7 +182,7 @@ def get_folder_from_known_s3_locations(
logger.debug(
f"No source_bucket metadata available for {asset.id}, {asset.name}"
)
return get_folder_from_known_s3_locations(asset)
return get_dir_from_known_s3_locations(asset)


@functools.cache
Expand Down
10 changes: 5 additions & 5 deletions src/aind_session/utils/s3_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@


@functools.cache
def get_source_folder_by_name(name: str, ttl_hash: int | None = None) -> upath.UPath:
"""Checks known S3 buckets for a folder with the given name.
def get_source_dir_by_name(name: str, ttl_hash: int | None = None) -> upath.UPath:
"""Checks known S3 buckets for a dir with the given name.
- raises `FileNotFoundError` if the folder is not found
- raises `FileNotFoundError` if the dir is not found
Examples:
>>> get_source_folder_by_name('ecephys_676909_2023-12-13_13-43-40').as_posix()
>>> get_source_dir_by_name('ecephys_676909_2023-12-13_13-43-40').as_posix()
's3://aind-ephys-data/ecephys_676909_2023-12-13_13-43-40'
"""
del ttl_hash # only used for functools.cache
Expand All @@ -32,7 +32,7 @@ def get_source_folder_by_name(name: str, ttl_hash: int | None = None) -> upath.U
if path.exists():
return path
raise FileNotFoundError(
f"No folder named {name!r} found in known data buckets on S3"
f"No dir named {name!r} found in known data buckets on S3"
)


Expand Down

0 comments on commit 50456d5

Please sign in to comment.