diff --git a/.vscode/settings.json b/.vscode/settings.json index 9f48791..1fcddb0 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -6,12 +6,12 @@ "python.analysis.typeCheckingMode": "off", "workbench.colorCustomizations": { "commandCenter.border": "#e7e7e799", - "panel.border": "#d56eb0", - "sash.hoverBorder": "#d56eb0", - "titleBar.activeBackground": "#ca469a", + "panel.border": "#ee2590", + "sash.hoverBorder": "#ee2590", + "titleBar.activeBackground": "#d01076", "titleBar.activeForeground": "#e7e7e7", - "titleBar.inactiveBackground": "#ca469a99", + "titleBar.inactiveBackground": "#d0107699", "titleBar.inactiveForeground": "#e7e7e799" }, - "peacock.color": "#ca469a" + "peacock.color": "#d01076" } \ No newline at end of file diff --git a/README.md b/README.md index 63d0af4..99a03d9 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ pip install aind_session datetime.datetime(2023, 12, 13, 13, 43, 40) >>> session.raw_data_asset.id '16d46411-540a-4122-b47f-8cb2a15d593a' ->>> session.raw_data_folder.as_posix() +>>> session.raw_data_dir.as_posix() 's3://aind-ephys-data/ecephys_676909_2023-12-13_13-43-40' # Additional functionality in namespace extensions: diff --git a/src/aind_session/extensions/ecephys.py b/src/aind_session/extensions/ecephys.py index d667adf..95dbcd4 100644 --- a/src/aind_session/extensions/ecephys.py +++ b/src/aind_session/extensions/ecephys.py @@ -48,19 +48,19 @@ def sorted_data_asset(self) -> codeocean.data_asset.DataAsset: return asset @npc_io.cached_property - def sorted_data_folder(self) -> upath.UPath: + def sorted_data_dir(self) -> upath.UPath: """Path to the sorted data associated with the session, likely in an S3 bucket. - uses latest sorted data asset to get path (existence is checked) - - if no sorted data asset is found, checks for a data folder in S3 + - if no sorted data asset is found, checks for a data dir in S3 - raises `FileNotFoundError` if no sorted data assets are available to link to the session Examples: >>> from aind_session import Session >>> session = Session('ecephys_676909_2023-12-13_13-43-40') - >>> session.ecephys.sorted_data_folder.as_posix() + >>> session.ecephys.sorted_data_dir.as_posix() 's3://codeocean-s3datasetsbucket-1u41qdg42ur9/a2a54575-b5ca-4cf0-acd0-2933e18bcb2d' """ try: @@ -73,15 +73,15 @@ def sorted_data_folder(self) -> upath.UPath: logger.debug( f"Using asset {self.sorted_data_asset.id} to find sorted data path for {self._session.id}" ) - sorted_data_folder = ( - aind_session.utils.codeocean_utils.get_data_asset_source_folder( + sorted_data_dir = ( + aind_session.utils.codeocean_utils.get_data_asset_source_dir( self.sorted_data_asset ) ) logger.debug( - f"Sorted data path found for {self._session.id}: {sorted_data_folder}" + f"Sorted data path found for {self._session.id}: {sorted_data_dir}" ) - return sorted_data_folder + return sorted_data_dir @staticmethod def is_sorted_data_asset(asset_id: str | codeocean.data_asset.DataAsset) -> bool: diff --git a/src/aind_session/extensions/metadata.py b/src/aind_session/extensions/metadata.py index 283f600..899aebf 100644 --- a/src/aind_session/extensions/metadata.py +++ b/src/aind_session/extensions/metadata.py @@ -36,43 +36,43 @@ def __getattr__(self, name: str) -> dict[str, Any]: >>> content = getattr(session.metadata, 'metadata.nd') """ try: - _ = self.json_paths + _ = self.json_files except FileNotFoundError: raise AttributeError( - f"No raw data folder found for {self._session.id}" + f"No raw data dir found for {self._session.id}" ) from None try: - path = next(p for p in self.json_paths if p.stem == str(name)) + path = next(p for p in self.json_files if p.stem == str(name)) except StopIteration: raise AttributeError( - f"No {name}.json found in cached view of {self.json_folder.as_posix()}. Available files: {[p.name for p in self.json_paths]}" + f"No {name}.json found in cached view of {self.json_dir.as_posix()}. Available files: {[p.name for p in self.json_files]}" ) from None else: logger.debug(f"Using contents of metadata json at {path.as_posix()}") return json.loads(path.read_text()) @property - def json_folder(self) -> upath.UPath: - """Parent folder containing metadata json files""" - path = self._session.raw_data_folder # may raise FileNotFoundError + def json_dir(self) -> upath.UPath: + """Parent dir containing metadata json files""" + path = self._session.raw_data_dir # may raise FileNotFoundError logger.debug( - f"Using {path.as_posix()} as parent folder for metadata json files" + f"Using {path.as_posix()} as parent dir for metadata json files" ) return path @npc_io.cached_property - def json_paths(self) -> tuple[upath.UPath, ...]: + def json_files(self) -> tuple[upath.UPath, ...]: """All available metadata jsons in the raw data folder. Examples: >>> from aind_session import Session >>> session = Session('ecephys_676909_2023-12-13_13-43-40') - >>> [path.name for path in session.metadata.json_paths] + >>> [path.name for path in session.metadata.json_files] ['data_description.json', 'metadata.nd.json', 'procedures.json', 'processing.json', 'rig.json', 'session.json', 'subject.json'] """ return tuple( sorted( - (path for path in self.json_folder.iterdir() if path.suffix == ".json"), + (path for path in self.json_dir.iterdir() if path.suffix == ".json"), key=lambda p: p.name, ) ) diff --git a/src/aind_session/session.py b/src/aind_session/session.py index 204e9dc..e72e46c 100644 --- a/src/aind_session/session.py +++ b/src/aind_session/session.py @@ -32,23 +32,23 @@ class Session: datetime.datetime(2023, 12, 13, 13, 43, 40) >>> session.raw_data_asset.id '16d46411-540a-4122-b47f-8cb2a15d593a' - >>> session.raw_data_folder.as_posix() + >>> session.raw_data_dir.as_posix() 's3://aind-ephys-data/ecephys_676909_2023-12-13_13-43-40' # Should be able to handle all platforms: >>> session = Session('multiplane-ophys_741863_2024-08-13_09-26-41') - >>> session.raw_data_folder.as_posix() + >>> session.raw_data_dir.as_posix() 's3://aind-private-data-prod-o5171v/multiplane-ophys_741863_2024-08-13_09-26-41' >>> session = Session('behavior_717121_2024-06-16_11-39-34') - >>> session.raw_data_folder.as_posix() + >>> session.raw_data_dir.as_posix() 's3://aind-private-data-prod-o5171v/behavior_717121_2024-06-16_11-39-34' >>> session = Session('SmartSPIM_698260_2024-07-20_21-47-21') - >>> session.raw_data_folder.as_posix() + >>> session.raw_data_dir.as_posix() Traceback (most recent call last): ... - FileNotFoundError: No raw data asset in CodeOcean and no folder in known data buckets on S3 for SmartSPIM_698260_2024-07-20_21-47-21 + FileNotFoundError: No raw data asset in CodeOcean and no dir in known data buckets on S3 for SmartSPIM_698260_2024-07-20_21-47-21 # Additional functionality for modalities added by extensions: >>> session = Session('ecephys_676909_2023-12-13_13-43-40') @@ -109,12 +109,12 @@ def raw_data_asset(self) -> codeocean.data_asset.DataAsset: return asset @npc_io.cached_property - def raw_data_folder(self) -> upath.UPath: + def raw_data_dir(self) -> upath.UPath: """Path to the raw data associated with the session, likely in an S3 bucket. - uses latest raw data asset to get path (existence is checked) - - if no raw data asset is found, checks for a data folder in S3 + - if no raw data asset is found, checks for a data dir in S3 - raises `FileNotFoundError` if no raw data assets are available to link to the session """ @@ -122,23 +122,23 @@ def raw_data_folder(self) -> upath.UPath: _ = self.raw_data_asset except LookupError: with contextlib.suppress(FileNotFoundError): - path = aind_session.utils.get_source_folder_by_name(self.id) + path = aind_session.utils.get_source_dir_by_name(self.id) logger.debug( - f"No raw data asset uploaded for {self.id}, but data folder found: {path}" + f"No raw data asset uploaded for {self.id}, but data dir found: {path}" ) return path raise FileNotFoundError( - f"No raw data asset in CodeOcean and no folder in known data buckets on S3 for {self.id}" + f"No raw data asset in CodeOcean and no dir in known data buckets on S3 for {self.id}" ) from None else: logger.debug( f"Using asset {self.raw_data_asset.id} to find raw data path for {self.id}" ) - raw_data_folder = aind_session.utils.get_data_asset_source_folder( + raw_data_dir = aind_session.utils.get_data_asset_source_dir( self.raw_data_asset ) - logger.debug(f"Raw data folder found for {self.id}: {raw_data_folder}") - return raw_data_folder + logger.debug(f"Raw data dir found for {self.id}: {raw_data_dir}") + return raw_data_dir if __name__ == "__main__": diff --git a/src/aind_session/utils/codeocean_utils.py b/src/aind_session/utils/codeocean_utils.py index a26b35e..2e589f3 100644 --- a/src/aind_session/utils/codeocean_utils.py +++ b/src/aind_session/utils/codeocean_utils.py @@ -127,10 +127,10 @@ def is_raw_data_asset(asset: str | uuid.UUID | codeocean.data_asset.DataAsset) - return False -def get_data_asset_source_folder( +def get_data_asset_source_dir( asset: str | uuid.UUID | codeocean.data_asset.DataAsset, ) -> upath.UPath: - """Get the source folder for a data asset. + """Get the source dir for a data asset. - the path is constructed from the asset's `source_bucket` metadata - otherwise, the path is constructed from the asset's ID and known S3 @@ -138,23 +138,23 @@ def get_data_asset_source_folder( - otherwse, the path is constructed from the asset's name and known S3 buckets, and existence is checked - - raises `FileNotFoundError` if a folder is not found + - raises `FileNotFoundError` if a dir is not found Examples: - >>> get_data_asset_source_folder('83636983-f80d-42d6-a075-09b60c6abd5e').as_posix() + >>> get_data_asset_source_dir('83636983-f80d-42d6-a075-09b60c6abd5e').as_posix() 's3://aind-ephys-data/ecephys_668759_2023-07-11_13-07-32' """ - def get_folder_from_known_s3_locations( + def get_dir_from_known_s3_locations( asset: codeocean.data_asset.DataAsset, ) -> upath.UPath: for key in (asset.id, asset.name): with contextlib.suppress(FileNotFoundError): - return aind_session.utils.get_source_folder_by_name( + return aind_session.utils.get_source_dir_by_name( key, ttl_hash=aind_session.utils.get_ttl_hash(10 * 60) ) raise FileNotFoundError( - f"No source folder found for {asset.id=} or {asset.name=} in known S3 buckets" + f"No source dir found for {asset.id=} or {asset.name=} in known S3 buckets" ) asset = get_data_asset(asset) @@ -182,7 +182,7 @@ def get_folder_from_known_s3_locations( logger.debug( f"No source_bucket metadata available for {asset.id}, {asset.name}" ) - return get_folder_from_known_s3_locations(asset) + return get_dir_from_known_s3_locations(asset) @functools.cache diff --git a/src/aind_session/utils/s3_utils.py b/src/aind_session/utils/s3_utils.py index ce1f4ec..7f44453 100644 --- a/src/aind_session/utils/s3_utils.py +++ b/src/aind_session/utils/s3_utils.py @@ -16,13 +16,13 @@ @functools.cache -def get_source_folder_by_name(name: str, ttl_hash: int | None = None) -> upath.UPath: - """Checks known S3 buckets for a folder with the given name. +def get_source_dir_by_name(name: str, ttl_hash: int | None = None) -> upath.UPath: + """Checks known S3 buckets for a dir with the given name. - - raises `FileNotFoundError` if the folder is not found + - raises `FileNotFoundError` if the dir is not found Examples: - >>> get_source_folder_by_name('ecephys_676909_2023-12-13_13-43-40').as_posix() + >>> get_source_dir_by_name('ecephys_676909_2023-12-13_13-43-40').as_posix() 's3://aind-ephys-data/ecephys_676909_2023-12-13_13-43-40' """ del ttl_hash # only used for functools.cache @@ -32,7 +32,7 @@ def get_source_folder_by_name(name: str, ttl_hash: int | None = None) -> upath.U if path.exists(): return path raise FileNotFoundError( - f"No folder named {name!r} found in known data buckets on S3" + f"No dir named {name!r} found in known data buckets on S3" )