Skip to content

Commit

Permalink
Merge pull request #4 from maestroque/bids-support
Browse files Browse the repository at this point in the history
Add Physio object generation from BIDS
  • Loading branch information
m-miedema authored Aug 21, 2024
2 parents 0581dc2 + ffa8e71 commit e0562d4
Show file tree
Hide file tree
Showing 6 changed files with 312 additions and 7 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,7 @@ dmypy.json
.pyre/

.vscode/

# Test Data
physutils/tests/data/bids-dir
tmp.*
124 changes: 122 additions & 2 deletions physutils/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,143 @@

import importlib
import json
import os
import os.path as op

import numpy as np
from bids import BIDSLayout
from loguru import logger

from physutils import physio

EXPECTED = ["data", "fs", "history", "metadata"]


def load_from_bids(
bids_path,
subject,
session=None,
task=None,
run=None,
recording=None,
extension="tsv.gz",
suffix="physio",
):
"""
Load physiological data from BIDS-formatted directory
Parameters
----------
bids_path : str
Path to BIDS-formatted directory
subject : str
Subject identifier
session : str
Session identifier
task : str
Task identifier
run : str
Run identifier
suffix : str
Suffix of file to load
Returns
-------
data : :class:`physutils.Physio`
Loaded physiological data
"""

# check if file exists and is in BIDS format
if not op.exists(bids_path):
raise FileNotFoundError(f"Provided path {bids_path} does not exist")

layout = BIDSLayout(bids_path, validate=False)
bids_file = layout.get(
subject=subject,
session=session,
task=task,
run=run,
suffix=suffix,
extension=extension,
recording=recording,
)
logger.debug(f"BIDS file found: {bids_file}")
if len(bids_file) == 0:
raise FileNotFoundError(
f"No files found for subject {subject}, session {session}, task {task}, run {run}, recording {recording}"
)
if len(bids_file) > 1:
raise ValueError(
f"Multiple files found for subject {subject}, session {session}, task {task}, run {run}, recording {recording}"
)

config_file = bids_file[0].get_metadata()
fs = config_file["SamplingFrequency"]
t_start = config_file["StartTime"] if "StartTime" in config_file else 0
columns = config_file["Columns"]
logger.debug(f"Loaded structure contains columns: {columns}")

physio_objects = {}
data = np.loadtxt(bids_file[0].path)

if "time" in columns:
idx_0 = np.argmax(data[:, columns.index("time")] >= t_start)
else:
idx_0 = 0
logger.warning(
"No time column found in file. Assuming data starts at the beginning of the file"
)

for col in columns:
col_physio_type = None
if any([x in col.lower() for x in ["cardiac", "ppg", "ecg", "card", "pulse"]]):
col_physio_type = "cardiac"
elif any(
[
x in col.lower()
for x in ["respiratory", "rsp", "resp", "breath", "co2", "o2"]
]
):
col_physio_type = "respiratory"
elif any([x in col.lower() for x in ["trigger", "tr"]]):
col_physio_type = "trigger"
elif any([x in col.lower() for x in ["time"]]):
continue
else:
logger.warning(
f"Column {col}'s type cannot be determined. Additional features may be missing."
)

if col_physio_type in ["cardiac", "respiratory"]:
physio_objects[col] = physio.Physio(
data[idx_0:, columns.index(col)],
fs=fs,
history=[physio._get_call(exclude=[])],
)
physio_objects[col]._physio_type = col_physio_type
physio_objects[col]._label = (
bids_file[0].filename.split(".")[0].replace("_physio", "")
)

if col_physio_type == "trigger":
# TODO: Implement trigger loading using the MRI data object
logger.warning("MRI trigger characteristics extraction not yet implemented")
physio_objects[col] = physio.Physio(
data[idx_0:, columns.index(col)],
fs=fs,
history=[physio._get_call(exclude=[])],
)

return physio_objects


def load_physio(data, *, fs=None, dtype=None, history=None, allow_pickle=False):
"""
Returns `Physio` object with provided data
Parameters
----------
data : str or array_like or Physio_like
data : str, os.path.PathLike or array_like or Physio_like
Input physiological data. If array_like, should be one-dimensional
fs : float, optional
Sampling rate of `data`. Default: None
Expand All @@ -46,7 +166,7 @@ def load_physio(data, *, fs=None, dtype=None, history=None, allow_pickle=False):

# first check if the file was made with `save_physio`; otherwise, try to
# load it as a plain text file and instantiate a history
if isinstance(data, str):
if isinstance(data, str) or isinstance(data, os.PathLike):
try:
inp = dict(np.load(data, allow_pickle=allow_pickle))
for attr in EXPECTED:
Expand Down
54 changes: 51 additions & 3 deletions physutils/physio.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,10 +220,12 @@ def new_physio_like(

if suppdata is None:
suppdata = ref_physio._suppdata if copy_suppdata else None

label = ref_physio.label if copy_label else None
physio_type = ref_physio.physio_type if copy_physio_type else None
computed_metrics = list(ref_physio.computed_metrics) if copy_computed_metrics else []
computed_metrics = (
dict(ref_physio.computed_metrics) if copy_computed_metrics else {}
)

# make new class
out = ref_physio.__class__(
Expand Down Expand Up @@ -340,7 +342,7 @@ def __init__(
reject=np.empty(0, dtype=int),
)
self._suppdata = None if suppdata is None else np.asarray(suppdata).squeeze()
self._computed_metrics = []
self._computed_metrics = dict()

def __array__(self):
return self.data
Expand Down Expand Up @@ -542,3 +544,49 @@ def neurokit2phys(
metadata = dict(peaks=peaks)

return cls(data, fs=fs, metadata=metadata, **kwargs)


class MRIConfig:
"""
Class to hold MRI configuration information
Parameters
----------
slice_timings : 1D array_like
Slice timings in seconds
n_scans : int
Number of volumes in the MRI scan
tr : float
Repetition time in seconds
"""

def __init__(self, slice_timings=None, n_scans=None, tr=None):
if np.ndim(slice_timings) > 1:
raise ValueError("Slice timings must be a 1-dimensional array.")

self._slice_timings = np.asarray(slice_timings)
self._n_scans = int(n_scans)
self._tr = float(tr)
logger.debug(f"Initializing new MRIConfig object: {self}")

def __str__(self):
return "{name}(n_scans={n_scans}, tr={tr})".format(
name=self.__class__.__name__,
n_scans=self._n_scans,
tr=self._tr,
)

@property
def slice_timings(self):
"""Slice timings in seconds"""
return self._slice_timings

@property
def n_scans(self):
"""Number of volumes in the MRI scan"""
return self._n_scans

@property
def tr(self):
"""Repetition time in seconds"""
return self._tr
43 changes: 42 additions & 1 deletion physutils/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@
import pytest

from physutils import io, physio
from physutils.tests.utils import filter_physio, get_test_data_path
from physutils.tests.utils import (
create_random_bids_structure,
filter_physio,
get_test_data_path,
)


def test_load_physio(caplog):
Expand Down Expand Up @@ -46,6 +50,43 @@ def test_load_physio(caplog):
io.load_physio([1, 2, 3])


def test_load_from_bids():
create_random_bids_structure("physutils/tests/data", recording_id="cardiac")
phys_array = io.load_from_bids(
"physutils/tests/data/bids-dir",
subject="01",
session="01",
task="rest",
run="01",
recording="cardiac",
)

for col in phys_array.keys():
assert isinstance(phys_array[col], physio.Physio)
# The data saved are the ones after t_0 = -3s
assert phys_array[col].data.size == 80000
assert phys_array[col].fs == 10000.0
assert phys_array[col].history[0][0] == "physutils.io.load_from_bids"


def test_load_from_bids_no_rec():
create_random_bids_structure("physutils/tests/data")
phys_array = io.load_from_bids(
"physutils/tests/data/bids-dir",
subject="01",
session="01",
task="rest",
run="01",
)

for col in phys_array.keys():
assert isinstance(phys_array[col], physio.Physio)
# The data saved are the ones after t_0 = -3s
assert phys_array[col].data.size == 80000
assert phys_array[col].fs == 10000.0
assert phys_array[col].history[0][0] == "physutils.io.load_from_bids"


def test_save_physio(tmpdir):
pckl = io.load_physio(get_test_data_path("ECG.phys"), allow_pickle=True)
out = io.save_physio(tmpdir.join("tmp").purebasename, pckl)
Expand Down
91 changes: 91 additions & 0 deletions physutils/tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@
Utilities for testing
"""

import json
from os import makedirs
from os.path import join as pjoin

import numpy as np
import pandas as pd
from pkg_resources import resource_filename
from scipy import signal

Expand Down Expand Up @@ -77,3 +80,91 @@ def filter_physio(data, cutoffs, method, *, order=3):
filtered = physio.new_physio_like(data, signal.filtfilt(b, a, data))

return filtered


def create_random_bids_structure(data_dir, recording_id=None):

dataset_description = {
"Name": "Example BIDS Dataset",
"BIDSVersion": "1.7.0",
"License": "",
"Authors": ["Author1", "Author2"],
"Acknowledgements": "",
"HowToAcknowledge": "",
"Funding": "",
"ReferencesAndLinks": "",
"DatasetDOI": "",
}

physio_json = {
"SamplingFrequency": 10000.0,
"StartTime": -3,
"Columns": [
"time",
"respiratory_chest",
"trigger",
"cardiac",
"respiratory_CO2",
"respiratory_O2",
],
}

# Create BIDS structure directory
subject_id = "01"
session_id = "01"
task_id = "rest"
run_id = "01"
recording_id = recording_id

bids_dir = pjoin(
data_dir, "bids-dir", f"sub-{subject_id}", f"ses-{session_id}", "func"
)
makedirs(bids_dir, exist_ok=True)

# Create dataset_description.json
with open(pjoin(data_dir, "bids-dir", "dataset_description.json"), "w") as f:
json.dump(dataset_description, f, indent=4)

if recording_id is not None:
filename_body = f"sub-{subject_id}_ses-{session_id}_task-{task_id}_run-{run_id}_recording-{recording_id}"
else:
filename_body = f"sub-{subject_id}_ses-{session_id}_task-{task_id}_run-{run_id}"

# Create physio.json
with open(
pjoin(
bids_dir,
f"{filename_body}_physio.json",
),
"w",
) as f:
json.dump(physio_json, f, indent=4)

# Initialize tsv file with random data columns and a time column
num_rows = 100000
num_cols = 6
time_offset = 2
time = (
np.arange(num_rows) / physio_json["SamplingFrequency"]
+ physio_json["StartTime"]
- time_offset
)
data = np.column_stack((time, np.random.rand(num_rows, num_cols - 1).round(8)))
df = pd.DataFrame(data)

# Compress dataframe into tsv.gz
tsv_gz_file = pjoin(
bids_dir,
f"{filename_body}_physio.tsv.gz",
)

df.to_csv(
tsv_gz_file,
sep="\t",
index=False,
header=False,
float_format="%.8e",
compression="gzip",
)

return bids_dir
Loading

0 comments on commit e0562d4

Please sign in to comment.