From 6bee01c232755ecedaee35a311353f55e5b020db Mon Sep 17 00:00:00 2001
From: nilegraddis <nilegraddis@gmail.com>
Date: Thu, 10 Oct 2019 17:16:40 -0700
Subject: [PATCH] apply patched ecephys channel structure assignments to nwb
 data

---
 .../ecephys/ecephys_project_cache.py          | 11 +++++++
 .../ecephys_nwb_session_api.py                | 31 ++++++++++++++++++-
 .../ecephys/test_ecephys_session_nwb_api.py   | 27 ++++++++++++++++
 3 files changed, 68 insertions(+), 1 deletion(-)
 create mode 100644 allensdk/test/brain_observatory/ecephys/test_ecephys_session_nwb_api.py

diff --git a/allensdk/brain_observatory/ecephys/ecephys_project_cache.py b/allensdk/brain_observatory/ecephys/ecephys_project_cache.py
index cd3a74fc5..85f00ee29 100644
--- a/allensdk/brain_observatory/ecephys/ecephys_project_cache.py
+++ b/allensdk/brain_observatory/ecephys/ecephys_project_cache.py
@@ -290,10 +290,21 @@ def get_session_data(self, session_id, filter_by_validity: bool = True, **unit_f
                                                  filter_by_validity=True,
                                                  **unit_filter_kwargs)
 
+        def get_channel_columns():
+            # Structure/CCF annotation columns from the cache's channels table,
+            # limited to this session's rows; passed to the NWB api as a callable.
+            wanted = ["ecephys_structure_id", "ecephys_structure_acronym",
+                      "anterior_posterior_ccf_coordinate",
+                      "dorsal_ventral_ccf_coordinate",
+                      "left_right_ccf_coordinate"]
+            table = self.get_channels()
+            return table.loc[table["ecephys_session_id"] == session_id, wanted]
+
         session_api = EcephysNwbSessionApi(
             path=path,
             probe_lfp_paths=probe_promises,
             additional_unit_metrics=get_analysis_metrics,
+            external_channel_columns=get_channel_columns,
             filter_by_validity=filter_by_validity,
             **unit_filter_kwargs
         )
diff --git a/allensdk/brain_observatory/ecephys/ecephys_session_api/ecephys_nwb_session_api.py b/allensdk/brain_observatory/ecephys/ecephys_session_api/ecephys_nwb_session_api.py
index 72add6c65..d82dfd40e 100644
--- a/allensdk/brain_observatory/ecephys/ecephys_session_api/ecephys_nwb_session_api.py
+++ b/allensdk/brain_observatory/ecephys/ecephys_session_api/ecephys_nwb_session_api.py
@@ -23,6 +23,7 @@ def __init__(self,
                  path,
                  probe_lfp_paths: Optional[Dict[int, FilePromise]] = None,
                  additional_unit_metrics=None,
+                 external_channel_columns=None,
                  **kwargs):
 
         self.filter_out_of_brain_units = kwargs.pop("filter_out_of_brain_units", True)
@@ -35,6 +36,7 @@ def __init__(self,
         self.probe_lfp_paths = probe_lfp_paths
 
         self.additional_unit_metrics = additional_unit_metrics
+        self.external_channel_columns = external_channel_columns
 
     def get_session_start_time(self):
         return self.nwbfile.session_start_time
@@ -93,7 +95,20 @@ def get_channels(self) -> pd.DataFrame:
 
         # these are stored as string in nwb 2, which is not ideal
         # float is also not ideal, but we have nans indicating out-of-brain structures
-        channels["ecephys_structure_id"] = [float(chid) if chid != "" else np.nan for chid in channels["ecephys_structure_id"]]
+        channels["ecephys_structure_id"] = [
+            float(chid) if chid != "" 
+            else np.nan 
+            for chid in channels["ecephys_structure_id"]
+        ]
+        channels["ecephys_structure_acronym"] = [
+            ch_acr if ch_acr not in set(["None", ""]) 
+            else np.nan 
+            for ch_acr in channels["ecephys_structure_acronym"]
+        ]
+
+        if self.external_channel_columns is not None:
+            external_channel_columns = self.external_channel_columns()
+            channels = clobbering_merge(channels, external_channel_columns, left_index=True, right_index=True)
 
         if self.filter_by_validity:
             channels = channels[channels["valid_data"]]
@@ -298,3 +313,17 @@ def _get_full_units_table(self) -> pd.DataFrame:
 
     def get_metadata(self):
         return self.nwbfile.lab_meta_data['metadata'].to_dict()
+
+
+def clobbering_merge(to_df, from_df, **kwargs):
+    """Return pd.merge(to_df, from_df, **kwargs), with from_df winning clashes.
+
+    Columns present in both frames are dropped from to_df first, except join
+    keys named by "on"/"left_on"/"right_on" (a None value there is ignored).
+    """
+    keys = set()
+    for arg in (kwargs.get(name) for name in ("on", "left_on", "right_on")):
+        if arg is not None:  # None is pd.merge's own default: no key to protect
+            keys.update(arg if pd.api.types.is_list_like(arg) else [arg])
+    dropped = [c for c in to_df.columns if c in from_df.columns and c not in keys]
+    return pd.merge(to_df.drop(columns=dropped), from_df, **kwargs)
diff --git a/allensdk/test/brain_observatory/ecephys/test_ecephys_session_nwb_api.py b/allensdk/test/brain_observatory/ecephys/test_ecephys_session_nwb_api.py
new file mode 100644
index 000000000..6c0a8db23
--- /dev/null
+++ b/allensdk/test/brain_observatory/ecephys/test_ecephys_session_nwb_api.py
@@ -0,0 +1,27 @@
+# most of the tests for this functionality are actually in test_write_nwb
+
+import pytest
+import pandas as pd
+
+import allensdk.brain_observatory.ecephys.ecephys_session_api.ecephys_nwb_session_api as ensa
+
+
+@pytest.mark.parametrize("left,right,expected,left_on,right_on", [
+    [  # single key: the shared non-key column "b" is taken from `right`
+        pd.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]}),
+        pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}),
+        pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}),
+        "a", "a",
+    ],
+    [  # every shared column is a key: nothing is dropped from `left`
+        pd.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]}),
+        pd.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [7, 8, 9]}),
+        pd.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [7, 8, 9]}),
+        ["a", "b"], ["a", "b"],
+    ],
+])
+def test_clobbering_merge(left, right, expected, left_on, right_on):
+    """Shared non-key columns come from `right`; key columns are preserved."""
+    # Pass each side its own key so the right_on parameter is really exercised.
+    obtained = ensa.clobbering_merge(left, right, left_on=left_on, right_on=right_on)
+    pd.testing.assert_frame_equal(expected, obtained, check_like=True)
\ No newline at end of file