Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature rotation and IO improvement #43

Merged
merged 22 commits into from
Sep 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
583be4e
add rotate io
chiaweh2 Sep 4, 2024
1c286d8
fix extra pass
chiaweh2 Sep 4, 2024
a0e2369
rename regrid preprocess
chiaweh2 Sep 4, 2024
21903a2
fix name due to the batch operation
chiaweh2 Sep 4, 2024
d5df29d
fix pylint warning
chiaweh2 Sep 4, 2024
778a735
implement vector rotate class and first test complete
chiaweh2 Sep 4, 2024
0a0b08f
improve the chunk and resolve the different freq but same varname issue
chiaweh2 Sep 6, 2024
840d924
correct long name and unit in the output ds
chiaweh2 Sep 6, 2024
9fe138f
add data freq
chiaweh2 Sep 6, 2024
ffabfe6
finish testing the freq find function for historical data
chiaweh2 Sep 6, 2024
ab3dbbb
finish rotate and regrid daily data
chiaweh2 Sep 6, 2024
e2e95ee
rename rotation script for clarity
chiaweh2 Sep 9, 2024
537aa36
fix bug in historical local regrid chunk input
chiaweh2 Sep 9, 2024
09d9f97
fix class name and usage message
chiaweh2 Sep 9, 2024
610ae2f
add feature to calculate historical run climatology monthly and daily
chiaweh2 Sep 9, 2024
2c9ca74
fixing misspell in the mag
chiaweh2 Sep 9, 2024
52f3504
include the hist test sample
chiaweh2 Sep 9, 2024
d531712
fix test with missing kwarg in the histrun io based on new dev in io …
chiaweh2 Sep 9, 2024
f0b6408
add test case for historical run local and opendap tested
chiaweh2 Sep 10, 2024
db68001
load data before compute to avoid test fail
chiaweh2 Sep 11, 2024
d4e9432
remove the computation expensive part out when testing with opendap
chiaweh2 Sep 11, 2024
dfc071b
Merge pull request #42 from NOAA-CEFI-Portal/feature_rotate
chiaweh2 Sep 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 141 additions & 10 deletions mom6/mom6_module/mom6_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,11 @@
import numpy as np
import xarray as xr
from mom6 import DATA_PATH
from mom6.mom6_module.mom6_types import ModelRegionOptions,GridOptions,DataTypeOptions,DataSourceOptions
from mom6.mom6_module.mom6_types import (
ModelRegionOptions,GridOptions,
DataTypeOptions,DataSourceOptions,
DataFreqOptions
)

warnings.simplefilter("ignore")
xr.set_options(keep_attrs=True)
Expand Down Expand Up @@ -132,6 +136,7 @@ def __init__(
tercile_relative_dir : str = None,
grid : GridOptions = 'raw',
source : DataSourceOptions = 'local',
chunks : dict = None
) -> None:
"""
input for the class to get the forecast data
Expand Down Expand Up @@ -165,7 +170,10 @@ def __init__(
self.data_relative_dir = data_relative_dir
self.static_relative_dir = static_relative_dir
self.tercile_relative_dir = tercile_relative_dir

if chunks is None:
self.chunks = {}
else :
self.chunks = chunks

def get_all(self) -> xr.Dataset:
"""
Expand Down Expand Up @@ -194,7 +202,7 @@ def get_all(self) -> xr.Dataset:
else:
ds_static = MOM6Static.get_grid(self.static_relative_dir)
# setup chuck
io_chunk = {}
io_chunk = self.chunks
elif self.source == 'opendap':
file_list = OpenDapStore(grid=self.grid,data_type='forecast').get_catalog()
for file in file_list:
Expand Down Expand Up @@ -234,7 +242,7 @@ def get_all(self) -> xr.Dataset:
else:
mom6_dir = os.path.join(DATA_PATH,self.data_relative_dir)
file_list = glob.glob(f'{mom6_dir}/*.nc')
io_chunk = {}
io_chunk = self.chunks
elif self.source == 'opendap':
file_list = OpenDapStore(grid=self.grid,data_type='forecast').get_catalog()
io_chunk = {'init': 1,'member':1,'lead':-1}
Expand Down Expand Up @@ -293,7 +301,7 @@ def get_tercile(
raise OSError('for raw grid please input the path to grid file')
else:
ds_static = MOM6Static.get_grid(self.static_relative_dir)
io_chunk = {}
io_chunk = self.chunks
elif self.source == 'opendap':
file_list = OpenDapStore(grid=self.grid,data_type='forecast').get_catalog()
for file in file_list:
Expand Down Expand Up @@ -339,7 +347,7 @@ def get_tercile(
else:
mom6_dir = os.path.join(DATA_PATH,self.tercile_relative_dir)
file_list = glob.glob(f'{mom6_dir}/*.nc')
io_chunk = {}
io_chunk = self.chunks
elif self.source == 'opendap':
file_list = OpenDapStore(grid=self.grid,data_type='forecast').get_catalog()
io_chunk = {'init': 4,'member':1,'lead':-1}
Expand Down Expand Up @@ -493,6 +501,7 @@ def __init__(
static_relative_dir : str = None,
grid : GridOptions = 'raw',
source : DataSourceOptions = 'local',
chunks : dict = None
) -> None:
"""
input for getting the historical run data
Expand Down Expand Up @@ -522,13 +531,88 @@ def __init__(
self.source = source
self.data_relative_dir = data_relative_dir
self.static_relative_dir = static_relative_dir
if chunks is None:
self.chunks = {}
else :
self.chunks = chunks

def get_all(self) -> xr.Dataset:
@staticmethod
def freq_find(
file_list : list[str],
freq : DataFreqOptions = None):
"""finding the correct data based on input frequency

Parameters
----------
file_list : list
a list of files with dir path
freq : str
frequency of the data. Default is None.

Returns
-------
list
a list with only one element which contain the file need
to be read

Raises
------
ValueError
if multiple file present in the `file_list` but no freq is provided
an error message is shown in stdout
"""
# differen frequency file exists resulting in multi files
if len(file_list)>1 and freq is not None :
if freq not in ['annual','monthly','daily']:
raise ValueError(
'freq kwarg need to be given due to multiple files exist '+
'with the same variable with different frequency.'
)
filtered_file_list = None
for file in file_list:
filename_split = file.split('/')[-1].split('.')

# 0 : exp, 1 : time span, 2: varname, 3:file format
freq_span = len(filename_split[1])
if freq_span == int(4+4+1) and freq == 'annual':
filtered_file_list = [file]
elif freq_span == int(6+6+1) and freq == 'monthly':
filtered_file_list = [file]
elif freq_span == int(8+8+1) and freq == 'daily':
filtered_file_list = [file]
else :
pass
if filtered_file_list is None:
raise ValueError(
'freq kwarg need to be given due to multiple files exist '+
'with the same variable with different frequency.'
)

elif len(file_list)>1 and freq is None :
raise ValueError(
'freq kwarg need to be given due to multiple files exist '+
'with the same variable with different frequency.'
)

else:
filtered_file_list = file_list

return filtered_file_list

def get_all(
self,
freq : DataFreqOptions = None
) -> xr.Dataset:
"""
Return the mom6 all rawgrid/regridded historical run field
with the static field combined and setting the
lon lat related variables to coordinate

Parameters
----------
freq : str
frequency of the data. Default is None.

Returns
-------
xr.Dataset
Expand All @@ -549,7 +633,7 @@ def get_all(self) -> xr.Dataset:
raise IOError('for raw grid please input the path to grid file')
else:
ds_static = MOM6Static.get_grid(self.static_relative_dir)
io_chunk = {}
io_chunk = self.chunks
elif self.source == 'opendap':
file_list = OpenDapStore(grid=self.grid,data_type='historical').get_catalog()
for file in file_list:
Expand All @@ -560,6 +644,8 @@ def get_all(self) -> xr.Dataset:

file_read = [file for file in file_list if f'.{self.var}.' in file]

file_read = self.freq_find(file_read,freq)

# merge the static field with the variables
ds = xr.open_mfdataset(
file_read,combine='nested',
Expand Down Expand Up @@ -588,15 +674,18 @@ def get_all(self) -> xr.Dataset:
else:
mom6_dir = os.path.join(DATA_PATH,self.data_relative_dir)
file_list = glob.glob(f'{mom6_dir}/*.nc')
io_chunk = self.chunks
elif self.source == 'opendap':
file_list = OpenDapStore(grid=self.grid,data_type='historical').get_catalog()
io_chunk = {'time':100}

file_read = [file for file in file_list if f'.{self.var}.' in file]
file_read = self.freq_find(file_read,freq)
ds = xr.open_mfdataset(
file_read,
combine='nested',
concat_dim='time',
chunks={'time': 100}
chunks=io_chunk
).sortby('time')

# test if accident read raw file
Expand All @@ -617,6 +706,7 @@ def get_single(
year : int = 1993,
month : int = 1,
day : int = 1,
freq : DataFreqOptions = None
) -> xr.Dataset:
"""
Return the mom6 rawgrid/regridded historical run field
Expand All @@ -631,6 +721,8 @@ def get_single(
month of the historical run
day : int
day in month of the historical run
freq : str
frequency of the data. Default is None.

Returns
-------
Expand All @@ -639,7 +731,7 @@ def get_single(
all forecast field include in the `file_list`. The
Dataset object is lazily-loaded.
"""
ds = self.get_all()
ds = self.get_all(freq=freq)

min_year = ds['time.year'].min().data
max_year = ds['time.year'].max().data
Expand Down Expand Up @@ -760,6 +852,45 @@ def get_grid(
'geolon_u','geolat_u',
'geolon_v','geolat_v']
)

@staticmethod
def get_rotate(
    data_relative_dir : str
) -> xr.Dataset:
    """Return the original mom6 grid rotation information.

    The rotation information (cos/sin of the grid rotation angle)
    is stored in the ice_monthly.static.nc file.

    Parameters
    ----------
    data_relative_dir : str
        relative path from DATAPATH setup in config file to
        the actual forecast/reforecast data, by setting 'forecast/'
        which makes the absolute path to DATAPATH/forecast/

    Returns
    -------
    xr.Dataset
        The Xarray Dataset object of mom6's grid lon lat
    """
    static_file = os.path.join(
        DATA_PATH, data_relative_dir, 'ice_monthly.static.nc'
    )
    ds_rotate = xr.open_dataset(static_file)

    # map the ice static field names onto the regular coord names
    rename_map = {
        'yT':'yh',
        'xT':'xh',
        'GEOLON':'geolon',
        'GEOLAT':'geolat',
        'COSROT':'cosrot',
        'SINROT':'sinrot'
    }
    ds_rotate = ds_rotate.rename(rename_map)

    # promote the lon/lat variables to coordinates before returning
    return ds_rotate.set_coords(['geolon','geolat'])

@staticmethod
def get_mask(
data_relative_dir : str,
Expand Down
37 changes: 36 additions & 1 deletion mom6/mom6_module/mom6_mhw.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,29 @@ def generate_forecast_batch(
# output dataset
ds_mhw = xr.Dataset()
ds_mhw[f'{self.varname}_threshold{quantile_threshold:02d}'] = da_threshold
ds_mhw[f'{self.varname}_threshold{quantile_threshold:02d}'].attrs['long_name'] = (
f'{self.varname} threshold{quantile_threshold:02d})'
)
ds_mhw[f'{self.varname}_threshold{quantile_threshold:02d}'].attrs['units'] = 'degC'

ds_mhw[f'mhw_prob{quantile_threshold:02d}'] = da_prob
ds_mhw[f'mhw_prob{quantile_threshold:02d}'].attrs['long_name'] = (
f'marine heatwave probability (threshold{quantile_threshold:02d})'
)
ds_mhw[f'mhw_prob{quantile_threshold:02d}'].attrs['units'] = 'unitless'

ds_mhw['ssta_avg'] = da_mhw_mag_ave
ds_mhw['mhw_magnitude_indentified_ens'] = da_mhw_mag
ds_mhw['ssta_avg'].attrs['long_name'] = (
'anomalous sea surface temperature ensemble mean'
)
ds_mhw['ssta_avg'].attrs['units'] = 'degC'

ds_mhw['mhw_mag_indentified_ens'] = da_mhw_mag
ds_mhw['mhw_mag_indentified_ens'].attrs['long_name'] = (
'marine heatwave magnitude in each ensemble'
)
ds_mhw['mhw_mag_indentified_ens'].attrs['units'] = 'degC'

ds_mhw.attrs['period_of_quantile'] = da_threshold.attrs['period_of_quantile']
ds_mhw.attrs['period_of_climatology'] = da_threshold.attrs['period_of_climatology']

Expand Down Expand Up @@ -239,4 +259,19 @@ def generate_forecast_single(
ds_mhw['ssta_avg'] = da_mhw_mag_ave
ds_mhw['mhw_mag_indentified_ens'] = da_mhw_mag

ds_mhw[f'mhw_prob{quantile_threshold:02d}'].attrs['long_name'] = (
f'marine heatwave probability (threshold{quantile_threshold:02d})'
)
ds_mhw[f'mhw_prob{quantile_threshold:02d}'].attrs['units'] = 'unitless'

ds_mhw['ssta_avg'].attrs['long_name'] = (
'anomalous sea surface temperature ensemble mean'
)
ds_mhw['ssta_avg'].attrs['units'] = 'degC'

ds_mhw['mhw_mag_indentified_ens'].attrs['long_name'] = (
'marine heatwave magnitude in each ensemble'
)
ds_mhw['mhw_mag_indentified_ens'].attrs['units'] = 'degC'

return ds_mhw
Loading