-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathio.py
More file actions
121 lines (98 loc) · 3.93 KB
/
io.py
File metadata and controls
121 lines (98 loc) · 3.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# coding: utf-8
# Copyright 2020-2022 IFREMER (Brest, FRANCE), all rights reserved.
# contact -- mailto:[email protected]
#
# This file is part of Resourcecode.
# Written by Logilab SA ([email protected])
#
# Resourcecode is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3.0 of the License, or any later version.
#
# Resourcecode is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Resourcecode. If not, see <https://www.gnu.org/licenses/>.
import json
from typing import TYPE_CHECKING, Union
from pathlib import Path
import xarray
import pandas as pd
import scipy
from resourcecode.data import DATA_DIR
if TYPE_CHECKING:
    # This branch only runs under a static type checker. ``Delayed`` is used
    # in this module solely as a string annotation ("Delayed") in the return
    # types of the writers below, so nothing here is needed at runtime.
    try:
        from dask.delayed import Delayed
    except ImportError:
        # dask could not be imported (presumably an optional dependency);
        # keep the name defined so the annotations still resolve.
        Delayed = None
# Per-variable netCDF attribute table, loaded once at import time. It is looked
# up by variable name in ``to_netcdf``, which reads the optional
# "scale_factor" and "add_offset" entries of each record.
# JSON is read as UTF-8 explicitly, so the result does not depend on the
# platform's locale encoding.
with open(DATA_DIR / "netcdf_description.json", encoding="utf-8") as fobj:
    NETCFD_DESCRIPTION = json.load(fobj)
def to_netcdf(
    dataframe: pd.DataFrame, path: Union[str, Path, None] = None
) -> Union[bytes, "Delayed", None]:
    """Write dataframe contents to a netCDF file.

    Every variable that has a non-empty entry in ``NETCFD_DESCRIPTION`` is
    packed as ``(value - add_offset) / scale_factor`` and tagged with the
    attributes of that entry before the dataset is written. Variables without
    an entry are written unchanged.

    Parameters
    ----------
    dataframe: pd.DataFrame
        The data to export; it is converted with ``DataFrame.to_xarray``.
    path: str, Path or file-like, optional
        Path to which to save this dataset. File-like objects are only
        supported by the scipy engine. If no path is provided, this function
        returns the resulting netCDF file as bytes; in this case, scipy is
        used, which does not support netCDF version 4 (the default format
        becomes NETCDF3_64BIT).

    Returns
    -------
    bytes, Delayed or None
        Whatever ``xarray.Dataset.to_netcdf`` returns for the given ``path``.
    """
    dataset = dataframe.to_xarray()

    for name in dataset:
        attrs = NETCFD_DESCRIPTION.get(name, {})
        if attrs:
            # Missing keys mean "no packing": offset 0 and scale 1.
            offset = attrs.get("add_offset", 0)
            scale = attrs.get("scale_factor", 1)
            packed = (dataset[name] - offset) / scale
            dataset[name] = packed
            dataset[name].attrs.update(attrs)

    return dataset.to_netcdf(path)
def to_mat(
    dataframe: pd.DataFrame,
    path: Union[str, Path] = "data.mat",
    name: Union[str, None] = "rscd",
) -> Union[bytes, "Delayed", None]:
    """Write dataframe contents to a MATLAB file.

    The index of ``dataframe`` is moved into a ``time`` column and converted
    to fractional days offset by 719529 (1970-01-01), then every column is
    stored as a field of a single MATLAB structure.

    Parameters
    ----------
    dataframe: pd.DataFrame
        The data to export. NOTE(review): the conversion divides the numeric
        value of the index by the number of nanoseconds in a day, so the index
        is assumed to be a nanosecond-resolution datetime index -- confirm
        against callers.
    path: str, Path or file-like, optional
        Destination handed to ``scipy.io.savemat``.
    name: str, optional
        The name of the structure containing the data in the MATLAB file.
        ``None`` falls back to the default name ``"rscd"``.

    Returns
    -------
    None
        The data is written to ``path``; nothing is returned.
    """
    # Imported here so that the ``scipy.io`` submodule is guaranteed to be
    # loaded: a bare ``import scipy`` does not import it on every version.
    from scipy.io import savemat

    if name is None:
        # savemat needs a string variable name; use the documented default.
        name = "rscd"

    # Convert the pandas index to a proper variable.
    df = dataframe.reset_index(names="time")
    # 1970-01-01 (719529) + time in fractional days from nanoseconds.
    df["time"] = 719529 + pd.to_numeric(df["time"]) / (3600 * 1e9 * 24)

    savemat(path, {name: df.to_dict("list")})
    # The original ended with ``return df.to_mat(path)``; DataFrame has no
    # such method, so every call raised AttributeError after writing the file.
    return None
def read_netcdf(filename_or_obj: Union[str, Path]) -> pd.DataFrame:
    """Open and decode a dataframe from a file or file-like object.

    Parameters
    ----------
    filename_or_obj: str, Path, file-like
        Strings and Path objects are interpreted as a path to a netCDF file
        or an OpenDAP URL and opened with python-netCDF4, unless the filename
        ends with .gz, in which case the file is gunzipped and opened with
        scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
        objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).

    Returns
    -------
    dataframe: pd.DataFrame
        The newly created dataframe, decoded with ``mask_and_scale=True``.

    Notes
    -----
    `read_netcdf` is a simple helper to load netCDF files. Please refer to
    :py:func:`xarray.open_dataset` for more parameters if need be.
    """
    # Use the dataset as a context manager so the underlying file handle is
    # released once the data has been copied into the dataframe, instead of
    # staying open until garbage collection.
    with xarray.open_dataset(
        filename_or_obj=filename_or_obj, mask_and_scale=True
    ) as dataset:
        return dataset.to_dataframe()