|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | +""" |
| 3 | +GrADS相关 |
| 4 | +
|
| 5 | +@Time : 2020/9/20 9:52 |
| 6 | +@Author : modabao |
| 7 | +""" |
| 8 | + |
| 9 | +import sys |
| 10 | +from datetime import datetime |
| 11 | + |
| 12 | +import numpy as np |
| 13 | +import pandas as pd |
| 14 | +from osgeo import osr |
| 15 | +import xarray as xr |
| 16 | + |
| 17 | + |
| 18 | +class GrADS(object): |
| 19 | + """ |
| 20 | + 根据ctl文件解析binary数据文件 |
| 21 | + """ |
| 22 | + def __init__(self, ctl_path=None, bin_path=None): |
| 23 | + self.ctl_path = ctl_path |
| 24 | + self.bin_path = bin_path |
| 25 | + if ctl_path: |
| 26 | + self.__ctl = {} |
| 27 | + self.__parse() |
| 28 | + |
| 29 | + def set_ctl_path(self, ctl_path): |
| 30 | + self.ctl_path = ctl_path |
| 31 | + if ctl_path: |
| 32 | + self.__ctl = {} |
| 33 | + self.__parse() |
| 34 | + |
| 35 | + def set_bin_path(self, bin_path): |
| 36 | + self.bin_path = bin_path |
| 37 | + |
| 38 | + def __parse(self): |
| 39 | + """ |
| 40 | + """ |
| 41 | + with open(self.ctl_path) as f: |
| 42 | + ctl_lines = f.readlines() |
| 43 | + self.ctl_lines = ctl_lines |
| 44 | + checked = -1 |
| 45 | + for n, line in enumerate(ctl_lines): |
| 46 | + # if this line (has been parsed) or (is empty) or (starts with '*'), skip |
| 47 | + if (n <= checked) or (not (line := line.strip())) or (line[0] == '*'): |
| 48 | + continue |
| 49 | + first_word, *rest_words = line.lower().split(maxsplit=1) |
| 50 | + if first_word == 'options': |
| 51 | + self.__options(*rest_words) |
| 52 | + elif first_word == 'undef': |
| 53 | + self.__undef(*rest_words) |
| 54 | + elif first_word == 'pdef': |
| 55 | + self.__pdef(*rest_words) |
| 56 | + elif first_word in ('xdef', 'ydef'): |
| 57 | + self.__xydef(*rest_words, first_word) |
| 58 | + elif first_word == 'zdef': |
| 59 | + checked = self.__zdef(*rest_words, n) |
| 60 | + elif first_word == 'tdef': |
| 61 | + self.__tdef(*rest_words) |
| 62 | + elif first_word == 'vars': |
| 63 | + checked = self.__vars(*rest_words, n) |
| 64 | + |
| 65 | + def __options(self, rest_words): |
| 66 | + if 'big_endian' in rest_words: |
| 67 | + self.__ctl['dtype'] = '>f4' |
| 68 | + elif 'little_endian' in rest_words: |
| 69 | + self.__ctl['dtype'] = '<f4' |
| 70 | + elif 'byteswapped' in rest_words: |
| 71 | + self.__ctl['dtype'] = '>f4' if sys.byteorder == 'big' else '<f4' |
| 72 | + if 'template' in rest_words: |
| 73 | + self.__ctl['template'] = True |
| 74 | + |
| 75 | + def __undef(self, rest_words): |
| 76 | + self.__ctl['undef'] = float(rest_words) |
| 77 | + |
| 78 | + def __pdef(self, rest_words): |
| 79 | + isize, jsize, prj, latref, lonref, iref, jref, slat, nlat, standard_lon, dx, dy = \ |
| 80 | + rest_words.split() |
| 81 | + assert prj == 'lcc' |
| 82 | + isize, jsize = int(isize), int(jsize) |
| 83 | + latref, lonref, iref, jref, slat, nlat, standard_lon, dx, dy = ( |
| 84 | + float(x) for x in [latref, lonref, iref, jref, slat, nlat, standard_lon, dx, dy] |
| 85 | + ) |
| 86 | + lcc = osr.SpatialReference() |
| 87 | + lcc.SetLCC(slat, nlat, latref, lonref, (iref - 1) * dx, (jref - 1) * dy) |
| 88 | + lcc.SetLinearUnitsAndUpdateParameters('kilometre', dx) |
| 89 | + geo = lcc.CloneGeogCS() |
| 90 | + lcc2geo = osr.CoordinateTransformation(lcc, geo) |
| 91 | + geo2lcc = osr.CoordinateTransformation(geo, lcc) |
| 92 | + self.__ctl['pdef'] = {'lcc2geo': lcc2geo, 'geo2lcc': geo2lcc} |
| 93 | + self.__ctl['x_size'], self.__ctl['y_size'] = isize, jsize |
| 94 | + self.__ctl['x'], self.__ctl['y'] = np.arange(isize), np.arange(jsize) |
| 95 | + |
| 96 | + def __xydef(self, rest_words, first_word): |
| 97 | + n, linear, start, step = rest_words.split() |
| 98 | + assert linear == 'linear' |
| 99 | + n = int(n) |
| 100 | + start = float(start) |
| 101 | + step = float(step) |
| 102 | + key = 'lat' if first_word == 'ydef' else 'lon' |
| 103 | + self.__ctl[key] = np.linspace(start, start + (n - 1) * step, n) |
| 104 | + if 'pdef' in self.__ctl: |
| 105 | + return |
| 106 | + else: |
| 107 | + self.__ctl[f'{first_word[0]}_size'] = n |
| 108 | + |
| 109 | + def __zdef(self, rest_words, n): |
| 110 | + znum, levels, *value_list = rest_words.split() |
| 111 | + assert levels == 'levels' |
| 112 | + znum = int(znum) |
| 113 | + if znum == len(value_list): |
| 114 | + value_list = np.fromiter(value_list, dtype=np.float) |
| 115 | + checked = n |
| 116 | + else: |
| 117 | + checked = n + znum |
| 118 | + value_list = np.fromiter(self.ctl_lines[n + 1: checked + 1], np.float) |
| 119 | + self.__ctl['zdef'] = value_list |
| 120 | + return checked |
| 121 | + |
| 122 | + def __tdef(self, rest_words): |
| 123 | + """ |
| 124 | + TODO: not all circumstances have been handled |
| 125 | + Args: |
| 126 | + rest_words: |
| 127 | +
|
| 128 | + Returns: |
| 129 | +
|
| 130 | + """ |
| 131 | + ns_t, linear, t_start, dt = rest_words.split() |
| 132 | + assert linear == 'linear' |
| 133 | + ns_t = int(ns_t) |
| 134 | + t_start = datetime.strptime(t_start, '%HZ%d%b%Y') |
| 135 | + unit_map = {'mn': 'min', 'hr': 'H', 'dy': 'D', 'mo': 'M', 'yr': 'Y'} |
| 136 | + dt = f'{dt[:-2]}{unit_map[dt[-2:]]}' |
| 137 | + t = pd.date_range(start=t_start, periods=ns_t, freq=dt) |
| 138 | + self.__ctl['tdef'] = t |
| 139 | + |
| 140 | + def __vars(self, rest_words, n): |
| 141 | + n_var = int(rest_words) |
| 142 | + variables = [{'name': y[0], 'layers': int(y[1]), 'describe': y[3]} |
| 143 | + for x in self.ctl_lines[n + 1: n + n_var + 1] |
| 144 | + if (y := x.split(maxsplit=3))] |
| 145 | + variables = pd.DataFrame(variables).set_index('name') |
| 146 | + self.__ctl['one_time_levels'] = variables['layers'].sum() |
| 147 | + variables['layer_start'] = variables['layers'].cumsum() - variables['layers'] |
| 148 | + variables['count'] = variables['layers'] * self.__ctl['x_size'] * self.__ctl['y_size'] |
| 149 | + self.__ctl['vars'] = variables |
| 150 | + return n + n_var |
| 151 | + |
| 152 | + def reset_time(self, time=None, start_time=None): |
| 153 | + """ |
| 154 | +
|
| 155 | + Args: |
| 156 | + time: pd.DatetimeIndex |
| 157 | + start_time: datetime.datetime |
| 158 | +
|
| 159 | + Returns: |
| 160 | + None |
| 161 | +
|
| 162 | + """ |
| 163 | + assert 'tdef' in self.__ctl |
| 164 | + if isinstance(time, pd.DatetimeIndex): |
| 165 | + self.__ctl['tdef'] = time |
| 166 | + elif start_time: |
| 167 | + self.__ctl['tdef'] += start_time - self.__ctl['tdef'][0] |
| 168 | + |
| 169 | + def reset_dtype(self, dtype): |
| 170 | + if dtype in ('>f4', '<f4'): |
| 171 | + print(f'dtype changes from "{self.__ctl["dtype"]}" to "{dtype}"') |
| 172 | + self.__ctl['dtype'] = dtype |
| 173 | + else: |
| 174 | + raise ValueError('dtype must in {">f4", "<f4"}') |
| 175 | + |
| 176 | + @property |
| 177 | + def ctl(self): |
| 178 | + return self.__ctl.copy() |
| 179 | + |
| 180 | + def get(self, name, time=None): |
| 181 | + """ |
| 182 | + TODO: lazy loading using dask |
| 183 | + Args: |
| 184 | + name: |
| 185 | + time: |
| 186 | +
|
| 187 | + Returns: |
| 188 | +
|
| 189 | + """ |
| 190 | + if isinstance(time, int): |
| 191 | + return self.get_element_by_time_id(name, time) |
| 192 | + if time is None: |
| 193 | + time = range(len(self.__ctl['tdef'])) |
| 194 | + if hasattr(time, '__iter__'): |
| 195 | + element = xr.concat((self.get_element_by_time_id(name, time_id) for time_id in time), |
| 196 | + dim='time') |
| 197 | + return element |
| 198 | + |
| 199 | + def get_element_by_time_id(self, name, time_id): |
| 200 | + """ |
| 201 | + 读取一个要素一个时次 |
| 202 | +
|
| 203 | + Args: |
| 204 | + name: 要素名称 |
| 205 | + time_id: 时次 |
| 206 | +
|
| 207 | + Returns: |
| 208 | +
|
| 209 | + """ |
| 210 | + level_start = self.__ctl['vars'].loc[name, 'layer_start'] |
| 211 | + levels = self.__ctl['vars'].loc[name, 'layers'] |
| 212 | + count = self.__ctl['vars'].loc[name, 'count'] |
| 213 | + offset = ((time_id * self.__ctl['one_time_levels'] + level_start) * |
| 214 | + self.__ctl['x_size'] * self.__ctl['y_size'] * 4) |
| 215 | + shape = 1, levels, self.__ctl['y_size'], self.__ctl['x_size'] |
| 216 | + element = np.fromfile(self.bin_path, dtype=self.__ctl['dtype'], |
| 217 | + count=count, offset=offset).reshape(shape) |
| 218 | + y, x = ('y', 'x') if 'pdef' in self.__ctl else ('lat', 'lon') |
| 219 | + coords = (('time', self.__ctl['tdef'][[time_id]]), ('level', self.__ctl['zdef']), |
| 220 | + (y, self.__ctl[y]), (x, self.__ctl[x])) |
| 221 | + element = xr.DataArray(element, coords=coords, name=name) |
| 222 | + return element.where(element != self.__ctl['undef']) |
0 commit comments