-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy pathoptimizer.py
More file actions
292 lines (236 loc) · 10.7 KB
/
optimizer.py
File metadata and controls
292 lines (236 loc) · 10.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
'''
optimizer.py -- Optimization routines for PRMS parameters and data.
'''
from __future__ import print_function
import pandas as pd
import numpy as np
import datetime as dt
import os, sys, json
from copy import deepcopy
from numpy import log10
from .data import Data
from .parameters import Parameters
from .simulation import Simulation, SimulationSeries
OPJ = os.path.join
class Optimizer:
'''
Container for a PRMS parameter optimization routine consisting of the
four stages as described in Hay, et al, 2006
(ftp://brrftp.cr.usgs.gov/pub/mows/software/luca_s/jawraHay.pdf).
Example:
>>> from prms_python import Data, Optimizer, Parameters
>>> params = Parameters('path/to/parameters')
>>> data = Data('path/to/data')
>>> control = 'path/to/control'
>>> work_directory = 'path/to/create/simulations'
>>> optr = Optimizer(params, data, control, work_directory, \
title='the title', description='desc')
>>> srad_hru = 2490
>>> optr.srad('path/to/reference_data/measured_srad.csv', srad_hru)
'''
## constant attributes for allowable range of solrad parameters
ir = (-60.0, 10.0) # PRMS dday_intcp range
sr = (0.2, 0.9) # dday_slope range
def __init__(self, parameters, data, control_file, working_dir,
title, description=None):
if isinstance(parameters, Parameters):
self.parameters = parameters
else:
raise TypeError('parameters must be instance of Parameters')
if isinstance(data, Data):
self.data = data
else:
raise TypeError('data must be instance of Data')
if not os.path.isdir(working_dir):
os.mkdir(working_dir)
## user needs to enter a title for output info now
# if not title:
# title = 'unnamed_optimization_{}'.format(\
# dt.datetime.today().strftime('%Y-%m-%d'))
self.control_file = control_file
self.working_dir = working_dir
self.title = title
self.description = description
def srad(self, reference_srad_path, station_nhru, n_sims=10, method='',\
nproc=None):
'''
Optimize the monthly dday_intcp and dday_slope parameters by one of
two methods: 'uniform' or 'random' for uniform sampling
Args:
reference_srad_path (str): path to measured solar radiation data
station_nhru (int): hru index in PRMS that is geographically
near the measured solar radiation location. You must
have swrad 'nhru' listed as a statvar output in your
control file.
Kwargs:
method (str): XXX not yet implemented- all uniform now
'uniform' or 'random'; if 'random',
intcp_delta and slope_delta are ignored, if provided
n_sims (int): number of simulations to conduct
parameter optimization/uncertaitnty analysis.
Returns:
(SradOptimizationResult)
'''
srad_start_time = dt.datetime.now()
srad_start_time = srad_start_time.replace(second=0, microsecond=0)
## shifting all monthly values by random amount from uniform distribution
## resampling degree day slope and intercept simoultaneously
intcps = [_resample_param(self.parameters['dday_intcp'], Optimizer.ir[0],\
Optimizer.ir[1]) for i in range(n_sims)]
slopes = [_resample_param(self.parameters['dday_slope'], Optimizer.sr[0],\
Optimizer.sr[1]) for i in range(n_sims)]
self.data.write(OPJ(self.working_dir, 'data'))
series = SimulationSeries(
Simulation.from_data(
self.data, _mod_params(self.parameters, intcps[i], slopes[i]),
self.control_file,
OPJ(
self.working_dir,
'intcp:{0:.3f}_slope:{1:.3f}'.format(np.mean(intcps[i]),\
np.mean(slopes[i]))
)
)
for i in range(n_sims)
)
# run all scenarios
outputs = series.run(nproc=nproc).outputs_iter()
srad_end_time = dt.datetime.now()
srad_end_time = srad_end_time.replace(second=0, microsecond=0)
def _error(x, y):
ret = abs(log10(x) - log10(y)).dropna()
ret = sum(ret)
return ret
measured_srad = pd.Series.from_csv(
reference_srad_path, parse_dates=True
)
srad_meta = {'stage' : 'swrad',
'hru_id' : station_nhru,
'optimization_title' : self.title,
'optimization_description' : self.description,
'start_time' : str(srad_start_time),
'end_time' : str(srad_end_time),
'measured_swrad' : reference_srad_path,
'sim_dirs' : [],
'original_params' : self.parameters.base_file
}
for output in outputs:
srad_meta['sim_dirs'].append(output['simulation_dir'])
json_outfile = OPJ(self.working_dir, '{0}_swrad_opt.json'.format(self.title))
with open(json_outfile, 'w') as outf:
json.dump(srad_meta, outf, sort_keys = True, indent = 4, ensure_ascii = False)
print('{0}\nOutput information sent to {1}\n'.format('-' * 80, json_outfile))
# # calculate the top performing
# errors = (
# (
# output['simulation_dir'],
# _error(measured_srad,
# output['statvar']['swrad_' + str(station_nhru)])
# )
#
# for output in outputs
# )
#
# print("directory, error (swrad)")
# for directory, error in errors:
# print(directory, error) #
#
# monthly_errors = {str(mo): [] for mo in range(12)}
#
# for directory, error in errors:
#
# month, intcp, slope = (el.split(':')[1] for el in
# directory.split(os.sep)[-1].split('_'))
#
# monthly_errors[month].append((intcp, slope, error))
#
# rankings = {
# str(mo): list(sorted(monthly_errors[str(mo)], key=lambda x: x[-1]))
# for mo in range(12)
# }
#
# tops = [(mo, rankings[str(mo)][0]) for mo in range(12)]
#
# # update internal parameters
# for top in tops:
# mo = top[0]
# self.parameters['dday_intcp'][mo] = tops[mo][1][0]
# self.parameters['dday_slope'][mo] = tops[mo][1][1]
#
# return {
# 'best': tops,
# 'all': rankings
# }
def _resample_param(param, p_min, p_max, noise_factor=0.1 ):
"""
Resample PRMS parameter by shifting all values by a constant that is
taken from a uniform distribution, where the range of the shift
values is equal to the difference between the min(max) of the parameter
set and the min(max) of the allowable range from PRMS. Next add noise
to each parameter element by adding a RV from a normal distribution
with mean 0, sigma = param allowable range / 10.
Arguments:
param (numpy.ndarray): ndarray of parameter to be resampled
p_min (float): lower bound of PRMS allowable range for param
p_max (float): upper bound of PRMS allowable range for param
noise_factor (float): factor to multiply parameter range by,
use the result as the standard deviation for the normal rand.
variable used to add element wise noise. i.e. higher
noise facter will result in higher noise added to each param
element.
Returns:
tmp (numpy.ndarry): ndarray of param after uniform random mean
shift and element-wise noise addition (normal r.v.)
"""
low_bnd = p_min - np.min(param) # lowest param value minus allowable min
up_bnd = p_max - np.max(param)
s = (p_max - p_min) * noise_factor # default is one tenth rangee (0.1)
shifted_param = np.random.uniform(low=low_bnd, high=up_bnd) + param
## add noise to each point keeping result within allowable range
while True:
tmp = shifted_param + np.random.normal(0,s,size=(np.shape(param)))
if np.max(tmp) <= p_max and np.min(tmp) >= p_min:
return tmp
def _mod_params(parameters, intcp, slope):
ret = deepcopy(parameters)
#print (intcp, slope)
ret['dday_intcp'] = intcp
ret['dday_slope'] = slope
return ret
class OptimizationResult:
def __init__(self, working_dir, stage='all'):
self.working_dir = working_dir
self.stage = stage
self.metadata_json_paths = self.get_optr_jsons(working_dir, stage)
def get_optr_jsons(self, work_dir, stage):
"""
Retrieve locations of optimization output jsons which contain
important metadata needed to understand optimization results.
Create dictionary of each optimization stage as keys, and lists
of corresponding json file paths for each stage as values.
Arguments:
work_dir (str): path to simulation directory where
optimization simulations were conducted and where
corresponding json files should exist.
stage (str): the stage ('swrad', 'pet', 'flow', etc.) of
the optimization in which to gather the jsons, if
stage is 'all' then each stage will be gathered.
Returns:
ret (dict): dictionary of stage (keys) and lists of
json file paths for that stage (values).
"""
ret = {}
if stage != 'all':
ret[stage] = [OPJ(work_dir, f) for f in\
os.listdir(work_dir) if\
f.endswith('_{0}_opt.json'.format(stage)) ]
else:
stages = ['swrad', 'pet', 'flow']
for s in stages:
ret[s] = self.get_optr_jsons(work_dir, s)
return ret
class SradOptimizationResult(OptimizationResult):
def __init__(self, working_dir, stage='swrad' ):
OptimizationResult.__init__(self, working_dir, stage)
class PetOptimizationResult(OptimizationResult):
def __init__(self, working_dir, stage='pet'):
OptimizationResult.__init__(self, working_dir, stage)