-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy pathoptimizer.py
More file actions
654 lines (586 loc) · 28.5 KB
/
optimizer.py
File metadata and controls
654 lines (586 loc) · 28.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
'''
optimizer.py -- Optimization routines for PRMS parameters and data.
'''
from __future__ import print_function
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt
import os, sys, json, re
import multiprocessing as mp
from copy import copy
from copy import deepcopy
from numpy import log10
from .data import Data
from .parameters import Parameters
from .simulation import Simulation, SimulationSeries
from .util import load_statvar, nash_sutcliffe, percent_bias, rmse
OPJ = os.path.join
class Optimizer:
    '''
    Container for a PRMS parameter optimization routine consisting of
    stages similar to what is described in Hay, et al, 2006
    (ftp://brrftp.cr.usgs.gov/pub/mows/software/luca_s/jawraHay.pdf).

    Example:

    >>> from prms_python import Data, Optimizer, Parameters
    >>> params = Parameters('path/to/parameters')
    >>> data = Data('path/to/data')
    >>> control = 'path/to/control'
    >>> work_directory = 'path/to/create/simulations'
    >>> optr = Optimizer(params, data, control, work_directory,
    ...                  title='the title', description='desc')
    >>> measured = 'path/to/measured/csv'
    >>> statvar_name = 'basin_cfs'  # or any other valid statvar
    >>> params_to_resample = ['dday_intcp', 'dday_slope']  # list
    >>> optr.monte_carlo(measured, params_to_resample, statvar_name)
    '''

    # dict of min/max allowable parameter ranges, add more when needed
    param_ranges = {'dday_intcp': (-60.0, 10.0),
                    'dday_slope': (0.2, 0.9),
                    'jh_coef': (0.005, 0.06),
                    'pt_alpha': (1.0, 2.0),
                    'potet_coef_hru_mo': (1.0, 1.6),
                    'tmax_index': (-10.0, 110.0)
                    }  # changed potet max

    def __init__(self, parameters, data, control_file, working_dir,
                 title, description=None):
        """
        Args:
            parameters (Parameters): starting PRMS parameter set.
            data (Data): PRMS input data object.
            control_file (str): path to the PRMS control file; its parent
                directory is taken as the model input directory.
            working_dir (str): directory in which simulation subdirectories
                and metadata json files are created (made if missing).
            title (str): optimization title, used in metadata file names.
        Kwargs:
            description (str): free-text description stored in metadata.
        Raises:
            TypeError: if parameters/data are not the expected types.
        """
        if isinstance(parameters, Parameters):
            self.parameters = parameters
        else:
            raise TypeError('parameters must be instance of Parameters')
        if isinstance(data, Data):
            self.data = data
        else:
            raise TypeError('data must be instance of Data')
        # input dir = directory containing the control file
        input_dir = '{}'.format(os.sep).join(control_file.split(os.sep)[:-1])
        if not os.path.isfile(OPJ(input_dir, 'statvar.dat')):
            # run PRMS once on the original inputs so plots/tables can
            # compare against an "original simulation" baseline
            print('You have no statvar.dat file in your model directory')
            print('Running PRMS on original data in {} for later comparison'
                  .format(input_dir))
            sim = Simulation(input_dir)
            sim.run()
        if not os.path.isdir(working_dir):
            os.mkdir(working_dir)
        self.input_dir = input_dir
        self.control_file = control_file
        self.working_dir = working_dir
        self.title = title
        self.description = description
        self.measured_arb = None  # measured series for arbitrary output methods
        self.statvar_name = None  # statvar output being optimized
        self.arb_outputs = []     # accumulated simulation outputs

    def monte_carlo(self, reference_path, param_names, statvar_name,
                    stage, n_sims=10, method='uniform',
                    noise_factor=0.1, nproc=None):
        '''
        Optimize the monthly dday_intcp and dday_slope parameters
        (two key parameters in the ddsolrad module in PRMS) by one of
        multiple methods (in development): Monte Carlo default method

        Args:
            reference_path (str): path to measured data for optimization
            param_names (list): list of parameter names to resample
            statvar_name (str): name of statistical variable output name for
                optimization
            stage (str): custom name of optimization stage e.g. 'ddsolrad'
        Kwargs:
            n_sims (int): number of simulations to conduct
                parameter optimization/uncertainty analysis.
            method (str): resampling method for parameters (normal or uniform)
            noise_factor (float): scales the variance of noise to add to
                parameter values when adding normal rv (method='normal')
            nproc (int): number of processors available to run PRMS simulations
        Raises:
            ValueError: if ``stage`` contains an underscore (underscores
                delimit fields in the metadata file name).
        '''
        if '_' in stage:
            raise ValueError('stage name cannot contain an underscore')
        # assign the optimization object a copy of measured srad for plots.
        # pd.Series.from_csv was removed in pandas 1.0; this read_csv call
        # reproduces its defaults (index_col=0, header=None, parse_dates)
        self.measured_arb = pd.read_csv(
            reference_path, index_col=0, header=None, parse_dates=True
        ).iloc[:, 0]
        # statistical variable output name
        self.statvar_name = statvar_name
        start_time = dt.datetime.now()
        start_time = start_time.replace(second=0, microsecond=0)
        # resample params for all simulations- potential place to serialize
        params = []
        for name in param_names:  # create list of lists of resampled params
            tmp = []
            for idx in range(n_sims):
                tmp.append(resample_param(self.parameters, name, how=method,
                                          noise_factor=noise_factor))
            params.append(list(tmp))
        # SimulationSeries comprised of each resampled param set
        series = SimulationSeries(
            Simulation.from_data(
                self.data,
                _mod_params(self.parameters,
                            [params[n][i] for n in range(len(params))],
                            param_names),
                self.control_file,
                OPJ(
                    self.working_dir,  # name of sim: first param and mean value
                    '{0}:{1:.6f}'.format(param_names[0], np.mean(params[0][i]))
                )
            )
            for i in range(n_sims)
        )
        # run; default to half the machine but never zero workers
        # (mp.cpu_count() // 2 is 0 on a single-core host)
        if not nproc:
            nproc = max(1, mp.cpu_count() // 2)
        outputs = list(series.run(nproc=nproc).outputs_iter())
        self.arb_outputs.extend(outputs)  # for current instance- add outputs
        end_time = dt.datetime.now()
        end_time = end_time.replace(second=0, microsecond=0)
        # json metadata for Monte Carlo method
        meta = {'params_adjusted': param_names,
                'statvar_name': self.statvar_name,
                'optimization_title': self.title,
                'optimization_description': self.description,
                'start_time': str(start_time),
                'end_time': str(end_time),
                'measured': reference_path,
                'method': 'Monte Carlo',
                'noise_factor': noise_factor,
                'resample': method,
                'sim_dirs': [],
                'stage': '{}'.format(stage),
                'original_params': self.parameters.base_file,
                'nproc': nproc,
                'n_sims': n_sims
                }
        for output in outputs:
            meta['sim_dirs'].append(output['simulation_dir'])
        json_outfile = OPJ(self.working_dir, _create_metafile_name(
            self.working_dir, self.title, stage))
        with open(json_outfile, 'w') as outf:
            json.dump(meta, outf, sort_keys=True, indent=4,
                      ensure_ascii=False)
        print('{0}\nOutput information sent to {1}\n'.
              format('-' * 80, json_outfile))

    def plot_optimization(self, freq='daily', method='time_series',
                          plot_vars='both', plot_1to1=True, return_fig=False,
                          n_plots=4):
        """
        Basic plotting of current optimization results with limited options.
        Plots measured, original simulated, and optimization simulated
        variables. Not recommended for plotting results when n_sims is very
        large, instead use options from an OptimizationResult object, or
        employ a user-defined method using the result data.

        Kwargs:
            freq (str): frequency of time series plots, value can be 'daily'
                or 'monthly' for solar radiation
            method (str): 'time_series' for time series sub plot of each
                simulation alongside measured radiation. Another choice is
                'correlation' which plots each measured daily solar radiation
                value versus the corresponding simulated variable as subplots
                one for each simulation in the optimization. With coefficients
                of determination i.e. square of pearson correlation coef.
            plot_vars (str): what to plot alongside simulated srad:
                'meas': plot simulated along with measured swrad
                'orig': plot simulated along with the original simulated swrad
                'both': plot simulated, with original simulation and measured
            plot_1to1 (bool): if True plot one to one line on correlation
                scatter plot, otherwise exclude.
            return_fig (bool): flag whether to return matplotlib figure
            n_plots (int): maximum number of simulations to plot
        Returns:
            f (matplotlib.figure.Figure): If kwarg return_fig=True, then
                return copy of the figure that is generated to the user.
        Raises:
            ValueError: if no optimizations have been run on this instance.
        """
        if not self.arb_outputs:
            raise ValueError('You have not run any optimizations')
        var_name = self.statvar_name
        X = self.measured_arb
        idx = X.index.intersection(self.arb_outputs[0]['statvar']
                                   ['{}'.format(var_name)].index)
        X = X[idx]
        orig = load_statvar(OPJ(self.input_dir, 'statvar.dat'))['{}'
                            .format(var_name)][idx]
        meas = self.measured_arb[idx]
        sims = [out['statvar']['{}'.format(var_name)][idx] for
                out in self.arb_outputs]
        simdirs = [out['simulation_dir'].split(os.sep)[-1].
                   replace('_', ' ') for out in self.arb_outputs]
        var_name = '{}'.format(self.statvar_name)
        n = len(self.arb_outputs)  # number of simulations to plot
        # user defined number of subplots from first n_plots results
        if (n > n_plots):
            n = n_plots
        # styles for each plot
        ms = 4  # markersize for all points
        orig_sty = dict(linestyle='none', markersize=ms,
                        markerfacecolor='none', marker='s',
                        markeredgecolor='royalblue', color='royalblue')
        meas_sty = dict(linestyle='none', markersize=ms + 1,
                        markerfacecolor='none', marker='1',
                        markeredgecolor='k', color='k')
        sim_sty = dict(linestyle='none', markersize=ms,
                       markerfacecolor='none', marker='o',
                       markeredgecolor='r', color='r')
        # number of subplots and rows (two plots per row)
        nrow = n // 2  # round down if odd n
        ncol = 2
        odd_n = False
        if n / 2. - nrow == 0.5:
            nrow += 1  # odd number need extra row
            odd_n = True
        ########
        # Start plots depending on key word arguments
        ########
        if freq == 'daily' and method == 'time_series':
            fig, ax = plt.subplots(n, sharex=True, sharey=True,
                                   figsize=(12, n * 3.5))
            # np.ravel also handles n == 1, where subplots returns a
            # bare Axes object without a .ravel method
            axs = np.ravel(ax)
            for i, sim in enumerate(sims[:n]):
                if plot_vars in ('meas', 'both'):
                    axs[i].plot(meas, label='Measured', **meas_sty)
                if plot_vars in ('orig', 'both'):
                    axs[i].plot(orig, label='Original sim.', **orig_sty)
                axs[i].plot(sim, **sim_sty)
                axs[i].set_ylabel('sim: {}'.format(simdirs[i]), fontsize=10)
                if i == 0:
                    axs[i].legend(markerscale=5, loc='best')
            fig.subplots_adjust(hspace=0)
            fig.autofmt_xdate()
        # monthly means
        elif freq == 'monthly' and method == 'time_series':
            # compute monthly means
            meas = meas.groupby(meas.index.month).mean()
            orig = orig.groupby(orig.index.month).mean()
            # change line styles for monthly plots to lines not points
            for d in (orig_sty, meas_sty, sim_sty):
                d['linestyle'] = '-'
                d['marker'] = None
            fig, ax = plt.subplots(nrows=nrow, ncols=ncol,
                                   figsize=(12, n * 3.5))
            axs = np.ravel(ax)
            for i, sim in enumerate(sims[:n]):
                if plot_vars in ('meas', 'both'):
                    axs[i].plot(meas, label='Measured', **meas_sty)
                if plot_vars in ('orig', 'both'):
                    axs[i].plot(orig, label='Original sim.', **orig_sty)
                sim = sim.groupby(sim.index.month).mean()
                axs[i].plot(sim, **sim_sty)
                axs[i].set_ylabel('sim: {}\nmean'.format(simdirs[i]),
                                  fontsize=10)
                axs[i].set_xlim(0.5, 12.5)
                if i == 0:
                    axs[i].legend(markerscale=5, loc='best')
            if odd_n:  # empty subplot if odd number of simulations
                fig.delaxes(axs[n])
            fig.text(0.5, 0.1, 'month')
        # x-y scatter
        elif method == 'correlation':
            # figure
            fig, ax = plt.subplots(nrows=nrow, ncols=ncol,
                                   figsize=(12, n * 3))
            axs = np.ravel(ax)
            # subplot dimensions
            meas_min = min(X)
            meas_max = max(X)
            for i, sim in enumerate(sims[:n]):
                Y = sim
                sim_max = max(Y)
                sim_min = min(Y)
                m = max(meas_max, sim_max)
                if plot_1to1:
                    axs[i].plot([0, m], [0, m], 'k--', lw=2)  # one to one line
                axs[i].set_xlim(meas_min, meas_max)
                axs[i].set_ylim(sim_min, sim_max)
                axs[i].plot(X, Y, **sim_sty)
                axs[i].set_ylabel('sim: {}'.format(simdirs[i]))
                axs[i].set_xlabel('Measured {}'.format(var_name))
                axs[i].text(0.05, 0.95, r'$R^2 = {0:.2f}$'.format(
                            X.corr(Y)**2), fontsize=16,
                            ha='left', va='center', transform=axs[i].transAxes)
            if odd_n:  # empty subplot if odd number of simulations
                fig.delaxes(axs[n])
        if return_fig:
            return fig
def _create_metafile_name(out_dir, opt_title, stage):
"""
Search through output directory where simulations are conducted
look for all metadata simulation json files and find out if the
current simulation is a replicate. Then use that information to
build the correct file name for the output json file. The series
are typically run in parallel that is why this step has to be
done after running multiple simulations from an optimization stage.
Args:
out_dir (str): path to directory with model results, i.e.
location where simulation series outputs and optimization
json files are located, aka Optimizer.working_dir
opt_title (str): optimization instance title for file search
stage (str): stage of optimization, e.g. 'swrad', 'pet'
Returns:
name (str): file name for the current optimization simulation series
metadata json file. E.g 'dry_creek_swrad_opt.json', or if
this is the second time you have run an optimization titled
'dry_creek' the next json file will be returned as
'dry_creek_swrad_opt1.json' and so on with integer increments
"""
meta_re = re.compile(r'^{}_{}_opt(\d*)\.json'.format(opt_title, stage))
reps = []
for f in os.listdir(out_dir):
if meta_re.match(f):
nrep = meta_re.match(f).group(1)
if nrep == '':
reps.append(0)
else:
reps.append(nrep)
if not reps:
name = '{}_{}_opt.json'.format(opt_title, stage)
else:
# this is the nth optimization done under the same title
n = max(map(int, reps)) + 1
name = '{}_{}_opt{}.json'.format(opt_title, stage, n)
return name
def resample_param(params, param_name, how='uniform', noise_factor=0.1):
    """
    Resample PRMS parameter by shifting all values by a constant that is
    taken from a uniform distribution, where the range of the uniform
    values is equal to the difference between the min(max) of the parameter
    set and the min(max) of the allowable range from PRMS. For parameters
    that have array length <= 366 add noise to each parameter element by
    adding a RV from a normal distribution with mean 0, sigma = param
    allowable range / 10.

    Args:
        params (parameters.Parameters): parameter object
        param_name (str): name of PRMS parameter to resample
    Kwargs:
        how (str): distribution to resample parameters from in the case
            that each parameter element can be resampled (len <= 366)
            Currently works for uniform and normal distributions.
        noise_factor (float): factor to multiply parameter range by,
            use the result as the standard deviation for the normal rand.
            variable used to add element wise noise. i.e. higher
            noise factor will result in higher noise added to each param
            element. Must be > 0.
    Returns:
        ret (numpy.ndarray): ndarray of param after uniform random mean
            shift or element-wise noise addition (normal r.v.)
    Raises:
        KeyError: if ``param_name`` is not a parameter in ``params``.
        ValueError: if the parameter has no allowable range registered in
            Optimizer.param_ranges, is tied to cascade-flow dimensions,
            or has a dimensionality this routine does not cover.
    """
    # (-1, -1) sentinel marks a parameter with no registered range
    p_min, p_max = Optimizer.param_ranges.get(param_name, (-1, -1))
    # create dictionary of parameter basic info (not values)
    param_dic = {param['name']: param for param in params.base_params}
    if not param_dic.get(param_name):
        raise KeyError('{} is not a valid parameter'.format(param_name))
    if p_min == p_max == -1:
        raise ValueError("""{} has not been added to the dictionary of
        parameters to resample, add it's allowable min and max value
        to the param_ranges dictionary in the resample function in
        Optimizer.py""".format(param_name))
    dim_case = None
    nhru = params.dimensions['nhru']
    ndims = param_dic.get(param_name)['ndims']
    dimnames = param_dic.get(param_name)['dimnames']
    length = param_dic.get(param_name)['length']
    param = params[param_name]
    # could expand list and check parameter name also e.g. cascade_flg
    # is a parameter that should not be changed
    dims_to_not_change = set(['ncascade', 'ncascdgw', 'nreach',
                              'nsegment'])
    if (len(set.intersection(dims_to_not_change, set(dimnames))) > 0):
        raise ValueError("""{} should not be resampled as
        it relates to the location of cascade flow
        parameters.""".format(param_name))
    # use param info to get dimension info- e.g. if multidimensional
    if (ndims == 1 and length <= 366):
        dim_case = 'resample_each_value'  # for smaller param dimensions
    elif (ndims == 1 and length > 366):
        dim_case = 'resample_all_values_once'  # covers nssr, ngw, etc.
    elif (ndims == 2 and dimnames[1] == 'nmonths' and
          nhru == params.dimensions[dimnames[0]]):
        dim_case = 'nhru_nmonths'
    elif not dim_case:
        # none of the supported dimension layouts matched
        raise ValueError('The {} parameter should not be resampled'.
                         format(param_name))
    # bounds of the uniform shift keep every value within [p_min, p_max]
    low_bnd = p_min - np.min(param)  # lowest param value minus allowable min
    up_bnd = p_max - np.max(param)
    s = (p_max - p_min) * noise_factor  # variance noise, default: range*(1/10)
    # do resampling differently based on param dimensions
    if dim_case == 'resample_all_values_once':
        if how == 'uniform':
            # uniform RV for shifting all values once
            shifted_param = np.random.uniform(low=low_bnd, high=up_bnd) + param
            ret = shifted_param
        elif how == 'normal':
            # rejection-sample whole-array noise until all values fall
            # within the allowable range
            while True:
                tmp = np.random.normal(0, s, size=param.shape) + param
                if np.max(tmp) <= p_max and np.min(tmp) >= p_min:
                    ret = tmp
                    break
    elif dim_case == 'resample_each_value':
        ret = copy(param)
        if how == 'uniform':
            for i, el in enumerate(param):
                low_bnd = p_min - np.min(el)  # a,b for uniform RV to add
                up_bnd = p_max - np.max(el)
                ret[i] = el + np.random.uniform(low=low_bnd, high=up_bnd)
        elif how == 'normal':
            # per-element rejection sampling against the allowable range
            for i, el in enumerate(param):
                while True:
                    low_bnd = p_min - np.min(el)
                    up_bnd = p_max - np.max(el)
                    tmp = el + np.random.normal(0, s)
                    if np.max(tmp) <= p_max and np.min(tmp) >= p_min:
                        ret[i] = tmp
                        break
    # nhru by nmonth dimensional params
    elif dim_case == 'nhru_nmonths':
        ret = copy(param)
        if how == 'uniform':
            # one independent shift per month, applied across all HRUs
            rvs = [np.random.uniform(low=low_bnd, high=up_bnd)
                   for i in range(12)]
            for month in range(12):
                ret[month] += rvs[month]
        elif how == 'normal':
            # NOTE(review): iterates rows of the 2D param (presumably one
            # row per month given the indexing above) adding a single
            # normal shift per row -- confirm intended axis
            for i, el in enumerate(param):
                while True:
                    low_bnd = p_min - np.min(el)
                    up_bnd = p_max - np.max(el)
                    tmp = el + np.random.normal(0, s)
                    if np.max(tmp) <= p_max and np.min(tmp) >= p_min:
                        ret[i] = tmp
                        break
    return ret
def _mod_params(parameters, params, param_names):
ret = copy(parameters)
for idx, param in enumerate(params):
ret[param_names[idx]] = param
return ret
class OptimizationResult:
    """
    Reader for the metadata and outputs of previously run Optimizer stages.

    Collects the per-stage metadata json files that ``Optimizer`` writes
    into ``working_dir`` and exposes the measured series, statvar name,
    input parameter paths, simulation directories, and goodness-of-fit
    tables for one optimization ``stage``.
    """

    def __init__(self, working_dir, stage):
        """
        Args:
            working_dir (str): directory holding simulation outputs and
                the optimization metadata json files
                (aka Optimizer.working_dir).
            stage (str): optimization stage to load, e.g. 'swrad', 'pet'.
        """
        self.working_dir = working_dir
        self.stage = stage
        self.metadata_json_paths = self._get_optr_jsons(working_dir, stage)
        self.statvar_name = self.get_statvar_name(stage)
        self.measured = self.get_measured(stage)
        self.input_dir = self._get_input_dir(stage)
        self.input_params = self._get_input_params(stage)
        # if there are more than one input param for given stage
        if len(self.input_params) > 1:
            print(
                """Warning: there were more than one initial parameter sets used for the
optimization for stage: {}. Make sure to compare the the correct input
params with their corresponding output sims.""".format(stage))
            print('\nThis optimization stage used the\
 following input parameter files:\n{}'.format('\n'.join(self.input_params)))

    def _get_optr_jsons(self, work_dir, stage):
        """
        Retrieve locations of optimization output jsons which contain
        metadata needed to understand optimization results.
        Create dictionary of each optimization with stage as key and lists
        of corresponding json file paths as values.

        Arguments:
            work_dir (str): path to directory with model results, i.e.
                location where simulation series outputs and optimization
                json files are located, aka Optimizer.working_dir
            stage (str): the stage ('ddsolrad', 'jhpet', 'flow', etc.) of
                the optimization in which to gather the jsons
        Returns:
            ret (dict): dictionary of stage (keys) and lists of
                json file paths for that stage (values).
        """
        ret = {}
        # re.escape + $ anchor: match exactly the files written by
        # _create_metafile_name, not e.g. '..._opt1.json.bak'
        optr_metafile_re = re.compile(r'^.*_{}_opt(\d*)\.json$'.
                                      format(re.escape(stage)))
        ret[stage] = [OPJ(work_dir, f) for f in
                      os.listdir(work_dir) if
                      optr_metafile_re.match(f)]
        return ret

    def _get_input_dir(self, stage):
        """Return the input directory recorded in the first json of *stage*
        (parent directory of the original parameter file)."""
        json_file = self.metadata_json_paths[stage][0]
        with open(json_file) as jf:
            meta_dic = json.load(jf)
        return '{}'.format(os.sep).join(meta_dic['original_params'].
                                        split(os.sep)[:-1])

    def _get_input_params(self, stage):
        """Return the unique original parameter file paths used by *stage*."""
        json_files = self.metadata_json_paths[stage]
        param_paths = []
        for json_file in json_files:
            with open(json_file) as jf:
                meta_dic = json.load(jf)
            param_paths.append(meta_dic['original_params'])
        # deduplicate; note: set() does not preserve order
        return list(set(param_paths))

    def get_sim_dirs(self, stage):
        """Return all simulation directory paths recorded for *stage*."""
        jsons = self.metadata_json_paths[stage]
        json_files = []
        sim_dirs = []
        for inf in jsons:
            with open(inf) as json_file:
                json_files.append(json.load(json_file))
        for json_file in json_files:
            sim_dirs.extend(json_file['sim_dirs'])
        # list of all simulation directory paths for stage
        return sim_dirs

    def get_measured(self, stage):
        """Load the measured time series referenced by the *stage* metadata,
        or None when no json files exist for the stage."""
        # only need to open one json file to get this information
        if not self.metadata_json_paths.get(stage):
            return  # no optimization json files exist for given stage
        first_json = self.metadata_json_paths[stage][0]
        with open(first_json) as json_file:
            json_data = json.load(json_file)
        # pd.Series.from_csv was removed in pandas 1.0; read_csv with
        # index_col=0, header=None, parse_dates reproduces its defaults
        measured_series = pd.read_csv(
            json_data.get('measured'), index_col=0, header=None,
            parse_dates=True
        ).iloc[:, 0]
        return measured_series

    def get_statvar_name(self, stage):
        """Return the statvar output name recorded for *stage*.

        Raises:
            ValueError: if no optimization json exists for the stage.
        """
        # only need to open one json file to get this information
        try:
            first_json = self.metadata_json_paths[stage][0]
        except (KeyError, IndexError):
            # narrow except: stage missing from dict or empty file list
            raise ValueError("""No optimization has been run for
            stage: {}""".format(stage))
        with open(first_json) as json_file:
            json_data = json.load(json_file)
        var_name = json_data.get('statvar_name')
        return var_name

    def result_table(self, freq='daily', top_n=5, latex=False):
        """Rank simulations of this stage by goodness-of-fit statistics.

        Kwargs:
            freq (str): 'daily' or 'monthly' (monthly-mean) statistics.
            top_n (int): number of best simulations to return.
            latex (bool): if True return a LaTeX table string instead of
                a DataFrame.
        Returns:
            pandas.DataFrame (or str when latex=True) sorted best-first by
            NSE, RMSE, |PBIAS|, then coefficient of determination.
        """
        # TODO: add stats for freq options monthly, annual (means or sum)
        sim_dirs = self.get_sim_dirs(self.stage)
        if top_n >= len(sim_dirs):
            top_n = len(sim_dirs)
        sim_names = [path.split(os.sep)[-1] for path in sim_dirs]
        meas_var = self.get_measured(self.stage)
        statvar_name = self.get_statvar_name(self.stage)
        result_df = pd.DataFrame(
            columns=['NSE', 'RMSE', 'PBIAS', 'COEF_DET', 'ABS(PBIAS)'])
        for i, sim in enumerate(sim_dirs):
            sim_out = load_statvar(OPJ(sim, 'outputs', 'statvar.dat'))\
                ['{}'.format(statvar_name)]
            # NOTE(review): meas_var is re-assigned to the intersection each
            # iteration, so the index can only shrink across sims -- fine
            # when all sims share one period; confirm otherwise
            idx = meas_var.index.intersection(sim_out.index)
            meas_var = copy(meas_var[idx])
            sim_out = sim_out[idx]
            if freq == 'daily':
                result_df.loc[sim_names[i]] = [
                    nash_sutcliffe(meas_var, sim_out),
                    rmse(meas_var, sim_out),
                    percent_bias(meas_var, sim_out),
                    meas_var.corr(sim_out)**2,
                    np.abs(percent_bias(meas_var, sim_out))]
            elif freq == 'monthly':
                meas_mo = meas_var.groupby(meas_var.index.month).mean()
                sim_out = sim_out.groupby(sim_out.index.month).mean()
                result_df.loc[sim_names[i]] = [
                    nash_sutcliffe(meas_mo, sim_out),
                    rmse(meas_mo, sim_out),
                    percent_bias(meas_mo, sim_out),
                    meas_mo.corr(sim_out)**2,
                    np.abs(percent_bias(meas_mo, sim_out))]
        sorted_result = result_df.sort_values(
            by=['NSE', 'RMSE', 'ABS(PBIAS)', 'COEF_DET'],
            ascending=[False, True, True, False])
        sorted_result.columns.name = '{} parameters'.format(self.stage)
        sorted_result = sorted_result[['NSE', 'RMSE', 'PBIAS', 'COEF_DET']]
        if latex:
            return sorted_result[:top_n].to_latex(escape=False)
        else:
            return sorted_result[:top_n]

    def get_top_ranked_sims(self, sorted_df):
        """Map a ranked result table back to simulation file paths.

        Args:
            sorted_df (pandas.DataFrame): table from ``result_table`` whose
                index holds simulation directory names.
        Returns:
            dict: lists under 'dir_name', 'param_path', 'statvar_path'
            aligned with the row order of ``sorted_df``.
        """
        ret = {
            'dir_name': [],
            'param_path': [],
            'statvar_path': []
        }
        for el in sorted_df.index:  # index is the simulation directory names
            ret['dir_name'].append(el)
            ret['param_path'].append(OPJ(self.working_dir, el, 'inputs',
                                         'parameters'))
            ret['statvar_path'].append(OPJ(self.working_dir, el, 'outputs',
                                           'statvar.dat'))
        return ret