-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy pathoptimizer.py
More file actions
654 lines (586 loc) · 28.5 KB
/
optimizer.py
File metadata and controls
654 lines (586 loc) · 28.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
'''
optimizer.py -- Optimization routines for PRMS parameters and data.
'''
from __future__ import print_function
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt
import os, sys, json, re
import multiprocessing as mp
from copy import copy
from copy import deepcopy
from numpy import log10
from .data import Data
from .parameters import Parameters
from .simulation import Simulation, SimulationSeries
from .util import load_statvar, nash_sutcliffe, percent_bias, rmse
OPJ = os.path.join
class Optimizer:
    '''
    Container for a PRMS parameter optimization routine consisting of
    stages similar to what is described in Hay, et al, 2006
    (ftp://brrftp.cr.usgs.gov/pub/mows/software/luca_s/jawraHay.pdf).

    Example:

    >>> from prms_python import Data, Optimizer, Parameters
    >>> params = Parameters('path/to/parameters')
    >>> data = Data('path/to/data')
    >>> control = 'path/to/control'
    >>> work_directory = 'path/to/create/simulations'
    >>> optr = Optimizer(params, data, control, work_directory,
    ...                  title='the title', description='desc')
    >>> measured = 'path/to/measured/csv'
    >>> statvar_name = 'basin_cfs'  # or any other valid statvar
    >>> params_to_resample = ['dday_intcp', 'dday_slope']  # list
    >>> optr.monte_carlo(measured, params_to_resample, statvar_name)
    '''

    # dict of min/max allowable parameter ranges, add more when needed
    param_ranges = {'dday_intcp': (-60.0, 10.0),
                    'dday_slope': (0.2, 0.9),
                    'jh_coef': (0.005, 0.06),
                    'pt_alpha': (1.0, 2.0),
                    'potet_coef_hru_mo': (1.0, 1.6),
                    'tmax_index': (-10.0, 110.0)
                    }  # changed potet max

    def __init__(self, parameters, data, control_file, working_dir,
                 title, description=None):
        """
        Args:
            parameters (Parameters): starting PRMS parameter set.
            data (Data): PRMS input data object.
            control_file (str): path to the PRMS control file; its parent
                directory is taken as the model input directory.
            working_dir (str): directory in which simulation subdirectories
                and metadata json files are created (made if missing).
            title (str): optimization title, used in metadata file names.
        Kwargs:
            description (str): free-text description stored in metadata.
        Raises:
            TypeError: if parameters/data are not the expected types.
        """
        if isinstance(parameters, Parameters):
            self.parameters = parameters
        else:
            raise TypeError('parameters must be instance of Parameters')
        if isinstance(data, Data):
            self.data = data
        else:
            raise TypeError('data must be instance of Data')
        # input dir = directory containing the control file
        input_dir = '{}'.format(os.sep).join(control_file.split(os.sep)[:-1])
        if not os.path.isfile(OPJ(input_dir, 'statvar.dat')):
            # run PRMS once on the original inputs so plots/tables can
            # compare against an "original simulation" baseline
            print('You have no statvar.dat file in your model directory')
            print('Running PRMS on original data in {} for later comparison'
                  .format(input_dir))
            sim = Simulation(input_dir)
            sim.run()
        if not os.path.isdir(working_dir):
            os.mkdir(working_dir)
        self.input_dir = input_dir
        self.control_file = control_file
        self.working_dir = working_dir
        self.title = title
        self.description = description
        self.measured_arb = None  # measured series for arbitrary output methods
        self.statvar_name = None  # statvar output being optimized
        self.arb_outputs = []     # accumulated simulation outputs

    def monte_carlo(self, reference_path, param_names, statvar_name,
                    stage, n_sims=10, method='uniform',
                    noise_factor=0.1, nproc=None):
        '''
        Optimize the monthly dday_intcp and dday_slope parameters
        (two key parameters in the ddsolrad module in PRMS) by one of
        multiple methods (in development): Monte Carlo default method

        Args:
            reference_path (str): path to measured data for optimization
            param_names (list): list of parameter names to resample
            statvar_name (str): name of statistical variable output name for
                optimization
            stage (str): custom name of optimization stage e.g. 'ddsolrad'
        Kwargs:
            n_sims (int): number of simulations to conduct
                parameter optimization/uncertainty analysis.
            method (str): resampling method for parameters (normal or uniform)
            noise_factor (float): scales the variance of noise to add to
                parameter values when adding normal rv (method='normal')
            nproc (int): number of processors available to run PRMS simulations
        Raises:
            ValueError: if ``stage`` contains an underscore (underscores
                delimit fields in the metadata file name).
        '''
        if '_' in stage:
            raise ValueError('stage name cannot contain an underscore')
        # assign the optimization object a copy of measured srad for plots.
        # pd.Series.from_csv was removed in pandas 1.0; this read_csv call
        # reproduces its defaults (index_col=0, header=None, parse_dates)
        self.measured_arb = pd.read_csv(
            reference_path, index_col=0, header=None, parse_dates=True
        ).iloc[:, 0]
        # statistical variable output name
        self.statvar_name = statvar_name
        start_time = dt.datetime.now()
        start_time = start_time.replace(second=0, microsecond=0)
        # resample params for all simulations- potential place to serialize
        params = []
        for name in param_names:  # create list of lists of resampled params
            tmp = []
            for idx in range(n_sims):
                tmp.append(resample_param(self.parameters, name, how=method,
                                          noise_factor=noise_factor))
            params.append(list(tmp))
        # SimulationSeries comprised of each resampled param set
        series = SimulationSeries(
            Simulation.from_data(
                self.data,
                _mod_params(self.parameters,
                            [params[n][i] for n in range(len(params))],
                            param_names),
                self.control_file,
                OPJ(
                    self.working_dir,  # name of sim: first param and mean value
                    '{0}:{1:.6f}'.format(param_names[0], np.mean(params[0][i]))
                )
            )
            for i in range(n_sims)
        )
        # run; default to half the machine but never zero workers
        # (mp.cpu_count() // 2 is 0 on a single-core host)
        if not nproc:
            nproc = max(1, mp.cpu_count() // 2)
        outputs = list(series.run(nproc=nproc).outputs_iter())
        self.arb_outputs.extend(outputs)  # for current instance- add outputs
        end_time = dt.datetime.now()
        end_time = end_time.replace(second=0, microsecond=0)
        # json metadata for Monte Carlo method
        meta = {'params_adjusted': param_names,
                'statvar_name': self.statvar_name,
                'optimization_title': self.title,
                'optimization_description': self.description,
                'start_time': str(start_time),
                'end_time': str(end_time),
                'measured': reference_path,
                'method': 'Monte Carlo',
                'noise_factor': noise_factor,
                'resample': method,
                'sim_dirs': [],
                'stage': '{}'.format(stage),
                'original_params': self.parameters.base_file,
                'nproc': nproc,
                'n_sims': n_sims
                }
        for output in outputs:
            meta['sim_dirs'].append(output['simulation_dir'])
        json_outfile = OPJ(self.working_dir, _create_metafile_name(
            self.working_dir, self.title, stage))
        with open(json_outfile, 'w') as outf:
            json.dump(meta, outf, sort_keys=True, indent=4,
                      ensure_ascii=False)
        print('{0}\nOutput information sent to {1}\n'.
              format('-' * 80, json_outfile))

    def plot_optimization(self, freq='daily', method='time_series',
                          plot_vars='both', plot_1to1=True, return_fig=False,
                          n_plots=4):
        """
        Basic plotting of current optimization results with limited options.
        Plots measured, original simulated, and optimization simulated
        variables. Not recommended for plotting results when n_sims is very
        large, instead use options from an OptimizationResult object, or
        employ a user-defined method using the result data.

        Kwargs:
            freq (str): frequency of time series plots, value can be 'daily'
                or 'monthly' for solar radiation
            method (str): 'time_series' for time series sub plot of each
                simulation alongside measured radiation. Another choice is
                'correlation' which plots each measured daily solar radiation
                value versus the corresponding simulated variable as subplots
                one for each simulation in the optimization. With coefficients
                of determination i.e. square of pearson correlation coef.
            plot_vars (str): what to plot alongside simulated srad:
                'meas': plot simulated along with measured swrad
                'orig': plot simulated along with the original simulated swrad
                'both': plot simulated, with original simulation and measured
            plot_1to1 (bool): if True plot one to one line on correlation
                scatter plot, otherwise exclude.
            return_fig (bool): flag whether to return matplotlib figure
            n_plots (int): maximum number of simulations to plot
        Returns:
            f (matplotlib.figure.Figure): If kwarg return_fig=True, then
                return copy of the figure that is generated to the user.
        Raises:
            ValueError: if no optimizations have been run on this instance.
        """
        if not self.arb_outputs:
            raise ValueError('You have not run any optimizations')
        var_name = self.statvar_name
        X = self.measured_arb
        idx = X.index.intersection(self.arb_outputs[0]['statvar']
                                   ['{}'.format(var_name)].index)
        X = X[idx]
        orig = load_statvar(OPJ(self.input_dir, 'statvar.dat'))['{}'
                            .format(var_name)][idx]
        meas = self.measured_arb[idx]
        sims = [out['statvar']['{}'.format(var_name)][idx] for
                out in self.arb_outputs]
        simdirs = [out['simulation_dir'].split(os.sep)[-1].
                   replace('_', ' ') for out in self.arb_outputs]
        var_name = '{}'.format(self.statvar_name)
        n = len(self.arb_outputs)  # number of simulations to plot
        # user defined number of subplots from first n_plots results
        if (n > n_plots):
            n = n_plots
        # styles for each plot
        ms = 4  # markersize for all points
        orig_sty = dict(linestyle='none', markersize=ms,
                        markerfacecolor='none', marker='s',
                        markeredgecolor='royalblue', color='royalblue')
        meas_sty = dict(linestyle='none', markersize=ms + 1,
                        markerfacecolor='none', marker='1',
                        markeredgecolor='k', color='k')
        sim_sty = dict(linestyle='none', markersize=ms,
                       markerfacecolor='none', marker='o',
                       markeredgecolor='r', color='r')
        # number of subplots and rows (two plots per row)
        nrow = n // 2  # round down if odd n
        ncol = 2
        odd_n = False
        if n / 2. - nrow == 0.5:
            nrow += 1  # odd number need extra row
            odd_n = True
        ########
        # Start plots depending on key word arguments
        ########
        if freq == 'daily' and method == 'time_series':
            fig, ax = plt.subplots(n, sharex=True, sharey=True,
                                   figsize=(12, n * 3.5))
            # np.ravel also handles n == 1, where subplots returns a
            # bare Axes object without a .ravel method
            axs = np.ravel(ax)
            for i, sim in enumerate(sims[:n]):
                if plot_vars in ('meas', 'both'):
                    axs[i].plot(meas, label='Measured', **meas_sty)
                if plot_vars in ('orig', 'both'):
                    axs[i].plot(orig, label='Original sim.', **orig_sty)
                axs[i].plot(sim, **sim_sty)
                axs[i].set_ylabel('sim: {}'.format(simdirs[i]), fontsize=10)
                if i == 0:
                    axs[i].legend(markerscale=5, loc='best')
            fig.subplots_adjust(hspace=0)
            fig.autofmt_xdate()
        # monthly means
        elif freq == 'monthly' and method == 'time_series':
            # compute monthly means
            meas = meas.groupby(meas.index.month).mean()
            orig = orig.groupby(orig.index.month).mean()
            # change line styles for monthly plots to lines not points
            for d in (orig_sty, meas_sty, sim_sty):
                d['linestyle'] = '-'
                d['marker'] = None
            fig, ax = plt.subplots(nrows=nrow, ncols=ncol,
                                   figsize=(12, n * 3.5))
            axs = np.ravel(ax)
            for i, sim in enumerate(sims[:n]):
                if plot_vars in ('meas', 'both'):
                    axs[i].plot(meas, label='Measured', **meas_sty)
                if plot_vars in ('orig', 'both'):
                    axs[i].plot(orig, label='Original sim.', **orig_sty)
                sim = sim.groupby(sim.index.month).mean()
                axs[i].plot(sim, **sim_sty)
                axs[i].set_ylabel('sim: {}\nmean'.format(simdirs[i]),
                                  fontsize=10)
                axs[i].set_xlim(0.5, 12.5)
                if i == 0:
                    axs[i].legend(markerscale=5, loc='best')
            if odd_n:  # empty subplot if odd number of simulations
                fig.delaxes(axs[n])
            fig.text(0.5, 0.1, 'month')
        # x-y scatter
        elif method == 'correlation':
            # figure
            fig, ax = plt.subplots(nrows=nrow, ncols=ncol,
                                   figsize=(12, n * 3))
            axs = np.ravel(ax)
            # subplot dimensions
            meas_min = min(X)
            meas_max = max(X)
            for i, sim in enumerate(sims[:n]):
                Y = sim
                sim_max = max(Y)
                sim_min = min(Y)
                m = max(meas_max, sim_max)
                if plot_1to1:
                    axs[i].plot([0, m], [0, m], 'k--', lw=2)  # one to one line
                axs[i].set_xlim(meas_min, meas_max)
                axs[i].set_ylim(sim_min, sim_max)
                axs[i].plot(X, Y, **sim_sty)
                axs[i].set_ylabel('sim: {}'.format(simdirs[i]))
                axs[i].set_xlabel('Measured {}'.format(var_name))
                axs[i].text(0.05, 0.95, r'$R^2 = {0:.2f}$'.format(
                            X.corr(Y)**2), fontsize=16,
                            ha='left', va='center', transform=axs[i].transAxes)
            if odd_n:  # empty subplot if odd number of simulations
                fig.delaxes(axs[n])
        if return_fig:
            return fig
def _create_metafile_name(out_dir, opt_title, stage):
"""
Search through output directory where simulations are conducted
look for all metadata simulation json files and find out if the
current simulation is a replicate. Then use that information to
build the correct file name for the output json file. The series
are typically run in parallel that is why this step has to be
done after running multiple simulations from an optimization stage.
Args:
out_dir (str): path to directory with model results, i.e.
location where simulation series outputs and optimization
json files are located, aka Optimizer.working_dir
opt_title (str): optimization instance title for file search
stage (str): stage of optimization, e.g. 'swrad', 'pet'
Returns:
name (str): file name for the current optimization simulation series
metadata json file. E.g 'dry_creek_swrad_opt.json', or if
this is the second time you have run an optimization titled
'dry_creek' the next json file will be returned as
'dry_creek_swrad_opt1.json' and so on with integer increments
"""
meta_re = re.compile(r'^{}_{}_opt(\d*)\.json'.format(opt_title, stage))
reps = []
for f in os.listdir(out_dir):
if meta_re.match(f):
nrep = meta_re.match(f).group(1)
if nrep == '':
reps.append(0)
else:
reps.append(nrep)
if not reps:
name = '{}_{}_opt.json'.format(opt_title, stage)
else:
# this is the nth optimization done under the same title
n = max(map(int, reps)) + 1
name = '{}_{}_opt{}.json'.format(opt_title, stage, n)
return name
def resample_param(params, param_name, how='uniform', noise_factor=0.1):
    """
    Resample PRMS parameter by shifting all values by a constant that is
    taken from a uniform distribution, where the range of the uniform
    values is equal to the difference between the min(max) of the parameter
    set and the min(max) of the allowable range from PRMS. For parameters
    that have array length <= 366 add noise to each parameter element by
    adding a RV from a normal distribution with mean 0, sigma = param
    allowable range / 10.

    Args:
        params (parameters.Parameters): parameter object
        param_name (str): name of PRMS parameter to resample
    Kwargs:
        how (str): distribution to resample parameters from in the case
            that each parameter element can be resampled (len <= 366)
            Currently works for uniform and normal distributions.
        noise_factor (float): factor to multiply parameter range by,
            use the result as the standard deviation for the normal rand.
            variable used to add element wise noise. i.e. higher
            noise factor will result in higher noise added to each param
            element. Must be > 0.
    Returns:
        ret (numpy.ndarray): ndarray of param after uniform random mean
            shift or element-wise noise addition (normal r.v.)
    Raises:
        KeyError: if ``param_name`` is not a parameter in ``params``.
        ValueError: if the parameter has no allowable range registered in
            Optimizer.param_ranges, is tied to cascade-flow dimensions,
            or has a dimensionality this routine does not cover.
    """
    # (-1, -1) sentinel marks a parameter with no registered range
    p_min, p_max = Optimizer.param_ranges.get(param_name, (-1, -1))
    # create dictionary of parameter basic info (not values)
    param_dic = {param['name']: param for param in params.base_params}
    if not param_dic.get(param_name):
        raise KeyError('{} is not a valid parameter'.format(param_name))
    if p_min == p_max == -1:
        raise ValueError("""{} has not been added to the dictionary of
        parameters to resample, add it's allowable min and max value
        to the param_ranges dictionary in the resample function in
        Optimizer.py""".format(param_name))
    dim_case = None
    nhru = params.dimensions['nhru']
    ndims = param_dic.get(param_name)['ndims']
    dimnames = param_dic.get(param_name)['dimnames']
    length = param_dic.get(param_name)['length']
    param = params[param_name]
    # could expand list and check parameter name also e.g. cascade_flg
    # is a parameter that should not be changed
    dims_to_not_change = set(['ncascade', 'ncascdgw', 'nreach',
                              'nsegment'])
    if (len(set.intersection(dims_to_not_change, set(dimnames))) > 0):
        raise ValueError("""{} should not be resampled as
        it relates to the location of cascade flow
        parameters.""".format(param_name))
    # use param info to get dimension info- e.g. if multidimensional
    if (ndims == 1 and length <= 366):
        dim_case = 'resample_each_value'  # for smaller param dimensions
    elif (ndims == 1 and length > 366):
        dim_case = 'resample_all_values_once'  # covers nssr, ngw, etc.
    elif (ndims == 2 and dimnames[1] == 'nmonths' and
          nhru == params.dimensions[dimnames[0]]):
        dim_case = 'nhru_nmonths'
    elif not dim_case:
        # none of the supported dimension layouts matched
        raise ValueError('The {} parameter should not be resampled'.
                         format(param_name))
    # bounds of the uniform shift keep every value within [p_min, p_max]
    low_bnd = p_min - np.min(param)  # lowest param value minus allowable min
    up_bnd = p_max - np.max(param)
    s = (p_max - p_min) * noise_factor  # variance noise, default: range*(1/10)
    # do resampling differently based on param dimensions
    if dim_case == 'resample_all_values_once':
        if how == 'uniform':
            # uniform RV for shifting all values once
            shifted_param = np.random.uniform(low=low_bnd, high=up_bnd) + param
            ret = shifted_param
        elif how == 'normal':
            # rejection-sample whole-array noise until all values fall
            # within the allowable range
            while True:
                tmp = np.random.normal(0, s, size=param.shape) + param
                if np.max(tmp) <= p_max and np.min(tmp) >= p_min:
                    ret = tmp
                    break
    elif dim_case == 'resample_each_value':
        ret = copy(param)
        if how == 'uniform':
            for i, el in enumerate(param):
                low_bnd = p_min - np.min(el)  # a,b for uniform RV to add
                up_bnd = p_max - np.max(el)
                ret[i] = el + np.random.uniform(low=low_bnd, high=up_bnd)
        elif how == 'normal':
            # per-element rejection sampling against the allowable range
            for i, el in enumerate(param):
                while True:
                    low_bnd = p_min - np.min(el)
                    up_bnd = p_max - np.max(el)
                    tmp = el + np.random.normal(0, s)
                    if np.max(tmp) <= p_max and np.min(tmp) >= p_min:
                        ret[i] = tmp
                        break
    # nhru by nmonth dimensional params
    elif dim_case == 'nhru_nmonths':
        ret = copy(param)
        if how == 'uniform':
            # one independent shift per month, applied across all HRUs
            rvs = [np.random.uniform(low=low_bnd, high=up_bnd)
                   for i in range(12)]
            for month in range(12):
                ret[month] += rvs[month]
        elif how == 'normal':
            # NOTE(review): iterates rows of the 2D param (presumably one
            # row per month given the indexing above) adding a single
            # normal shift per row -- confirm intended axis
            for i, el in enumerate(param):
                while True:
                    low_bnd = p_min - np.min(el)
                    up_bnd = p_max - np.max(el)
                    tmp = el + np.random.normal(0, s)
                    if np.max(tmp) <= p_max and np.min(tmp) >= p_min:
                        ret[i] = tmp
                        break
    return ret
def _mod_params(parameters, params, param_names):
ret = copy(parameters)
for idx, param in enumerate(params):
ret[param_names[idx]] = param
return ret
class OptimizationResult:
    """
    Reader for the metadata and outputs of previously run Optimizer stages.

    Collects the per-stage metadata json files that ``Optimizer`` writes
    into ``working_dir`` and exposes the measured series, statvar name,
    input parameter paths, simulation directories, and goodness-of-fit
    tables for one optimization ``stage``.
    """

    def __init__(self, working_dir, stage):
        """
        Args:
            working_dir (str): directory holding simulation outputs and
                the optimization metadata json files
                (aka Optimizer.working_dir).
            stage (str): optimization stage to load, e.g. 'swrad', 'pet'.
        """
        self.working_dir = working_dir
        self.stage = stage
        self.metadata_json_paths = self._get_optr_jsons(working_dir, stage)
        self.statvar_name = self.get_statvar_name(stage)
        self.measured = self.get_measured(stage)
        self.input_dir = self._get_input_dir(stage)
        self.input_params = self._get_input_params(stage)
        # if there are more than one input param for given stage
        if len(self.input_params) > 1:
            print(
                """Warning: there were more than one initial parameter sets used for the
optimization for stage: {}. Make sure to compare the the correct input
params with their corresponding output sims.""".format(stage))
            print('\nThis optimization stage used the\
 following input parameter files:\n{}'.format('\n'.join(self.input_params)))

    def _get_optr_jsons(self, work_dir, stage):
        """
        Retrieve locations of optimization output jsons which contain
        metadata needed to understand optimization results.
        Create dictionary of each optimization with stage as key and lists
        of corresponding json file paths as values.

        Arguments:
            work_dir (str): path to directory with model results, i.e.
                location where simulation series outputs and optimization
                json files are located, aka Optimizer.working_dir
            stage (str): the stage ('ddsolrad', 'jhpet', 'flow', etc.) of
                the optimization in which to gather the jsons
        Returns:
            ret (dict): dictionary of stage (keys) and lists of
                json file paths for that stage (values).
        """
        ret = {}
        # re.escape + $ anchor: match exactly the files written by
        # _create_metafile_name, not e.g. '..._opt1.json.bak'
        optr_metafile_re = re.compile(r'^.*_{}_opt(\d*)\.json$'.
                                      format(re.escape(stage)))
        ret[stage] = [OPJ(work_dir, f) for f in
                      os.listdir(work_dir) if
                      optr_metafile_re.match(f)]
        return ret

    def _get_input_dir(self, stage):
        """Return the input directory recorded in the first json of *stage*
        (parent directory of the original parameter file)."""
        json_file = self.metadata_json_paths[stage][0]
        with open(json_file) as jf:
            meta_dic = json.load(jf)
        return '{}'.format(os.sep).join(meta_dic['original_params'].
                                        split(os.sep)[:-1])

    def _get_input_params(self, stage):
        """Return the unique original parameter file paths used by *stage*."""
        json_files = self.metadata_json_paths[stage]
        param_paths = []
        for json_file in json_files:
            with open(json_file) as jf:
                meta_dic = json.load(jf)
            param_paths.append(meta_dic['original_params'])
        # deduplicate; note: set() does not preserve order
        return list(set(param_paths))

    def get_sim_dirs(self, stage):
        """Return all simulation directory paths recorded for *stage*."""
        jsons = self.metadata_json_paths[stage]
        json_files = []
        sim_dirs = []
        for inf in jsons:
            with open(inf) as json_file:
                json_files.append(json.load(json_file))
        for json_file in json_files:
            sim_dirs.extend(json_file['sim_dirs'])
        # list of all simulation directory paths for stage
        return sim_dirs

    def get_measured(self, stage):
        """Load the measured time series referenced by the *stage* metadata,
        or None when no json files exist for the stage."""
        # only need to open one json file to get this information
        if not self.metadata_json_paths.get(stage):
            return  # no optimization json files exist for given stage
        first_json = self.metadata_json_paths[stage][0]
        with open(first_json) as json_file:
            json_data = json.load(json_file)
        # pd.Series.from_csv was removed in pandas 1.0; read_csv with
        # index_col=0, header=None, parse_dates reproduces its defaults
        measured_series = pd.read_csv(
            json_data.get('measured'), index_col=0, header=None,
            parse_dates=True
        ).iloc[:, 0]
        return measured_series

    def get_statvar_name(self, stage):
        """Return the statvar output name recorded for *stage*.

        Raises:
            ValueError: if no optimization json exists for the stage.
        """
        # only need to open one json file to get this information
        try:
            first_json = self.metadata_json_paths[stage][0]
        except (KeyError, IndexError):
            # narrow except: stage missing from dict or empty file list
            raise ValueError("""No optimization has been run for
            stage: {}""".format(stage))
        with open(first_json) as json_file:
            json_data = json.load(json_file)
        var_name = json_data.get('statvar_name')
        return var_name

    def result_table(self, freq='daily', top_n=5, latex=False):
        """Rank simulations of this stage by goodness-of-fit statistics.

        Kwargs:
            freq (str): 'daily' or 'monthly' (monthly-mean) statistics.
            top_n (int): number of best simulations to return.
            latex (bool): if True return a LaTeX table string instead of
                a DataFrame.
        Returns:
            pandas.DataFrame (or str when latex=True) sorted best-first by
            NSE, RMSE, |PBIAS|, then coefficient of determination.
        """
        # TODO: add stats for freq options monthly, annual (means or sum)
        sim_dirs = self.get_sim_dirs(self.stage)
        if top_n >= len(sim_dirs):
            top_n = len(sim_dirs)
        sim_names = [path.split(os.sep)[-1] for path in sim_dirs]
        meas_var = self.get_measured(self.stage)
        statvar_name = self.get_statvar_name(self.stage)
        result_df = pd.DataFrame(
            columns=['NSE', 'RMSE', 'PBIAS', 'COEF_DET', 'ABS(PBIAS)'])
        for i, sim in enumerate(sim_dirs):
            sim_out = load_statvar(OPJ(sim, 'outputs', 'statvar.dat'))\
                ['{}'.format(statvar_name)]
            # NOTE(review): meas_var is re-assigned to the intersection each
            # iteration, so the index can only shrink across sims -- fine
            # when all sims share one period; confirm otherwise
            idx = meas_var.index.intersection(sim_out.index)
            meas_var = copy(meas_var[idx])
            sim_out = sim_out[idx]
            if freq == 'daily':
                result_df.loc[sim_names[i]] = [
                    nash_sutcliffe(meas_var, sim_out),
                    rmse(meas_var, sim_out),
                    percent_bias(meas_var, sim_out),
                    meas_var.corr(sim_out)**2,
                    np.abs(percent_bias(meas_var, sim_out))]
            elif freq == 'monthly':
                meas_mo = meas_var.groupby(meas_var.index.month).mean()
                sim_out = sim_out.groupby(sim_out.index.month).mean()
                result_df.loc[sim_names[i]] = [
                    nash_sutcliffe(meas_mo, sim_out),
                    rmse(meas_mo, sim_out),
                    percent_bias(meas_mo, sim_out),
                    meas_mo.corr(sim_out)**2,
                    np.abs(percent_bias(meas_mo, sim_out))]
        sorted_result = result_df.sort_values(
            by=['NSE', 'RMSE', 'ABS(PBIAS)', 'COEF_DET'],
            ascending=[False, True, True, False])
        sorted_result.columns.name = '{} parameters'.format(self.stage)
        sorted_result = sorted_result[['NSE', 'RMSE', 'PBIAS', 'COEF_DET']]
        if latex:
            return sorted_result[:top_n].to_latex(escape=False)
        else:
            return sorted_result[:top_n]

    def get_top_ranked_sims(self, sorted_df):
        """Map a ranked result table back to simulation file paths.

        Args:
            sorted_df (pandas.DataFrame): table from ``result_table`` whose
                index holds simulation directory names.
        Returns:
            dict: lists under 'dir_name', 'param_path', 'statvar_path'
            aligned with the row order of ``sorted_df``.
        """
        ret = {
            'dir_name': [],
            'param_path': [],
            'statvar_path': []
        }
        for el in sorted_df.index:  # index is the simulation directory names
            ret['dir_name'].append(el)
            ret['param_path'].append(OPJ(self.working_dir, el, 'inputs',
                                         'parameters'))
            ret['statvar_path'].append(OPJ(self.working_dir, el, 'outputs',
                                           'statvar.dat'))
        return ret