PRMS-Python/prms_python/optimizer.py at issue10 · PRMS-Python/PRMS-Python

History

292 lines (236 loc) · 10.7 KB

Raw

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

'''

optimizer.py -- Optimization routines for PRMS parameters and data.

'''

from __future__ import print_function

import pandas as pd

import numpy as np

import datetime as dt

import os, sys, json

from copy import deepcopy

from numpy import log10

from .data import Data

from .parameters import Parameters

from .simulation import Simulation, SimulationSeries

OPJ = os.path.join

class Optimizer:

'''

Container for a PRMS parameter optimization routine consisting of the

four stages as described in Hay, et al, 2006

(ftp://brrftp.cr.usgs.gov/pub/mows/software/luca_s/jawraHay.pdf).

Example:

>>> from prms_python import Data, Optimizer, Parameters

>>> params = Parameters('path/to/parameters')

>>> data = Data('path/to/data')

>>> control = 'path/to/control'

>>> work_directory = 'path/to/create/simulations'

>>> optr = Optimizer(params, data, control, work_directory, \

title='the title', description='desc')

>>> srad_hru = 2490

>>> optr.srad('path/to/reference_data/measured_srad.csv', srad_hru)

'''

## constant attributes for allowable range of solrad parameters

ir = (-60.0, 10.0) # PRMS dday_intcp range

sr = (0.2, 0.9) # dday_slope range

def __init__(self, parameters, data, control_file, working_dir,

title, description=None):

if isinstance(parameters, Parameters):

self.parameters = parameters

else:

raise TypeError('parameters must be instance of Parameters')

if isinstance(data, Data):

self.data = data

else:

raise TypeError('data must be instance of Data')

if not os.path.isdir(working_dir):

os.mkdir(working_dir)

## user needs to enter a title for output info now

# if not title:

# title = 'unnamed_optimization_{}'.format(\

# dt.datetime.today().strftime('%Y-%m-%d'))

self.control_file = control_file

self.working_dir = working_dir

self.title = title

self.description = description

def srad(self, reference_srad_path, station_nhru, n_sims=10, method='',\

nproc=None):

'''

Optimize the monthly dday_intcp and dday_slope parameters by one of

two methods: 'uniform' or 'random' for uniform sampling

Args:

reference_srad_path (str): path to measured solar radiation data

station_nhru (int): hru index in PRMS that is geographically

near the measured solar radiation location. You must

have swrad 'nhru' listed as a statvar output in your

control file.

Kwargs:

method (str): XXX not yet implemented- all uniform now

'uniform' or 'random'; if 'random',

intcp_delta and slope_delta are ignored, if provided

n_sims (int): number of simulations to conduct

parameter optimization/uncertaitnty analysis.

Returns:

(SradOptimizationResult)

'''

srad_start_time = dt.datetime.now()

srad_start_time = srad_start_time.replace(second=0, microsecond=0)

## shifting all monthly values by random amount from uniform distribution

## resampling degree day slope and intercept simoultaneously

intcps = [_resample_param(self.parameters['dday_intcp'], Optimizer.ir[0],\

Optimizer.ir[1]) for i in range(n_sims)]

slopes = [_resample_param(self.parameters['dday_slope'], Optimizer.sr[0],\

Optimizer.sr[1]) for i in range(n_sims)]

self.data.write(OPJ(self.working_dir, 'data'))

series = SimulationSeries(

Simulation.from_data(

self.data, _mod_params(self.parameters, intcps[i], slopes[i]),

self.control_file,

OPJ(

self.working_dir,

'intcp:{0:.3f}_slope:{1:.3f}'.format(np.mean(intcps[i]),\

np.mean(slopes[i]))

)

for i in range(n_sims)

)

# run all scenarios

outputs = series.run(nproc=nproc).outputs_iter()

srad_end_time = dt.datetime.now()

srad_end_time = srad_end_time.replace(second=0, microsecond=0)

def _error(x, y):

ret = abs(log10(x) - log10(y)).dropna()

ret = sum(ret)

return ret

measured_srad = pd.Series.from_csv(

reference_srad_path, parse_dates=True

)

srad_meta = {'stage' : 'swrad',

'hru_id' : station_nhru,

'optimization_title' : self.title,

'optimization_description' : self.description,

'start_time' : str(srad_start_time),

'end_time' : str(srad_end_time),

'measured_swrad' : reference_srad_path,

'sim_dirs' : [],

'original_params' : self.parameters.base_file

}

for output in outputs:

srad_meta['sim_dirs'].append(output['simulation_dir'])

json_outfile = OPJ(self.working_dir, '{0}_swrad_opt.json'.format(self.title))

with open(json_outfile, 'w') as outf:

json.dump(srad_meta, outf, sort_keys = True, indent = 4, ensure_ascii = False)

print('{0}\nOutput information sent to {1}\n'.format('-' * 80, json_outfile))

# # calculate the top performing

# errors = (

# (

# output['simulation_dir'],

# _error(measured_srad,

# output['statvar']['swrad_' + str(station_nhru)])

# )

# for output in outputs

# )

# print("directory, error (swrad)")

# for directory, error in errors:

# print(directory, error) #

# monthly_errors = {str(mo): [] for mo in range(12)}

# for directory, error in errors:

# month, intcp, slope = (el.split(':')[1] for el in

# directory.split(os.sep)[-1].split('_'))

# monthly_errors[month].append((intcp, slope, error))

# rankings = {

# str(mo): list(sorted(monthly_errors[str(mo)], key=lambda x: x[-1]))

# for mo in range(12)

# }

# tops = [(mo, rankings[str(mo)][0]) for mo in range(12)]

# # update internal parameters

# for top in tops:

# mo = top[0]

# self.parameters['dday_intcp'][mo] = tops[mo][1][0]

# self.parameters['dday_slope'][mo] = tops[mo][1][1]

# return {

# 'best': tops,

# 'all': rankings

# }

def _resample_param(param, p_min, p_max, noise_factor=0.1 ):

"""

Resample PRMS parameter by shifting all values by a constant that is

taken from a uniform distribution, where the range of the shift

values is equal to the difference between the min(max) of the parameter

set and the min(max) of the allowable range from PRMS. Next add noise

to each parameter element by adding a RV from a normal distribution

with mean 0, sigma = param allowable range / 10.

Arguments:

param (numpy.ndarray): ndarray of parameter to be resampled

p_min (float): lower bound of PRMS allowable range for param

p_max (float): upper bound of PRMS allowable range for param

noise_factor (float): factor to multiply parameter range by,

use the result as the standard deviation for the normal rand.

variable used to add element wise noise. i.e. higher

noise facter will result in higher noise added to each param

element.

Returns:

tmp (numpy.ndarry): ndarray of param after uniform random mean

shift and element-wise noise addition (normal r.v.)

"""

low_bnd = p_min - np.min(param) # lowest param value minus allowable min

up_bnd = p_max - np.max(param)

s = (p_max - p_min) * noise_factor # default is one tenth rangee (0.1)

shifted_param = np.random.uniform(low=low_bnd, high=up_bnd) + param

## add noise to each point keeping result within allowable range

while True:

tmp = shifted_param + np.random.normal(0,s,size=(np.shape(param)))

if np.max(tmp) <= p_max and np.min(tmp) >= p_min:

return tmp

def _mod_params(parameters, intcp, slope):

ret = deepcopy(parameters)

#print (intcp, slope)

ret['dday_intcp'] = intcp

ret['dday_slope'] = slope

return ret

class OptimizationResult:

def __init__(self, working_dir, stage='all'):

self.working_dir = working_dir

self.stage = stage

self.metadata_json_paths = self.get_optr_jsons(working_dir, stage)

def get_optr_jsons(self, work_dir, stage):

"""

Retrieve locations of optimization output jsons which contain

important metadata needed to understand optimization results.

Create dictionary of each optimization stage as keys, and lists

of corresponding json file paths for each stage as values.

Arguments:

work_dir (str): path to simulation directory where

optimization simulations were conducted and where

corresponding json files should exist.

stage (str): the stage ('swrad', 'pet', 'flow', etc.) of

the optimization in which to gather the jsons, if

stage is 'all' then each stage will be gathered.

Returns:

ret (dict): dictionary of stage (keys) and lists of

json file paths for that stage (values).

"""

ret = {}

if stage != 'all':

ret[stage] = [OPJ(work_dir, f) for f in\

os.listdir(work_dir) if\

f.endswith('_{0}_opt.json'.format(stage)) ]

else:

stages = ['swrad', 'pet', 'flow']

for s in stages:

ret[s] = self.get_optr_jsons(work_dir, s)

return ret

class SradOptimizationResult(OptimizationResult):

def __init__(self, working_dir, stage='swrad' ):

OptimizationResult.__init__(self, working_dir, stage)

class PetOptimizationResult(OptimizationResult):

def __init__(self, working_dir, stage='pet'):

OptimizationResult.__init__(self, working_dir, stage)

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

optimizer.py

Latest commit

History

optimizer.py

File metadata and controls