Skip to content

Commit 8512cd9

Browse files
authored
Merge pull request PPPLDeepLearning#29 from PPPLDeepLearning/jdev
Merge jdev into master.
2 parents 77e41a5 + e21f431 commit 8512cd9

File tree

15 files changed

+430
-313
lines changed

15 files changed

+430
-313
lines changed

data/signals.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,10 +240,15 @@ def fetch_nstx_data(signal_path,shot_num,c):
240240

241241
fully_defined_signals = {sig_name: sig for (sig_name, sig) in all_signals_restricted.items() if sig.is_defined_on_machines(all_machines)}
242242
fully_defined_signals_0D = {sig_name: sig for (sig_name, sig) in all_signals_restricted.items() if ( sig.is_defined_on_machines(all_machines) and sig.num_channels == 1) }
243+
fully_defined_signals_1D = {sig_name: sig for (sig_name, sig) in all_signals_restricted.items() if ( sig.is_defined_on_machines(all_machines) and sig.num_channels > 1) }
244+
243245
d3d_signals = {sig_name: sig for (sig_name, sig) in all_signals_restricted.items() if sig.is_defined_on_machine(d3d)}
246+
d3d_signals_0D = {sig_name: sig for (sig_name, sig) in all_signals_restricted.items() if (sig.is_defined_on_machine(d3d) and sig.num_channels == 1)}
247+
d3d_signals_1D = {sig_name: sig for (sig_name, sig) in all_signals_restricted.items() if (sig.is_defined_on_machine(d3d) and sig.num_channels > 1)}
248+
244249
jet_signals = {sig_name: sig for (sig_name, sig) in all_signals_restricted.items() if sig.is_defined_on_machine(jet)}
245250
jet_signals_0D = {sig_name: sig for (sig_name, sig) in all_signals_restricted.items() if (sig.is_defined_on_machine(jet) and sig.num_channels == 1)}
246-
251+
jet_signals_1D = {sig_name: sig for (sig_name, sig) in all_signals_restricted.items() if (sig.is_defined_on_machine(jet) and sig.num_channels > 1)}
247252

248253
#['pcechpwrf'] #Total ECH Power Not always on!
249254
### 0D EFIT signals ###

examples/conf.yaml

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#will do stuff in fs_path / [username] / signal_data | shot_lists | processed shots, etc.
44

55
fs_path: '/tigress'
6-
target: 'maxhinge' #'maxhinge' #'maxhinge' #'binary' #'hinge'
6+
target: 'hinge' #'maxhinge' #'maxhinge' #'binary' #'hinge'
77
num_gpus: 4
88

99
paths:
@@ -17,8 +17,9 @@ paths:
1717

1818
data:
1919
bleed_in: 0 #how many shots from the test set to use in training?
20+
bleed_in_repeat_fac: 1 #how many times to repeat shots in training and validation?
2021
bleed_in_remove_from_test: True
21-
bleed_in_equalize_sets: True
22+
bleed_in_equalize_sets: False
2223
signal_to_augment: None #'plasma current' #or None
2324
augmentation_mode: 'none'
2425
augment_during_training: False
@@ -58,7 +59,7 @@ model:
5859
shallow: False
5960
shallow_model:
6061
num_samples: 1000000 #1000000 #the number of samples to use for training
61-
type: "mlp" #"xgboost" #"xgboost" #"random_forest" "xgboost"
62+
type: "xgboost" #"xgboost" #"xgboost" #"random_forest" "xgboost"
6263
n_estimators: 100 #for random forest
6364
max_depth: 3 #for random forest and xgboost (def = 3)
6465
C: 1.0 #for svm
@@ -92,8 +93,8 @@ model:
9293
#have not found a difference yet
9394
optimizer: 'adam'
9495
clipnorm: 10.0
95-
regularization: 0.0
96-
dense_regularization: 0.01
96+
regularization: 0.001
97+
dense_regularization: 0.001
9798
#1e-4 is too high, 5e-7 is too low. 5e-5 seems best at 256 batch size, full dataset and ~10 epochs, and lr decay of 0.90. 1e-4 also works well if we decay a lot (i.e ~0.7 or more)
9899
lr: 0.00002 #0.00001 #0.0005 #for adam plots 0.0000001 #0.00005 #0.00005 #0.00005
99100
lr_decay: 0.97 #0.98 #0.9

examples/mpi_learn.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,9 @@
8686
loader = Loader(conf,normalizer)
8787
print("...done")
8888

89+
#ensure training has a separate random seed for every worker
90+
np.random.seed(task_index)
91+
random.seed(task_index)
8992
if not only_predict:
9093
mpi_train(conf,shot_list_train,shot_list_validate,loader)
9194

examples/notebooks/FRNN_scaling.ipynb

Lines changed: 245 additions & 205 deletions
Large diffs are not rendered by default.

examples/performance_analysis.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@
3434
#P_thresh_opt = 0.566#0.566#0.92# analyzer.compute_tradeoffs_and_print_from_training()
3535
linestyle="-"
3636

37-
analyzer.compute_tradeoffs_and_plot('test',save_figure=save_figure,plot_string='_test',linestyle=linestyle)
37+
P_thresh_range,missed_range,fp_range = analyzer.compute_tradeoffs_and_plot('test',save_figure=save_figure,plot_string='_test',linestyle=linestyle)
38+
np.savez('test_roc.npz',"P_thresh_range",P_thresh_range,"missed_range",missed_range,"fp_range",fp_range)
3839
analyzer.compute_tradeoffs_and_plot('train',save_figure=save_figure,plot_string='_train',linestyle=linestyle)
3940

4041
analyzer.summarize_shot_prediction_stats_by_mode(P_thresh_opt,'test')
@@ -55,6 +56,8 @@
5556

5657
alarms,disr_alarms,nondisr_alarms = analyzer.gather_first_alarms(P_thresh_opt,'test')
5758
analyzer.hist_alarms(disr_alarms,'disruptive alarms, P thresh = {}'.format(P_thresh_opt),save_figure=save_figure,linestyle=linestyle)
59+
np.savez('disruptive_alarms_test.npz',"disr_alarms",disr_alarms,"P_thresh_opt",P_thresh_opt)
60+
5861
print('{} disruptive alarms'.format(len(disr_alarms)))
5962
print('{} seconds mean alarm time'.format(np.mean(disr_alarms[disr_alarms > 0])))
6063
print('{} seconds median alarm time'.format(np.median(disr_alarms[disr_alarms > 0])))

examples/tune_hyperparams.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77

88
tunables = []
99
shallow = False
10-
num_nodes = 2
11-
num_trials = 50
10+
num_nodes = 1
11+
num_trials = 20
1212

1313
t_warn = CategoricalHyperparam(['data','T_warning'],[0.256,1.024,10.024])
1414
cut_ends = CategoricalHyperparam(['data','cut_shot_ends'],[False,True])
@@ -34,14 +34,20 @@
3434
lr_decay = CategoricalHyperparam(['model','lr_decay'],[0.97,0.985,1.0])
3535
fac = CategoricalHyperparam(['data','positive_example_penalty'],[1.0,4.0,16.0])
3636
target = CategoricalHyperparam(['target'],['maxhinge','hinge','ttdinv','ttd'])
37-
batch_size = CategoricalHyperparam(['training','batch_size'],[64,256,1024])
38-
dropout_prob = CategoricalHyperparam(['model','dropout_prob'],[0.1,0.3,0.5])
39-
conv_filters = CategoricalHyperparam(['model','num_conv_filters'],[5,10])
37+
#target = CategoricalHyperparam(['target'],['hinge','ttdinv','ttd'])
38+
batch_size = CategoricalHyperparam(['training','batch_size'],[128,256])
39+
dropout_prob = CategoricalHyperparam(['model','dropout_prob'],[0.01,0.05,0.1])
40+
conv_filters = CategoricalHyperparam(['model','num_conv_filters'],[128,256])
4041
conv_layers = IntegerHyperparam(['model','num_conv_layers'],2,4)
41-
rnn_layers = IntegerHyperparam(['model','rnn_layers'],1,4)
42-
rnn_size = CategoricalHyperparam(['model','rnn_size'],[100,200,300])
43-
tunables = [lr,lr_decay,fac,target,batch_size,dropout_prob]
44-
tunables += [conv_filters,conv_layers,rnn_layers,rnn_size]
42+
rnn_layers = IntegerHyperparam(['model','rnn_layers'],1,3)
43+
rnn_size = CategoricalHyperparam(['model','rnn_size'],[128,256])
44+
dense_size = CategoricalHyperparam(['model','dense_size'],[128,256])
45+
extra_dense_input = CategoricalHyperparam(['model','extra_dense_input'],[False,True])
46+
equalize_classes = CategoricalHyperparam(['data','equalize_classes'],[False,True])
47+
#rnn_length = CategoricalHyperparam(['model','length'],[32,128])
48+
#tunables = [lr,lr_decay,fac,target,batch_size,dropout_prob]
49+
tunables = [lr,lr_decay,fac,target,batch_size,equalize_classes,dropout_prob]
50+
tunables += [conv_filters,conv_layers,rnn_layers,rnn_size,dense_size,extra_dense_input]
4551
tunables += [cut_ends,t_warn]
4652

4753

plasma/conf_parser.py

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,10 @@ def parameters(input_file):
9393
params['paths']['shot_files'] = [jet_carbon_wall]
9494
params['paths']['shot_files_test'] = [jet_iterlike_wall]
9595
params['paths']['use_signals_dict'] = jet_signals_0D
96+
elif params['paths']['data'] == 'jet_data_1D':
97+
params['paths']['shot_files'] = [jet_carbon_wall]
98+
params['paths']['shot_files_test'] = [jet_iterlike_wall]
99+
params['paths']['use_signals_dict'] = jet_signals_1D
96100
elif params['paths']['data'] == 'jet_carbon_data':
97101
params['paths']['shot_files'] = [jet_carbon_wall]
98102
params['paths']['shot_files_test'] = []
@@ -105,6 +109,17 @@ def parameters(input_file):
105109
params['paths']['shot_files'] = [jenkins_jet_carbon_wall]
106110
params['paths']['shot_files_test'] = [jenkins_jet_iterlike_wall]
107111
params['paths']['use_signals_dict'] = jet_signals
112+
elif params['paths']['data'] == 'jet_data_fully_defined': #jet data but with fully defined signals
113+
params['paths']['shot_files'] = [jet_carbon_wall]
114+
params['paths']['shot_files_test'] = [jet_iterlike_wall]
115+
params['paths']['use_signals_dict'] = fully_defined_signals
116+
elif params['paths']['data'] == 'jet_data_fully_defined_0D': #jet data but with fully defined signals
117+
params['paths']['shot_files'] = [jet_carbon_wall]
118+
params['paths']['shot_files_test'] = [jet_iterlike_wall]
119+
params['paths']['use_signals_dict'] = fully_defined_signals_0D
120+
121+
122+
108123
elif params['paths']['data'] == 'd3d_data':
109124
params['paths']['shot_files'] = [d3d_full]
110125
params['paths']['shot_files_test'] = []
@@ -131,25 +146,40 @@ def parameters(input_file):
131146
params['paths']['shot_files_test'] = []
132147
params['paths']['use_signals_dict'] = {'q95':q95,'li':li,'ip':ip,'lm':lm,'betan':betan,'energy':energy,'dens':dens,'pradcore':pradcore,'pradedge':pradedge,'pin':pin,'torquein':torquein,'ipdirect':ipdirect,'iptarget':iptarget,'iperr':iperr,
133148
'etemp_profile':etemp_profile ,'edens_profile':edens_profile}
134-
149+
elif params['paths']['data'] == 'd3d_data_fully_defined': #d3d data but with fully defined signals
150+
params['paths']['shot_files'] = [d3d_full]
151+
params['paths']['shot_files_test'] = []
152+
params['paths']['use_signals_dict'] = fully_defined_signals
153+
elif params['paths']['data'] == 'd3d_data_fully_defined_0D': #d3d data but with fully defined signals
154+
params['paths']['shot_files'] = [d3d_full]
155+
params['paths']['shot_files_test'] = []
156+
params['paths']['use_signals_dict'] = fully_defined_signals_0D
135157

136158
#cross-machine
137159
elif params['paths']['data'] == 'jet_to_d3d_data':
138-
params['paths']['shot_files'] = [jet_carbon_wall]
160+
params['paths']['shot_files'] = [jet_full]
139161
params['paths']['shot_files_test'] = [d3d_full]
140162
params['paths']['use_signals_dict'] = fully_defined_signals
141163
elif params['paths']['data'] == 'd3d_to_jet_data':
142164
params['paths']['shot_files'] = [d3d_full]
143165
params['paths']['shot_files_test'] = [jet_iterlike_wall]
144166
params['paths']['use_signals_dict'] = fully_defined_signals
145167
elif params['paths']['data'] == 'jet_to_d3d_data_0D':
146-
params['paths']['shot_files'] = [jet_carbon_wall]
168+
params['paths']['shot_files'] = [jet_full]
147169
params['paths']['shot_files_test'] = [d3d_full]
148170
params['paths']['use_signals_dict'] = fully_defined_signals_0D
149171
elif params['paths']['data'] == 'd3d_to_jet_data_0D':
150172
params['paths']['shot_files'] = [d3d_full]
151173
params['paths']['shot_files_test'] = [jet_iterlike_wall]
152174
params['paths']['use_signals_dict'] = fully_defined_signals_0D
175+
elif params['paths']['data'] == 'jet_to_d3d_data_1D':
176+
params['paths']['shot_files'] = [jet_full]
177+
params['paths']['shot_files_test'] = [d3d_full]
178+
params['paths']['use_signals_dict'] = fully_defined_signals_1D
179+
elif params['paths']['data'] == 'd3d_to_jet_data_1D':
180+
params['paths']['shot_files'] = [d3d_full]
181+
params['paths']['shot_files_test'] = [jet_iterlike_wall]
182+
params['paths']['use_signals_dict'] = fully_defined_signals_1D
153183

154184

155185

plasma/models/builder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ def slicer_output_shape(input_shape,indices):
183183
pre_rnn = Dense(dense_size//4,activation='relu',kernel_regularizer=l2(dense_regularization),bias_regularizer=l2(dense_regularization),activity_regularizer=l2(dense_regularization)) (pre_rnn)
184184

185185
pre_rnn_model = Model(inputs = pre_rnn_input,outputs=pre_rnn)
186-
pre_rnn_model.summary()
186+
#pre_rnn_model.summary()
187187
x_input = Input(batch_shape = batch_input_shape)
188188
x_in = TimeDistributed(pre_rnn_model) (x_input)
189189

plasma/models/loader.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -88,10 +88,15 @@ def training_batch_generator(self,shot_list):
8888
yield X[start:end],y[start:end],reset_states_now,num_so_far,num_total
8989
epoch += 1
9090

91-
def fill_training_buffer(self,Xbuff,Ybuff,end_indices,shot):
91+
def fill_training_buffer(self,Xbuff,Ybuff,end_indices,shot,is_first_fill=False):
9292
sig,res = self.get_signal_result_from_shot(shot)
93-
sig_len = res.shape[0]
9493
length = self.conf['model']['length']
94+
if is_first_fill:#cut signal to random position
95+
cut_idx = np.random.randint(res.shape[0]-length+1)
96+
sig = sig[cut_idx:]
97+
res = res[cut_idx:]
98+
99+
sig_len = res.shape[0]
95100
sig_len = (sig_len // length)*length #make divisible by length
96101
assert(sig_len > 0)
97102
batch_idx = np.where(end_indices == 0)[0][0]
@@ -157,8 +162,10 @@ def training_batch_generator_partial_reset(self,shot_list):
157162
num_total = len(shot_list)
158163
num_so_far = 0
159164
returned = False
165+
num_steps = 0
160166
warmup_steps = self.conf['training']['batch_generator_warmup_steps']
161-
is_warmup_period = warmup_steps > 0
167+
is_warmup_period = num_steps < warmup_steps
168+
is_first_fill = num_steps < batch_size
162169
while True:
163170
# the list of all shots
164171
shot_list.shuffle()
@@ -174,11 +181,12 @@ def training_batch_generator_partial_reset(self,shot_list):
174181
X,Y = self.return_from_training_buffer(Xbuff,Ybuff,end_indices)
175182
yield X,Y,batches_to_reset,num_so_far,num_total,is_warmup_period
176183
returned = True
177-
warmup_steps -= 1
178-
is_warmup_period = warmup_steps > 0
184+
num_steps += 1
185+
is_warmup_period = num_steps < warmup_steps
186+
is_first_fill = num_steps < batch_size
179187
batches_to_reset[:] = False
180188

181-
Xbuff,Ybuff,batch_idx = self.fill_training_buffer(Xbuff,Ybuff,end_indices,shot)
189+
Xbuff,Ybuff,batch_idx = self.fill_training_buffer(Xbuff,Ybuff,end_indices,shot,is_first_fill)
182190
batches_to_reset[batch_idx] = True
183191
if returned and not is_warmup_period:
184192
num_so_far += 1

plasma/models/mpi_runner.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import time
2121
import datetime
2222
import numpy as np
23+
import random
2324

2425
from functools import partial
2526
import socket
@@ -170,7 +171,9 @@ def get_val(self):
170171

171172
class MPIModel():
172173
def __init__(self,model,optimizer,comm,batch_iterator,batch_size,num_replicas=None,warmup_steps=1000,lr=0.01,num_batches_minimum=100):
173-
# random.seed(task_index)
174+
random.seed(task_index)
175+
np.random.seed(task_index)
176+
self.start_time = time.time()
174177
self.epoch = 0
175178
self.num_so_far = 0
176179
self.num_so_far_accum = 0
@@ -466,7 +469,7 @@ def train_epoch(self):
466469
loss_averager.add_val(curr_loss)
467470
ave_loss = loss_averager.get_val()
468471
eta = self.estimate_remaining_time(t0 - t_start,self.num_so_far-self.epoch*num_total,num_total)
469-
write_str = '\r[{}] step: {} [ETA: {:.2f}s] [{:.2f}/{}], loss: {:.5f} [{:.5f}] | '.format(self.task_index,step,eta,1.0*self.num_so_far,num_total,ave_loss,curr_loss)
472+
write_str = '\r[{}] step: {} [ETA: {:.2f}s] [{:.2f}/{}], loss: {:.5f} [{:.5f}] | walltime: {:.4f} | '.format(self.task_index,step,eta,1.0*self.num_so_far,num_total,ave_loss,curr_loss,time.time()-self.start_time)
470473
print_unique(write_str + write_str_0)
471474
step += 1
472475
else:
@@ -640,6 +643,7 @@ def mpi_make_predictions_and_evaluate(conf,shot_list,loader,custom_path=None):
640643

641644

642645
def mpi_train(conf,shot_list_train,shot_list_validate,loader, callbacks_list=None):
646+
643647
loader.set_inference_mode(False)
644648
conf['num_workers'] = comm.Get_size()
645649

0 commit comments

Comments
 (0)