# conf.py will parse this YAML file and extract parameters based on what is specified.
# Note: the YAML parser will NOT evaluate expressions in the value fields;
# e.g. "validation_frac: 1.0/3.0" results in the string value "1.0/3.0".
# Shot data (signal data, shot lists, processed shots, normalization, etc.) is read
# and written under fs_path / [username] / signal_data | shot_lists | processed shots, etc.
# (username is automatically added as the first subdirectory if user_subdir == True).
# Outputs (csvlog, trained model checkpoints, etc.) are written under
# fs_path_output / [username] / results | csv_logs | model_checkpoints | Graph, etc.
fs_path: '/Users/'
user_subdir: True
fs_path_output: '/Users/'
user_subdir_output: True
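# Example resulting layout (illustrative only, assuming the username is "alice" and
# the defaults above are kept):
#   inputs:  /Users/alice/signal_data/, /Users/alice/shot_lists/, ...
#   outputs: /Users/alice/results/, /Users/alice/csv_logs/, /Users/alice/model_checkpoints/, /Users/alice/Graph/, ...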
target: 'hinge' # alternatives: 'maxhinge', 'binary'
num_gpus: 1 # per node
paths:
    signal_prepath: '/signal_data/' # /signal_data/jet/
    shot_list_dir: '/shot_lists/'
    tensorboard_save_path: '/Graph/'
    data: d3d_0D
    # if specific_signals is left as the empty list [], all valid signals defined for the
    # machine are used. Only set this if a custom subset is needed, e.g.:
    specific_signals: [] # ['q95','li','ip','betan','energy','lm','pradcore','pradedge','pradtot','pin','torquein','tmamp1','tmamp2','tmfreq1','tmfreq2','pechin','energydt','ipdirect','etemp_profile','edens_profile']
    executable: "mpi_learn.py"
    shallow_executable: "learn.py"
data:
    bleed_in: 0 # how many shots from the test set to use in training?
    bleed_in_repeat_fac: 1 # how many times to repeat shots in training and validation?
    bleed_in_remove_from_test: True
    bleed_in_equalize_sets: False
    signal_to_augment: None # 'plasma current'
    augmentation_mode: None
    augment_during_training: False
    cut_shot_ends: True
    recompute: False
    recompute_normalization: False
    # specifies which of the signals in the signals_dirs order contains the plasma current info
    current_index: 0
    plotting: False
    # maximum number of shots to use
    use_shots: 200000 # 1000
    positive_example_penalty: 1.0 # by what factor to upweight positive examples?
    # normalization timescale
    dt: 0.001
    T_min_warn: 30
    # maximum TTD considered
    T_max: 1000.0
    # warning time in seconds
    # The shortest warning time works best so far (less overfitting); log(TTD) prediction
    # also works well, and 0.5 s is better than 0.2 s.
    T_warning: 1.024 # 0.512 # 0.25 # 1.0
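    # Worked example (assuming dt is in seconds): with dt = 0.001 and T_warning = 1.024,
    # the warning window spans 1.024 / 0.001 = 1024 timesteps.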
    current_thresh: 750000
    current_end_thresh: 10000
    # the characteristic decay length of the decaying moving average window
    window_decay: 2
    # the width of the actual window
    window_size: 10
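    # Sketch of the implied weighting (an assumption, not taken from the code):
    # weights w_i ~ exp(-i / window_decay) for i = 0..window_size-1, normalized to sum to 1.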
    # TODO(KGF): optimize the normalizer parameters
    normalizer: 'var'
    norm_stat_range: 100.0
    equalize_classes: False
    # the fraction of samples with which to train the shallow model
    # shallow_sample_prob: 0.01
    floatx: 'float32'
model:
    loss_scale_factor: 1.0
    use_batch_norm: false
    torch: False
    shallow: False
    shallow_model:
        type: "xgboost" # "random_forest"
        # the number of samples to use for training
        num_samples: 1000000
        n_estimators: 100 # used in random forest
        max_depth: 3 # used in random forest and xgboost (default: 3)
        C: 1.0 # used in svm
        kernel: "rbf" # rbf, sigmoid, linear, poly; for svm
        learning_rate: 0.1 # used in xgboost
        scale_pos_weight: 10.0 # used in xgboost
        # the final hidden layer has this many neurons; each preceding layer has twice as many
        final_hidden_layer_size: 10
        num_hidden_layers: 3
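        # e.g. with the values above, the three hidden layers have sizes 40 -> 20 -> 10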
        learning_rate_mlp: 0.0001
        mlp_regularization: 0.0001
        # should a finished model be loaded if available?
        skip_train: False
    # sequence length and batch size used at prediction/evaluation time
    pred_length: 200
    pred_batch_size: 128
    # length of LSTM memory
    # TODO(KGF): optimize length of LSTM memory
    length: 128
    skip: 1
    # hidden layer size
    # TODO(KGF): optimize size of RNN layers
    # size 100 gives slight overfitting, size 20 none; 200 is not better than 100.
    # Prediction is much better with size 100; size 20 cannot capture the data.
    rnn_size: 200
    rnn_type: 'LSTM'
    # TODO(KGF): optimize number of RNN layers
    rnn_layers: 2
    num_conv_filters: 128
    size_conv_filters: 3
    num_conv_layers: 3
    pool_size: 2
    dense_size: 128
    extra_dense_input: False
    # no difference found between optimizers so far
    optimizer: 'adam'
    clipnorm: 10.0
    regularization: 0.001
    dense_regularization: 0.001
    # lr = 1e-4 is too high, 5e-7 is too low. 5e-5 seems best at batch size 256, on the
    # full dataset with ~10 epochs and an lr decay of 0.90.
    # lr = 1e-4 also works well if we decay a lot (i.e. ~0.7 or more)
    lr: 0.00002 # 0.00001 # 0.0005 # for adam plots 0.0000001
    lr_decay: 0.97 # 0.98 # 0.9
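    # Worked example (assuming the decay is applied once per epoch, as the note above
    # suggests): lr after 10 epochs = 0.00002 * 0.97^10 ~= 1.5e-5.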
    stateful: True
    return_sequences: True
    dropout_prob: 0.1
    # only relevant for MPI training: the number of initial steps to run with a single replica
    warmup_steps: 0
    # how many initial timesteps to ignore during evaluation (to let the internal state settle)
    ignore_timesteps: 100
    backend: 'tensorflow'
training:
    as_array_of_shots: True
    shuffle_training: True
    # used iff the test set and the (train ∪ validate) set are both sampled from the same
    # distribution/source lists of shots:
    train_frac: 0.75
    validation_frac: 0.3333333333333333
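    # Worked example (assuming validation_frac is taken out of the train_frac portion):
    # test = 25% of shots; validate = 0.75 * (1/3) = 25%; train = the remaining 50%.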
    batch_size: 128 # 256
    # max_patch_length was the culprit for training failing to progress!
    # Values lower than 1000 perform very poorly.
    max_patch_length: 100000
    # how many shots are we loading at once?
    num_shots_at_once: 200
    # set to a large number; this is the maximum number of epochs. Early stopping occurs
    # if the loss does not decrease for `patience` epochs (see callbacks below).
    num_epochs: 1000
    use_mock_data: False
    data_parallel: False
    hyperparam_tuning: False
    batch_generator_warmup_steps: 0
    use_process_generator: False
    num_batches_minimum: 20 # minimum number of batches per epoch
    ranking_difficulty_fac: 1.0 # how much to upweight incorrectly classified shots during training
    timeline_prof: False
    step_limit: 50
    no_validation: True
callbacks:
    list: ['earlystop']
    metrics: ['val_loss','val_roc','train_loss']
    mode: 'max'
    monitor: 'val_roc'
    patience: 5
    write_grads: False
    monitor_test: True
    monitor_times: [30,70,200,500,1000]
env:
    name: 'frnn'
    type: 'anaconda'