
Commit 70cb2dd

Merge pull request PPPLDeepLearning#27 from PPPLDeepLearning/batch_norm_bidirectional
Enable batch norm and bidirectional LSTM
2 parents: 124005a + 40bfabc

File tree: 3 files changed, +53 −36 lines

examples/conf.yaml
plasma/models/builder.py
setup.py

examples/conf.yaml

Lines changed: 2 additions & 0 deletions
@@ -53,6 +53,8 @@ data:
     floatx: 'float32'

 model:
+    use_bidirectional: false
+    use_batch_norm: false
     shallow: False
     shallow_model:
         num_samples: 1000000 #1000000 #the number of samples to use for training
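Both new options default to false, so existing configurations keep the old behavior until they are switched on. A minimal sketch (assumptions: executed from the repository root, PyYAML of this era) of reading the new flags the same way builder.py does:

# Minimal sketch (assumption: run from the repository root) of reading
# the two model flags this commit adds to examples/conf.yaml.
import yaml

with open('examples/conf.yaml') as f:
    conf = yaml.load(f)  # newer PyYAML releases prefer yaml.safe_load

model_conf = conf['model']
use_bidirectional = model_conf['use_bidirectional']  # false by default
use_batch_norm = model_conf['use_batch_norm']        # false by default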

plasma/models/builder.py

Lines changed: 50 additions & 35 deletions
@@ -2,8 +2,8 @@
 import keras
 from keras.models import Sequential, Model
 from keras.layers import Input
-from keras.layers.core import Dense, Activation, Dropout, Lambda, Reshape, Flatten, Permute
-from keras.layers.recurrent import LSTM, SimpleRNN
+from keras.layers.core import Dense, Activation, Dropout, Lambda, Reshape, Flatten, Permute, RepeatVector
+from keras.layers import LSTM, SimpleRNN, Bidirectional, BatchNormalization
 from keras.layers.convolutional import Convolution1D
 from keras.layers.pooling import MaxPooling1D
 from keras.utils.data_utils import get_file
@@ -12,7 +12,6 @@
 from keras.callbacks import Callback
 from keras.regularizers import l1,l2,l1_l2

-
 import keras.backend as K

 import dill
@@ -73,10 +72,12 @@ def get_0D_1D_indices(self):
     def build_model(self,predict,custom_batch_size=None):
         conf = self.conf
         model_conf = conf['model']
+        use_bidirectional = model_conf['use_bidirectional']
         rnn_size = model_conf['rnn_size']
         rnn_type = model_conf['rnn_type']
         regularization = model_conf['regularization']
         dense_regularization = model_conf['dense_regularization']
+        use_batch_norm = model_conf['use_batch_norm']

         dropout_prob = model_conf['dropout_prob']
         length = model_conf['length']
@@ -118,6 +119,7 @@ def build_model(self,predict,custom_batch_size=None):
         batch_shape_non_temporal=(batch_size,num_signals)

         indices_0d,indices_1d,num_0D,num_1D = self.get_0D_1D_indices()
+
         def slicer(x,indices):
             return x[:,indices]

@@ -130,23 +132,47 @@ def slicer_output_shape(input_shape,indices):
         pre_rnn_input = Input(shape=(num_signals,))

         if num_1D > 0:
-            #pre_rnn_0D = Lambda(lambda x: slicer(x,indices_0d),lambda s: slicer_output_shape(s,indices_0d))(pre_rnn_input)
-            #pre_rnn_1D = Lambda(lambda x: slicer(x,indices_1d),lambda s: slicer_output_shape(s,indices_1d))(pre_rnn_input)
-            #idx0D_tensor = K.variable(indices_0d)
-            #idx1D_tensor = K.variable(indices_1d)
             pre_rnn_1D = Lambda(lambda x: x[:,len(indices_0d):],output_shape=(len(indices_1d),))(pre_rnn_input)
             pre_rnn_0D = Lambda(lambda x: x[:,:len(indices_0d)],output_shape=(len(indices_0d),))(pre_rnn_input)
             pre_rnn_1D = Reshape((num_1D,len(indices_1d)//num_1D)) (pre_rnn_1D)
             pre_rnn_1D = Permute((2,1)) (pre_rnn_1D)

             for i in range(model_conf['num_conv_layers']):
                 div_fac = 2**i
-                pre_rnn_1D = Convolution1D(num_conv_filters//div_fac,size_conv_filters,padding='valid',activation='relu') (pre_rnn_1D)
-                pre_rnn_1D = Convolution1D(num_conv_filters//div_fac,1,padding='valid',activation='relu') (pre_rnn_1D)
+                '''The first conv layer learns `num_conv_filters//div_fac` filters (aka kernels),
+                each of size `(size_conv_filters, num_1D)`. Its output will have shape
+                (None, len(indices_1d)//num_1D - size_conv_filters + 1, num_conv_filters//div_fac),
+                i.e., for each position in the input spatial series (direction along radius),
+                the activation of each filter at that position.'''
+
+                '''For i=1 the first conv layer would get:
+                (None, (len(indices_1d)//num_1D - size_conv_filters + 1)/pool_size - size_conv_filters + 1, num_conv_filters//div_fac)'''
+                pre_rnn_1D = Convolution1D(num_conv_filters//div_fac,size_conv_filters,padding='valid') (pre_rnn_1D)
+                if use_batch_norm: pre_rnn_1D = BatchNormalization()(pre_rnn_1D)
+                pre_rnn_1D = Activation('relu')(pre_rnn_1D)
+
+                '''The output of the second conv layer will have shape
+                (None, len(indices_1d)//num_1D - size_conv_filters + 1, num_conv_filters//div_fac),
+                i.e., for each position in the input spatial series (direction along radius),
+                the activation of each filter at that position.
+
+                For i=1 the second layer would output
+                (None, (len(indices_1d)//num_1D - size_conv_filters + 1)/pool_size - size_conv_filters + 1, num_conv_filters//div_fac)'''
+                pre_rnn_1D = Convolution1D(num_conv_filters//div_fac,1,padding='valid') (pre_rnn_1D)
+                if use_batch_norm: pre_rnn_1D = BatchNormalization()(pre_rnn_1D)
+                pre_rnn_1D = Activation('relu')(pre_rnn_1D)
+                '''Outputs (None, (len(indices_1d)//num_1D - size_conv_filters + 1)/pool_size, num_conv_filters//div_fac)
+
+                For i=1 the pooling layer would output:
+                (None, ((len(indices_1d)//num_1D - size_conv_filters + 1)/pool_size - size_conv_filters + 1)/pool_size, num_conv_filters//div_fac)'''
                 pre_rnn_1D = MaxPooling1D(pool_size) (pre_rnn_1D)
             pre_rnn_1D = Flatten() (pre_rnn_1D)
-            pre_rnn_1D = Dense(dense_size,activation='relu',kernel_regularizer=l2(dense_regularization),bias_regularizer=l2(dense_regularization),activity_regularizer=l2(dense_regularization)) (pre_rnn_1D)
-            pre_rnn_1D = Dense(dense_size//4,activation='relu',kernel_regularizer=l2(dense_regularization),bias_regularizer=l2(dense_regularization),activity_regularizer=l2(dense_regularization)) (pre_rnn_1D)
+            pre_rnn_1D = Dense(dense_size,kernel_regularizer=l2(dense_regularization),bias_regularizer=l2(dense_regularization),activity_regularizer=l2(dense_regularization)) (pre_rnn_1D)
+            if use_batch_norm: pre_rnn_1D = BatchNormalization()(pre_rnn_1D)
+            pre_rnn_1D = Activation('relu')(pre_rnn_1D)
+            pre_rnn_1D = Dense(dense_size//4,kernel_regularizer=l2(dense_regularization),bias_regularizer=l2(dense_regularization),activity_regularizer=l2(dense_regularization)) (pre_rnn_1D)
+            if use_batch_norm: pre_rnn_1D = BatchNormalization()(pre_rnn_1D)
+            pre_rnn_1D = Activation('relu')(pre_rnn_1D)
             pre_rnn = Concatenate() ([pre_rnn_0D,pre_rnn_1D])
         else:
             pre_rnn = pre_rnn_input
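A note on the pattern this hunk introduces: `activation='relu'` is removed from the `Convolution1D` and `Dense` calls so that `BatchNormalization` can sit between each linear layer and its nonlinearity, normalizing the pre-activations. A self-contained sketch of the building block (layer sizes and the input shape are illustrative assumptions, not values from conf.yaml):

# Minimal sketch of the conv -> (optional) batch norm -> ReLU pattern used
# above; sizes and the input shape are illustrative assumptions.
from keras.layers import Activation, BatchNormalization, Input
from keras.layers.convolutional import Convolution1D
from keras.models import Model

def conv_bn_relu(x, filters, kernel_size, use_batch_norm):
    x = Convolution1D(filters, kernel_size, padding='valid')(x)  # linear conv, no activation
    if use_batch_norm:
        x = BatchNormalization()(x)  # normalize pre-activations
    return Activation('relu')(x)     # nonlinearity applied last

inp = Input(shape=(64, 8))  # (spatial positions, channels)
out = conv_bn_relu(inp, 128, 3, use_batch_norm=True)
Model(inputs=inp, outputs=out).summary()

When `use_batch_norm` is false this composition reduces exactly to the old `Convolution1D(..., activation='relu')`, which is why the flag can default to off without changing existing models.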
@@ -157,33 +183,22 @@ def slicer_output_shape(input_shape,indices):
         pre_rnn = Dense(dense_size//4,activation='relu',kernel_regularizer=l2(dense_regularization),bias_regularizer=l2(dense_regularization),activity_regularizer=l2(dense_regularization)) (pre_rnn)

         pre_rnn_model = Model(inputs = pre_rnn_input,outputs=pre_rnn)
+        pre_rnn_model.summary()
         x_input = Input(batch_shape = batch_input_shape)
         x_in = TimeDistributed(pre_rnn_model) (x_input)

-        # x_input = Input(batch_shape=batch_input_shape)
-        # if num_1D > 0:
-        #     x_0D = Lambda(lambda x: slicer(x,indices_0d),lambda s: slicer_output_shape(s,indices_0d)) (x_input)
-        #     x_1D = Lambda(lambda x: slicer(x,indices_1d),lambda s: slicer_output_shape(s,indices_1d)) (x_input)
-        #
-        #     x_1D = TimeDistributed(Reshape((num_1D,len(indices_1d)/num_1D))) (x_1D)
-        #     for i in range(model_conf['num_conv_layers']):
-        #         x_1D = TimeDistributed(Conv1D(num_conv_filters,size_conv_filters,activation='relu')) (x_1D)
-        #         x_1D = TimeDistributed(MaxPooling1D(pool_size)) (x_1D)
-        #     x_1D = TimeDistributed(Flatten()) (x_1D)
-        #     x_in = TimeDistributed(Concatenate) ([x_0D,x_1D])
-        #
-        # else:
-        #     x_in = x_input
-        #x_in = TimeDistributed(Dense(100,activation='tanh')) (x_in)
-        #x_in = TimeDistributed(Dense(30,activation='tanh')) (x_in)
-        #x_in = TimeDistributed(Dense(2*(num_0D+num_1D)),activation='relu') (x_in)
-        # x = TimeDistributed(Dense(2*(num_0D+num_1D)))
-        # model.add(TimeDistributed(Dense(num_density_channels,bias=True),batch_input_shape=batch_input_shape))
-        for _ in range(model_conf['rnn_layers']):
-            x_in = rnn_model(rnn_size, return_sequences=return_sequences,#batch_input_shape=batch_input_shape,
-                stateful=stateful,kernel_regularizer=l2(regularization),recurrent_regularizer=l2(regularization),
-                bias_regularizer=l2(regularization),dropout=dropout_prob,recurrent_dropout=dropout_prob) (x_in)
-            x_in = Dropout(dropout_prob) (x_in)
+        if use_bidirectional:
+            for _ in range(model_conf['rnn_layers']):
+                x_in = Bidirectional(rnn_model(rnn_size, return_sequences=return_sequences,
+                    stateful=stateful,kernel_regularizer=l2(regularization),recurrent_regularizer=l2(regularization),
+                    bias_regularizer=l2(regularization),dropout=dropout_prob,recurrent_dropout=dropout_prob)) (x_in)
+                x_in = Dropout(dropout_prob) (x_in)
+        else:
+            for _ in range(model_conf['rnn_layers']):
+                x_in = rnn_model(rnn_size, return_sequences=return_sequences,#batch_input_shape=batch_input_shape,
+                    stateful=stateful,kernel_regularizer=l2(regularization),recurrent_regularizer=l2(regularization),
+                    bias_regularizer=l2(regularization),dropout=dropout_prob,recurrent_dropout=dropout_prob) (x_in)
+                x_in = Dropout(dropout_prob) (x_in)
         if return_sequences:
             #x_out = TimeDistributed(Dense(100,activation='tanh')) (x_in)
             x_out = TimeDistributed(Dense(1,activation=output_activation)) (x_in)
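On the new bidirectional branch: each stacked recurrent layer is wrapped in `Bidirectional`, which runs a forward and a backward copy over the time axis and concatenates their outputs, so every layer emits 2*rnn_size features per timestep. A stripped-down sketch of the stacking (LSTM picked for concreteness; sizes, layer count, and input shape are illustrative assumptions):

# Stripped-down sketch of the bidirectional stacking above; rnn_size,
# rnn_layers, dropout_prob and the input shape are illustrative assumptions.
from keras.layers import LSTM, Bidirectional, Input
from keras.layers.core import Dropout
from keras.models import Model

rnn_size, rnn_layers, dropout_prob = 200, 2, 0.1

x_input = Input(shape=(None, 14))  # (timesteps, signals per timestep)
x_in = x_input
for _ in range(rnn_layers):
    # forward + backward outputs are concatenated -> 2*rnn_size features
    x_in = Bidirectional(LSTM(rnn_size, return_sequences=True,
                              dropout=dropout_prob,
                              recurrent_dropout=dropout_prob))(x_in)
    x_in = Dropout(dropout_prob)(x_in)
Model(inputs=x_input, outputs=x_in).summary()

Note that a bidirectional layer can only produce output once the whole sequence is available, which is worth keeping in mind for stateful, streaming-style prediction.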

setup.py

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@
     download_url = "https://github.com/PPPLDeepLearning/plasma-python",
     #license = "Apache Software License v2",
     test_suite = "tests",
-    install_requires = ['keras==2.0.6','pathos','matplotlib==2.0.2','hyperopt','mpi4py','xgboost'],
+    install_requires = ['keras>2.0.8','pathos','matplotlib==2.0.2','hyperopt','mpi4py','xgboost'],
     tests_require = [],
     classifiers = ["Development Status :: 3 - Alpha",
                    "Environment :: Console",
