Skip to content

Commit a8d4100

Browse files
committed
Merge branch 'master' into transformer
2 parents 019c1bd + 960f323 commit a8d4100

20 files changed

+197
-120
lines changed

envs/adroit.cmd

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#!/usr/bin/env bash
2+
3+
module load anaconda3
4+
# must activate conda env before module loads
5+
conda activate frnn
6+
export OMPI_MCA_btl="tcp,self,vader"
7+
8+
module load cudatoolkit/10.2
9+
module load cudnn/cuda-10.1/7.6.3
10+
module load openmpi/gcc/3.1.5/64
11+
module load hdf5/gcc/openmpi-1.10.2/1.10.0 # like TigerGPU, this is older than the version on Traverse (hdf5/gcc/openmpi-3.1.4/1.10.5)

envs/requirements-cpu.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,5 @@ flake8
44
h5py
55
pyparsing
66
pyyaml
7-
tensorflow-gpu>=1.3,<2.0.0
7+
pytorch>1.3
8+
tensorflow>=1.3,<2.0.0

envs/requirements-linux-64-gpu.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@ dependencies:
1212
- h5py
1313
- pyparsing
1414
- pyyaml
15+
- pytorch>1.3
1516
- tensorflow-gpu>=1.3,<2.0.0
1617
- pip:
17-
- keras>=2.0.5,<2.3.0
1818
- pathos
1919
- matplotlib>=2.0.2
2020
- hyperopt # TODO(KGF): remove

envs/requirements-traverse.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@ dependencies:
1414
- h5py
1515
- pyparsing
1616
- pyyaml
17+
- pytorch>1.3
1718
- tensorflow-gpu>=1.3,<2.0.0
1819
- pip:
19-
- keras>=2.0.5,<2.3.0
2020
- pathos
2121
- matplotlib>=2.0.2
2222
- hyperopt # TODO(KGF): remove

envs/tigergpu.cmd

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
#!/usr/bin/env bash
22

3-
module load anaconda3
4-
# must activate conda env before module loads
3+
module load anaconda3 # KGF: issue with my shell--- makes conda CLI return nothing
4+
# must activate conda env before module loads to ensure MPI, etc modules have
5+
# precedence for setting the environment variables, libraries, etc.
56
conda activate frnn
67
export OMPI_MCA_btl="tcp,self,vader" #sm"
78
module load cudatoolkit
89
module load cudnn
910

1011
module load openmpi/gcc/3.1.3/64
11-
module load hdf5/gcc/openmpi-1.10.2/1.10.0
12+
module load hdf5/gcc/openmpi-1.10.2/1.10.0 # like Adroit, this is older than the version on Traverse (hdf5/gcc/openmpi-3.1.4/1.10.5)

envs/traverse.cmd

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
#!/usr/bin/env bash
22

3-
module load anaconda3
4-
# must activate conda env before module loads
3+
module load anaconda3 # KGF: issue with my shell--- makes conda CLI return nothing
4+
# must activate conda env before module loads to ensure MPI, etc modules have
5+
# precedence for setting the environment variables, libraries, etc.
56
conda activate frnn
67
export OMPI_MCA_btl="tcp,self,vader"
7-
8+
# module load anaconda3 ------ KGF: DO NOT DO THIS--- reloads base
9+
# module purge
810
module load cudatoolkit
911
module load cudnn/cuda-10.1/7.6.1
1012
module load openmpi/gcc/3.1.4/64

plasma/conf_parser.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ def parameters(input_file):
5555
h = myhash_signals(sig.all_signals_max_tol.values())*2
5656
else:
5757
h = myhash_signals(sig.all_signals.values())
58+
5859
params['paths']['global_normalizer_path'] = (
5960
output_path
6061
+ '/normalization/normalization_signal_group_{}.npz'.format(h))

plasma/global_vars.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
num_workers = 1
88
NUM_GPUS = 0
99
MY_GPU = 0
10+
# TODO(KGF): remove this (and all other?) references to the Keras backend
1011
backend = ''
1112
tf_ver = None
1213

plasma/models/builder.py

Lines changed: 25 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,21 @@
11
from __future__ import division, print_function
22
import plasma.global_vars as g
33
# KGF: the first time Keras is ever imported via mpi_learn.py -> mpi_runner.py
4-
import keras.backend as K
4+
# -> builder.py (here)
5+
import tensorflow as tf
56
# KGF: see below synchronization--- output is launched here
6-
from keras.models import Model # , Sequential
7+
#
78
# KGF: (was used only in hyper_build_model())
8-
from keras.layers import Input
9-
from keras.layers.core import (
9+
from tensorflow.keras.layers import (
10+
Input,
1011
Dense, Activation, Dropout, Lambda,
1112
Reshape, Flatten, Permute, # RepeatVector
13+
LSTM, CuDNNLSTM, SimpleRNN, BatchNormalization,
14+
Convolution1D, MaxPooling1D, TimeDistributed,
15+
Concatenate
1216
)
13-
from keras.layers import LSTM, CuDNNLSTM, SimpleRNN, BatchNormalization
14-
from keras.layers.convolutional import Convolution1D
15-
from keras.layers.pooling import MaxPooling1D
16-
# from keras.utils.data_utils import get_file
17-
from keras.layers.wrappers import TimeDistributed
18-
from keras.layers.merge import Concatenate
19-
from keras.callbacks import Callback
20-
from keras.regularizers import l2 # l1, l1_l2
17+
from tensorflow.keras.callbacks import Callback
18+
from tensorflow.keras.regularizers import l2 # l1, l1_l2
2119

2220
import re
2321
import os
@@ -275,7 +273,7 @@ def slicer_output_shape(input_shape, indices):
275273
bias_regularizer=l2(dense_regularization),
276274
activity_regularizer=l2(dense_regularization))(pre_rnn)
277275

278-
pre_rnn_model = Model(inputs=pre_rnn_input, outputs=pre_rnn)
276+
pre_rnn_model = tf.keras.Model(inputs=pre_rnn_input, outputs=pre_rnn)
279277
# TODO(KGF): uncomment following lines to get summary of pre-RNN model
280278
# from mpi4py import MPI
281279
# comm = MPI.COMM_WORLD
@@ -344,16 +342,17 @@ def slicer_output_shape(input_shape, indices):
344342
# x_out = TimeDistributed(Dense(100,activation='tanh')) (x_in)
345343
x_out = TimeDistributed(
346344
Dense(1, activation=output_activation))(x_in)
347-
model = Model(inputs=x_input, outputs=x_out)
345+
model = tf.keras.Model(inputs=x_input, outputs=x_out)
348346
# bug with tensorflow/Keras
349347
# TODO(KGF): what is this bug? this is the only direct "tensorflow"
350348
# import outside of mpi_runner.py and runner.py
351-
if (conf['model']['backend'] == 'tf'
352-
or conf['model']['backend'] == 'tensorflow'):
353-
first_time = "tensorflow" not in sys.modules
354-
import tensorflow as tf
355-
if first_time:
356-
K.get_session().run(tf.global_variables_initializer())
349+
# if (conf['model']['backend'] == 'tf'
350+
# or conf['model']['backend'] == 'tensorflow'):
351+
# first_time = "tensorflow" not in sys.modules
352+
# import tensorflow as tf
353+
# if first_time:
354+
# tf.compat.v1.keras.backend.get_session().run(
355+
# tf.global_variables_initializer())
357356
model.reset_states()
358357
return model
359358

@@ -362,6 +361,8 @@ def build_train_test_models(self):
362361

363362
def save_model_weights(self, model, epoch):
364363
save_path = self.get_save_path(epoch)
364+
full_model_save_path = self.get_save_path(epoch, ext='hdf5')
365+
model.save(full_model_save_path, overwrite=True)
365366
model.save_weights(save_path, overwrite=True)
366367
# try:
367368
if _has_onnx:
@@ -425,6 +426,8 @@ def load_model_weights(self, model, custom_path=None):
425426
def extract_id_and_epoch_from_filename(self, filename):
426427
regex = re.compile(r'-?\d+')
427428
numbers = [int(x) for x in regex.findall(filename)]
429+
# TODO: should ignore any files that don't match our naming convention
430+
# in this directory, especially since we are now writing full .hdf5 too
428431
if filename[-3:] == '.h5':
429432
assert len(numbers) == 3 # id, epoch number, and .h5 extension
430433
assert numbers[2] == 5 # .h5 extension
@@ -438,8 +441,8 @@ def get_all_saved_files(self):
438441
filenames = [name for name in os.listdir(path)
439442
if os.path.isfile(os.path.join(path, name))]
440443
epochs = []
441-
for file in filenames:
442-
curr_id, epoch = self.extract_id_and_epoch_from_filename(file)
444+
for fname in filenames:
445+
curr_id, epoch = self.extract_id_and_epoch_from_filename(fname)
443446
if curr_id == unique_id:
444447
epochs.append(epoch)
445448
return epochs

plasma/models/custom_loss.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
import numpy as np
22

3-
# from keras import objectives
4-
from keras import backend as K
5-
from keras.losses import squared_hinge
3+
import tensorflow as tf
4+
import tensorflow.keras.backend as K
5+
from tensorflow.keras.losses import squared_hinge
66

7-
_EPSILON = K.epsilon()
7+
_EPSILON = tf.keras.backend.epsilon()
88

99

1010
def _loss_tensor(y_true, y_pred):

0 commit comments

Comments
 (0)