forked from shibing624/python-tutorial
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path01.nmt.py
More file actions
310 lines (250 loc) · 9.85 KB
/
01.nmt.py
File metadata and controls
310 lines (250 loc) · 9.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
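@description: Tensor2Tensor walkthrough: inspect MNIST, translate English to German with a pre-trained Transformer, visualize its attention, and train a small custom model on MNIST.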
"""
# Imports we need.
import collections
import os
import subprocess

import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
from tensor2tensor import models
from tensor2tensor import problems
from tensor2tensor.layers import common_layers
from tensor2tensor.utils import trainer_lib
from tensor2tensor.utils import t2t_model
from tensor2tensor.utils import registry
from tensor2tensor.utils import metrics
# Turn on TF Eager execution.
tfe = tf.contrib.eager
tfe.enable_eager_execution()

# Shorthand for the estimator mode keys (TRAIN / EVAL are used further down).
Modes = tf.estimator.ModeKeys

# Working directories used by the rest of the script.
data_dir = "./data"
tmp_dir = "./tmp"
train_dir = "./train"
checkpoint_dir = "./checkpoints"

# Directories the vocab file and the pre-trained checkpoint are copied from.
# NOTE(review): the gs_ prefix suggests these once pointed at GCS buckets - confirm.
gs_data_dir = "./tensor2tensor-data"
gs_ckpt_dir = "./tensor2tensor-checkpoints/"

# Make sure every directory exists.
for _dir in (data_dir, tmp_dir, train_dir, checkpoint_dir, gs_data_dir, gs_ckpt_dir):
    tf.gfile.MakeDirs(_dir)
"""# Download MNIST and inspect it"""
# A Problem pairs a dataset with fixed pre-processing: for instance a
# translation dataset with a specific tokenization, or an image dataset with
# a specific resolution.
#
# Tensor2Tensor ships many problems; this lists them.
problems.available()

# Look up the MNIST problem.
mnist_problem = problems.problem("image_mnist")

# generate_data downloads the data and processes it into a standard format
# ready for training and evaluation.
mnist_problem.generate_data(data_dir, tmp_dir)

# Pull one training example out of the dataset as Tensors.
mnist_train_iterator = tfe.Iterator(mnist_problem.dataset(Modes.TRAIN, data_dir))
mnist_example = mnist_train_iterator.next()
image = mnist_example["inputs"]
label = mnist_example["targets"]

# Show channel 0 of the image in grayscale and print its label.
first_channel = image.numpy()[:, :, 0].astype(np.float32)
plt.imshow(first_channel, cmap=plt.get_cmap('gray'))
print("Label: %d" % label.numpy())
"""# Translate from English to German with a pre-trained model"""
# Fetch the problem
ende_problem = problems.problem("translate_ende_wmt32k")

# Copy the vocab file into data_dir so we can encode inputs and decode model
# outputs. The source file is looked up under gs_data_dir.
vocab_name = "vocab.translate_ende_wmt32k.32768.subwords"
vocab_file = os.path.join(gs_data_dir, vocab_name)
# `!gsutil cp ...` is a notebook shell escape and a syntax error in a plain
# Python file, so the same command is run through subprocess. check_call raises
# CalledProcessError on a failed copy instead of letting the missing vocab
# surface later when the encoders are built.
subprocess.check_call(["gsutil", "cp", vocab_file, data_dir])

# Get the encoders from the problem
encoders = ende_problem.feature_encoders(data_dir)
# Setup helper functions for encoding and decoding
def encode(input_str, output_str=None):
    """Build the inference features dict for one source sentence.

    Args:
        input_str: text to encode with the "inputs" encoder.
        output_str: accepted but not used.

    Returns:
        A dict with a single key, "inputs", holding the encoded ids followed by
        the EOS id and reshaped to [1, -1, 1].
    """
    eos_id = 1
    token_ids = encoders["inputs"].encode(input_str)
    token_ids = token_ids + [eos_id]  # add EOS id
    # Make it 3D.
    return {"inputs": tf.reshape(token_ids, [1, -1, 1])}
def decode(integers):
    """Turn model output ids back into a string.

    Args:
        integers: ids as a list, array or tensor. Whatever its shape, it is
            flattened to a 1-D sequence before decoding.

    Returns:
        The string the "inputs" encoder produces for the ids that precede the
        first EOS id (1), or for all ids when no EOS id is present.
    """
    # Flatten rather than np.squeeze: squeeze collapses a one-id sequence into
    # a 0-d array, which list() cannot iterate and the encoder cannot decode.
    ids = np.asarray(integers).reshape(-1).tolist()
    if 1 in ids:
        # Drop the EOS id and everything after it.
        ids = ids[:ids.index(1)]
    return encoders["inputs"].decode(ids)
# # Generate and view the data
# # This cell is commented out because WMT data generation can take hours
# ende_problem.generate_data(data_dir, tmp_dir)
# example = tfe.Iterator(ende_problem.dataset(Modes.TRAIN, data_dir)).next()
# inputs = [int(x) for x in example["inputs"].numpy()] # Cast to ints.
# targets = [int(x) for x in example["targets"].numpy()] # Cast to ints.
# # Example inputs as int-tensor.
# print("Inputs, encoded:")
# print(inputs)
# print("Inputs, decoded:")
# # Example inputs as a sentence.
# print(decode(inputs))
# # Example targets as int-tensor.
# print("Targets, encoded:")
# print(targets)
# # Example targets as a sentence.
# print("Targets, decoded:")
# print(decode(targets))
# There are many models available in Tensor2Tensor
registry.list_models()

# Create hparams and the model
model_name = "transformer"
hparams_set = "transformer_base"
hparams = trainer_lib.create_hparams(hparams_set, data_dir=data_dir, problem_name="translate_ende_wmt32k")

# NOTE: Only create the model once when restoring from a checkpoint; it's a
# Layer and so subsequent instantiations will have different variable scopes
# that will not match the checkpoint.
translate_model = registry.model(model_name)(hparams, Modes.EVAL)

# Copy the pretrained checkpoint locally
ckpt_name = "transformer_ende_test"
gs_ckpt = os.path.join(gs_ckpt_dir, ckpt_name)
# `!gsutil -q cp -R ...` is a notebook shell escape and a syntax error in a
# plain Python file, so the same command is run through subprocess. check_call
# raises CalledProcessError if the copy fails, rather than leaving ckpt_path
# unresolved for the restore steps that follow.
subprocess.check_call(["gsutil", "-q", "cp", "-R", gs_ckpt, checkpoint_dir])
ckpt_path = tf.train.latest_checkpoint(os.path.join(checkpoint_dir, ckpt_name))
# Bare expression kept from the notebook; it has no effect when run as a script.
ckpt_path
# Restore and translate!
def translate(inputs):
    """Translate one sentence with the pre-trained model.

    Args:
        inputs: source text, handed to encode().

    Returns:
        The string decode() produces from the "outputs" entry of the model's
        inference result.
    """
    features = encode(inputs)
    # Inference runs inside restore_variables_on_create so the model's
    # variables are tied to the checkpoint at ckpt_path.
    with tfe.restore_variables_on_create(ckpt_path):
        predicted_ids = translate_model.infer(features)["outputs"]
    return decode(predicted_ids)
# Run one example sentence through the pre-trained model and print both sides.
# `inputs` and `outputs` stay bound at module level; the attention display code
# further down reads them again.
inputs = "The animal didn't cross the street because it was too tired"
outputs = translate(inputs)
print("Inputs: %s" % inputs)
print("Outputs: %s" % outputs)
"""## Attention Viz Utils"""
from tensor2tensor.visualization import attention
from tensor2tensor.data_generators import text_encoder
# Maximum number of positions resize() keeps on each of the last two axes of an
# attention matrix.
SIZE = 35
def encode_eval(input_str, output_str):
    """Build an eval features dict from a source sentence and its translation.

    Both strings are encoded with the "inputs" encoder, terminated with id 1
    and reshaped to [1, -1, 1, 1].

    Args:
        input_str: source text.
        output_str: translated text.

    Returns:
        A dict with the keys "inputs" and "targets".
    """
    def _to_4d_ids(text):
        # Encode, append id 1, then make it 4D.
        return tf.reshape(encoders["inputs"].encode(text) + [1], [1, -1, 1, 1])

    inputs = _to_4d_ids(input_str)
    outputs = _to_4d_ids(output_str)
    return {"inputs": inputs, "targets": outputs}
def get_att_mats():
    """Collect resized attention matrices for every hidden layer.

    For each layer index below hparams.num_hidden_layers, element 0 of the
    encoder self-attention, decoder self-attention and encoder-decoder
    attention entries of translate_model.attention_weights is passed through
    resize().

    Returns:
        A tuple (enc_atts, dec_atts, encdec_atts) of lists, one entry per layer.
    """
    enc_key = "transformer/body/encoder/layer_%i/self_attention/multihead_attention/dot_product_attention"
    dec_key = "transformer/body/decoder/layer_%i/self_attention/multihead_attention/dot_product_attention"
    encdec_key = "transformer/body/decoder/layer_%i/encdec_attention/multihead_attention/dot_product_attention"

    enc_atts, dec_atts, encdec_atts = [], [], []
    buckets = (enc_atts, dec_atts, encdec_atts)
    for layer in range(hparams.num_hidden_layers):
        # Look up all three matrices for this layer first, then resize them.
        layer_mats = [
            translate_model.attention_weights[key % layer][0]
            for key in (enc_key, dec_key, encdec_key)
        ]
        for bucket, mat in zip(buckets, layer_mats):
            bucket.append(resize(mat))
    return enc_atts, dec_atts, encdec_atts
def resize(np_mat):
    """Crop, normalize and add a leading axis to one attention matrix.

    Args:
        np_mat: attention weights; assumed to be rank 3 with heads on axis 0
            - TODO confirm against the caller.

    Returns:
        The cropped matrix divided by its sum over axis 0, reshaped to
        (1, d0, d1, d2).
    """
    # Keep at most SIZE positions on each of the last two axes.
    cropped = np_mat[:, :SIZE, :SIZE]
    # Sum across heads
    head_totals = np.sum(cropped, axis=0)
    # Normalize
    normalized = cropped / head_totals[np.newaxis, :]
    # Add extra dim for viz code to work.
    dim0, dim1, dim2 = normalized.shape
    return np.reshape(normalized, (1, dim0, dim1, dim2))
def to_tokens(ids):
    """Map ids to display tokens.

    Ids 0, 1 and -1 become '<PAD>', '<EOS>' and '<NULL>'; every other id is
    looked up in the 'targets' vocabulary of hparams.problem_hparams.

    Args:
        ids: sequence of ids; size-1 dimensions are squeezed away first.

    Returns:
        A list with one token string per id.
    """
    ids = np.squeeze(ids)
    subtokenizer = hparams.problem_hparams.vocabulary['targets']

    def _label(token_id):
        # Reserved ids get fixed placeholders; the rest go through the vocab.
        if token_id == 0:
            return '<PAD>'
        if token_id == 1:
            return '<EOS>'
        if token_id == -1:
            return '<NULL>'
        return subtokenizer._subtoken_id_to_subtoken_string(token_id)

    return [_label(token_id) for token_id in ids]
def call_html():
    """Display the requirejs configuration (base, d3 and jquery paths).

    This file calls it right before attention.show().
    """
    from IPython.display import HTML, display

    # display() hands the HTML object to the front end for rendering; print()
    # would only write the object's repr, so the <script> tags would never be
    # emitted.
    display(HTML('''
<script src="/static/components/requirejs/require.js"></script>
<script>
requirejs.config({
paths: {
base: '/static/base',
"d3": "https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.8/d3.min",
jquery: '//ajax.googleapis.com/ajax/libs/jquery/2.0.0/jquery.min',
},
});
</script>
'''))
"""## Display Attention"""
# Convert inputs and outputs to subwords
inp_text = to_tokens(encoders["inputs"].encode(inputs))
out_text = to_tokens(encoders["inputs"].encode(outputs))

# Run eval to collect attention weights
example = encode_eval(inputs, outputs)
# Restore from ckpt_path, the checkpoint resolved under
# checkpoint_dir/ckpt_name where the pre-trained files were copied. Asking
# latest_checkpoint for checkpoint_dir itself looks one level too high.
with tfe.restore_variables_on_create(ckpt_path):
    translate_model.set_mode(Modes.EVAL)
    translate_model(example)

# Get normalized attention weights for each layer
enc_atts, dec_atts, encdec_atts = get_att_mats()

call_html()
attention.show(inp_text, out_text, enc_atts, dec_atts, encdec_atts)
"""# Train a custom model on MNIST"""
# Create your own model
class MySimpleModel(t2t_model.T2TModel):
    """Three stacked 2-D convolutions with a ReLU between consecutive layers."""

    def body(self, features):
        """Run features["inputs"] through the convolution stack.

        Every convolution uses self.hparams.hidden_size filters: two 5x5
        layers with stride 2, then a 3x3 layer with the default stride.

        Returns:
            The output of the final convolution.
        """
        net = features["inputs"]
        filters = self.hparams.hidden_size
        # Two strided 5x5 convolutions, each followed by a ReLU.
        for _ in range(2):
            net = tf.layers.conv2d(net, filters,
                                   kernel_size=(5, 5), strides=(2, 2))
            net = tf.nn.relu(net)
        return tf.layers.conv2d(net, filters, kernel_size=(3, 3))
# Hyperparameters for the MNIST model: start from the "basic_1" set and
# override hidden_size, which MySimpleModel.body uses as the filter count.
# NOTE: this rebinds the module-level `hparams` that get_att_mats() and
# to_tokens() read, replacing the translation hparams created earlier.
hparams = trainer_lib.create_hparams("basic_1", data_dir=data_dir, problem_name="image_mnist")
hparams.hidden_size = 64
model = MySimpleModel(hparams, Modes.TRAIN)
# Prepare for the training loop
# In Eager mode the loss function is wrapped with
# implicit_value_and_gradients; the training loop unpacks its result into the
# loss and a second value that it hands to optimizer.apply_gradients.
@tfe.implicit_value_and_gradients
def loss_fn(features):
    """Return the model's "training" loss for one batch of features."""
    _outputs, loss_terms = model(features)
    return loss_terms["training"]
# Setup the training data
BATCH_SIZE = 128
mnist_train_dataset = mnist_problem.dataset(Modes.TRAIN, data_dir)
# Repeat indefinitely before batching; the loop below decides when to stop.
mnist_train_dataset = mnist_train_dataset.repeat(None).batch(BATCH_SIZE)

optimizer = tf.train.AdamOptimizer()

# Train
NUM_STEPS = 500

for count, example in enumerate(tfe.Iterator(mnist_train_dataset)):
    # Check the bound before doing any work so that exactly NUM_STEPS updates
    # run (steps 0 .. NUM_STEPS - 1); checking after the update would run one
    # step too many.
    if count >= NUM_STEPS:
        break
    example["targets"] = tf.reshape(example["targets"], [BATCH_SIZE, 1, 1, 1])  # Make it 4D.
    loss, gv = loss_fn(example)
    optimizer.apply_gradients(gv)

    # Log the loss every 50 steps.
    if count % 50 == 0:
        print("Step: %d, Loss: %.3f" % (count, loss.numpy()))
# Switch the trained model to eval mode and load the eval split.
model.set_mode(Modes.EVAL)
mnist_eval_dataset = mnist_problem.dataset(Modes.EVAL, data_dir)

# Create eval metric accumulators for accuracy (ACC) and accuracy in
# top 5 (ACC_TOP5)
eval_metrics = [metrics.Metrics.ACC, metrics.Metrics.ACC_TOP5]
metrics_accum, metrics_result = metrics.create_eager_metrics(eval_metrics)

for count, example in enumerate(tfe.Iterator(mnist_eval_dataset)):
    # Evaluate on the first 200 examples only.
    if count >= 200:
        break

    # Make the inputs and targets 4D
    for key, shape in (("inputs", [1, 28, 28, 1]), ("targets", [1, 1, 1, 1])):
        example[key] = tf.reshape(example[key], shape)

    # Call the model
    predictions, _ = model(example)

    # Compute and accumulate metrics
    metrics_accum(predictions, example["targets"])

# Print out the averaged metric values on the eval data
for name, val in metrics_result().items():
    print("%s: %.2f" % (name, val))