python-tutorial/29t2t/01.nmt.py at master · devdio/python-tutorial

History

310 lines (250 loc) · 9.85 KB

Raw

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

# -*- coding: utf-8 -*-

"""

@author: XuMing ([email protected])

@description:

"""

# Imports we need.

import tensorflow as tf

import matplotlib.pyplot as plt

import numpy as np

import os

import collections

from tensor2tensor import models

from tensor2tensor import problems

from tensor2tensor.layers import common_layers

from tensor2tensor.utils import trainer_lib

from tensor2tensor.utils import t2t_model

from tensor2tensor.utils import registry

from tensor2tensor.utils import metrics

# Switch TensorFlow to eager execution before any ops are built.
tfe = tf.contrib.eager
tfe.enable_eager_execution()

# Short alias for the estimator mode constants (TRAIN / EVAL / ...).
Modes = tf.estimator.ModeKeys

# Working directories used by the rest of the script.
data_dir = "./data"
tmp_dir = "./tmp"
train_dir = "./train"
checkpoint_dir = "./checkpoints"

# Source locations that the vocab file and the pretrained checkpoint are
# copied from further down.
gs_data_dir = "./tensor2tensor-data"
gs_ckpt_dir = "./tensor2tensor-checkpoints/"

# Create every directory up front, in the same order as before.
for _path in (data_dir, tmp_dir, train_dir, checkpoint_dir,
              gs_data_dir, gs_ckpt_dir):
    tf.gfile.MakeDirs(_path)

"""# Download MNIST and inspect it"""

# A Problem pairs a dataset with fixed pre-processing, e.g. a translation
# corpus with a specific tokenization or an image set at a fixed resolution.
# List the problems registered in Tensor2Tensor (the result is not stored).
problems.available()

# Look up the MNIST problem, then download the data and convert it into the
# standard on-disk format used for training and evaluation.
mnist_problem = problems.problem("image_mnist")
mnist_problem.generate_data(data_dir, tmp_dir)

# Pull a single training example out of the dataset as eager tensors.
mnist_train_examples = tfe.Iterator(mnist_problem.dataset(Modes.TRAIN, data_dir))
mnist_example = mnist_train_examples.next()
image, label = mnist_example["inputs"], mnist_example["targets"]

# Show channel 0 of the image in grayscale and print its label.
first_channel = image.numpy()[:, :, 0].astype(np.float32)
plt.imshow(first_channel, cmap=plt.get_cmap('gray'))
print("Label: %d" % label.numpy())

"""# Translate from English to German with a pre-trained model"""

# Fetch the English-German translation problem.
ende_problem = problems.problem("translate_ende_wmt32k")

# Copy the vocab file into data_dir so we can encode inputs and decode model
# outputs.
vocab_name = "vocab.translate_ende_wmt32k.32768.subwords"
vocab_file = os.path.join(gs_data_dir, vocab_name)

# The notebook used the IPython shell escape `!gsutil cp ...`, which is a
# SyntaxError in a plain .py file. subprocess.call runs the same command and,
# like the shell escape, does not raise when gsutil exits non-zero.
import subprocess

try:
    subprocess.call(["gsutil", "cp", vocab_file, data_dir])
except OSError as err:
    # gsutil is not installed / not on PATH: report it and carry on, as the
    # notebook shell escape would have.
    print("Could not run gsutil; vocab file not copied: %s" % err)

# Get the encoders from the problem; they are keyed by feature name.
encoders = ende_problem.feature_encoders(data_dir)

# Setup helper functions for encoding and decoding

def encode(input_str, output_str=None):
    """Build the features dict used for inference from a source string.

    Args:
        input_str: Text to encode with the "inputs" encoder.
        output_str: Not used by this function.

    Returns:
        A dict with one key, "inputs", holding the encoded ids followed by
        id 1 (EOS), reshaped to [1, length, 1].
    """
    eos_id = 1
    token_ids = encoders["inputs"].encode(input_str)
    token_ids = token_ids + [eos_id]
    # Batch of one, with a trailing unit axis.
    return {"inputs": tf.reshape(token_ids, [1, -1, 1])}

def decode(integers):
    """Convert a sequence of ids back into a string.

    Args:
        integers: Ids to decode; unit axes are squeezed away first.

    Returns:
        The string produced by the "inputs" encoder for the ids that precede
        the first id 1 (EOS), or for all ids when no EOS is present.
    """
    # np.squeeze turns a one-element input into a 0-d array, which cannot be
    # iterated; atleast_1d keeps it a proper sequence in that case.
    integers = list(np.atleast_1d(np.squeeze(integers)))
    if 1 in integers:
        # Drop EOS and everything after it.
        integers = integers[:integers.index(1)]
    # Pass the list itself: squeezing again would collapse a single
    # remaining id to a 0-d array.
    return encoders["inputs"].decode(integers)

# # Generate and view the data

# # This cell is commented out because WMT data generation can take hours

# ende_problem.generate_data(data_dir, tmp_dir)

# example = tfe.Iterator(ende_problem.dataset(Modes.TRAIN, data_dir)).next()

# inputs = [int(x) for x in example["inputs"].numpy()] # Cast to ints.

# targets = [int(x) for x in example["targets"].numpy()] # Cast to ints.

# # Example inputs as int-tensor.

# print("Inputs, encoded:")

# print(inputs)

# print("Inputs, decoded:")

# # Example inputs as a sentence.

# print(decode(inputs))

# # Example targets as int-tensor.

# print("Targets, encoded:")

# print(targets)

# # Example targets as a sentence.

# print("Targets, decoded:")

# print(decode(targets))

# List the models registered in Tensor2Tensor (the result is not stored).
registry.list_models()

# Create hparams and the model.
model_name = "transformer"
hparams_set = "transformer_base"
hparams = trainer_lib.create_hparams(
    hparams_set, data_dir=data_dir, problem_name="translate_ende_wmt32k")

# NOTE: Only create the model once when restoring from a checkpoint; it's a
# Layer and so subsequent instantiations will have different variable scopes
# that will not match the checkpoint.
translate_model = registry.model(model_name)(hparams, Modes.EVAL)

# Copy the pretrained checkpoint directory into checkpoint_dir.
ckpt_name = "transformer_ende_test"
gs_ckpt = os.path.join(gs_ckpt_dir, ckpt_name)

# The notebook used the IPython shell escape `!gsutil -q cp -R ...`, which is
# a SyntaxError in a plain .py file. subprocess.call runs the same command
# and, like the shell escape, does not raise when gsutil exits non-zero.
import subprocess

try:
    subprocess.call(["gsutil", "-q", "cp", "-R", gs_ckpt, checkpoint_dir])
except OSError as err:
    # gsutil is not installed / not on PATH: report it and carry on.
    print("Could not run gsutil; checkpoint not copied: %s" % err)

ckpt_path = tf.train.latest_checkpoint(os.path.join(checkpoint_dir, ckpt_name))

# Bare expression kept from the notebook cell; it has no effect in a script.
ckpt_path

# Restore and translate!

def translate(inputs):
    """Translate a source string with the pretrained model.

    Args:
        inputs: Source text; it is passed through encode() before inference.

    Returns:
        The decoded string for the model's "outputs" ids.
    """
    features = encode(inputs)
    # Variables created during inference are restored from ckpt_path.
    with tfe.restore_variables_on_create(ckpt_path):
        inference_result = translate_model.infer(features)
        output_ids = inference_result["outputs"]
    return decode(output_ids)

# Translate one example sentence. `inputs` and `outputs` are kept at module
# level because the attention display further down reads both.
inputs = "The animal didn't cross the street because it was too tired"
outputs = translate(inputs)

# Show the source sentence and the translation.
print("Inputs: %s" % inputs)
print("Outputs: %s" % outputs)

"""## Attention Viz Utils"""

from tensor2tensor.visualization import attention

from tensor2tensor.data_generators import text_encoder

# Maximum number of rows/columns of each attention matrix kept by resize().
SIZE = 35

def encode_eval(input_str, output_str):
    """Build an inputs/targets features dict for an evaluation pass.

    Args:
        input_str: Source text.
        output_str: Target text; it is encoded with the same "inputs" encoder.

    Returns:
        A dict with "inputs" and "targets", each holding the encoded ids
        followed by id 1, reshaped to the 4-D shape [1, length, 1, 1].
    """
    encoder = encoders["inputs"]
    batch_shape = [1, -1, 1, 1]  # 4-D: batch of one plus two unit axes.
    source_ids = encoder.encode(input_str) + [1]
    target_ids = encoder.encode(output_str) + [1]
    return {
        "inputs": tf.reshape(source_ids, batch_shape),
        "targets": tf.reshape(target_ids, batch_shape),
    }

def get_att_mats():
    """Collect the resized attention matrices for every hidden layer.

    Reads translate_model.attention_weights for layers
    0 .. hparams.num_hidden_layers - 1, takes element [0] of each entry and
    passes it through resize().

    Returns:
        A tuple (enc_atts, dec_atts, encdec_atts) of three lists with one
        entry per layer: encoder self-attention, decoder self-attention and
        encoder-decoder attention.
    """
    weights = translate_model.attention_weights
    key_templates = (
        "transformer/body/encoder/layer_%i/self_attention/multihead_attention/dot_product_attention",
        "transformer/body/decoder/layer_%i/self_attention/multihead_attention/dot_product_attention",
        "transformer/body/decoder/layer_%i/encdec_attention/multihead_attention/dot_product_attention",
    )
    # One output list per template, in the same order.
    collected = ([], [], [])
    for layer in range(hparams.num_hidden_layers):
        for template, bucket in zip(key_templates, collected):
            bucket.append(resize(weights[template % layer][0]))
    return collected

def resize(np_mat, size=None):
    """Crop and normalize one layer's attention weights for visualization.

    Args:
        np_mat: Attention weights with three axes; axis 0 is the one summed
            over (the heads), axes 1 and 2 are cropped.
        size: Maximum number of rows and columns to keep. Defaults to the
            module-level SIZE when omitted.

    Returns:
        The cropped weights divided elementwise by their sum over axis 0,
        reshaped to add a leading axis of length 1.
    """
    if size is None:
        size = SIZE
    # Keep at most `size` rows and columns of every head.
    np_mat = np_mat[:, :size, :size]
    # Sum across heads
    head_totals = np.sum(np_mat, axis=0)
    # Normalize
    layer_mat = np_mat / head_totals[np.newaxis, :]
    lsh = layer_mat.shape
    # Add extra dim for viz code to work.
    layer_mat = np.reshape(layer_mat, (1, lsh[0], lsh[1], lsh[2]))
    return layer_mat

def to_tokens(ids):
    """Map ids to their display tokens.

    Args:
        ids: Ids to convert; unit axes are squeezed away first.

    Returns:
        A list with one string per id: '<PAD>' for 0, '<EOS>' for 1,
        '<NULL>' for -1, and otherwise the subtoken string from the
        'targets' vocabulary in hparams.
    """
    # np.squeeze turns a single id into a 0-d array, which cannot be
    # iterated; atleast_1d keeps it a sequence in that case.
    ids = np.atleast_1d(np.squeeze(ids))
    subtokenizer = hparams.problem_hparams.vocabulary['targets']
    special_tokens = {0: '<PAD>', 1: '<EOS>', -1: '<NULL>'}
    return [
        special_tokens[_id] if _id in special_tokens
        else subtokenizer._subtoken_id_to_subtoken_string(_id)
        for _id in ids
    ]

def call_html():
    """Render the require.js configuration in the notebook output.

    The configuration registers module paths for `base`, `d3` and `jquery`.
    Presumably the attention visualization shown right after this call loads
    those modules through require.js - confirm against the viz code.
    """
    from IPython.display import HTML, display

    # display() renders the HTML; print() would only show the object's repr.
    # NOTE(review): the original markup had a closing </script> without an
    # opening tag and an unclosed `paths: {`. The opening tags and the closing
    # brace below are reconstructed; confirm the require.js URL matches the
    # notebook frontend in use.
    display(HTML('''
        <script src="/static/components/requirejs/require.js"></script>
        <script>
          requirejs.config({
            paths: {
              base: '/static/base',
              "d3": "https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.8/d3.min",
              jquery: '//ajax.googleapis.com/ajax/libs/jquery/2.0.0/jquery.min',
            },
          });
        </script>
        '''))

"""## Display Attention"""

# Convert inputs and outputs to subwords
inp_text = to_tokens(encoders["inputs"].encode(inputs))
out_text = to_tokens(encoders["inputs"].encode(outputs))

# Run eval to collect attention weights
example = encode_eval(inputs, outputs)

# NOTE(review): this looks for the latest checkpoint directly in
# checkpoint_dir, whereas ckpt_path was resolved from
# os.path.join(checkpoint_dir, ckpt_name) - confirm which one is intended.
with tfe.restore_variables_on_create(tf.train.latest_checkpoint(checkpoint_dir)):
    translate_model.set_mode(Modes.EVAL)
    # The return value is discarded; get_att_mats() below reads
    # translate_model.attention_weights after this call.
    translate_model(example)

# Get normalized attention weights for each layer
enc_atts, dec_atts, encdec_atts = get_att_mats()

# Emit the require.js setup, then render the attention visualization.
call_html()
attention.show(inp_text, out_text, enc_atts, dec_atts, encdec_atts)

"""# Train a custom model on MNIST"""

# Create your own model

class MySimpleModel(t2t_model.T2TModel):
    """A small stack of three convolutions defined as a T2TModel body."""

    def body(self, features):
        """Apply three conv2d layers to features["inputs"].

        The first two layers use 5x5 kernels with stride 2; the last uses a
        3x3 kernel. A ReLU is applied between consecutive layers. Every
        layer has hparams.hidden_size filters.

        Args:
            features: Dict of features; only "inputs" is read.

        Returns:
            The output of the third convolution.
        """
        width = self.hparams.hidden_size
        net = features["inputs"]
        net = tf.layers.conv2d(net, width, kernel_size=(5, 5), strides=(2, 2))
        net = tf.layers.conv2d(
            tf.nn.relu(net), width, kernel_size=(5, 5), strides=(2, 2))
        return tf.layers.conv2d(tf.nn.relu(net), width, kernel_size=(3, 3))

# Build hparams for MNIST from the "basic_1" set.
# NOTE: this rebinds the module-level `hparams` that get_att_mats() and
# to_tokens() read, so those helpers see the MNIST hparams from here on.
hparams = trainer_lib.create_hparams("basic_1", data_dir=data_dir, problem_name="image_mnist")

# Number of filters used by every convolution in MySimpleModel.body.
hparams.hidden_size = 64

# Instantiate the custom model in training mode.
model = MySimpleModel(hparams, Modes.TRAIN)

# Prepare for the training loop

# In Eager mode, opt.minimize must be passed a loss function wrapped with

# implicit_value_and_gradients

@tfe.implicit_value_and_gradients
def loss_fn(features):
    """Return the "training" loss of the module-level model for a batch.

    Args:
        features: Features dict passed straight to the model.

    Returns:
        The "training" entry of the losses dict returned by the model. The
        decorator wraps this function so that callers receive the loss
        together with the gradients.
    """
    model_outputs = model(features)
    _, loss_terms = model_outputs
    return loss_terms["training"]

# Training data: repeat the MNIST training set indefinitely and batch it.
BATCH_SIZE = 128
mnist_train_dataset = (
    mnist_problem.dataset(Modes.TRAIN, data_dir)
    .repeat(None)
    .batch(BATCH_SIZE)
)

optimizer = tf.train.AdamOptimizer()

# Training loop: one optimizer update per batch. The loss is reported every
# 50 steps and the loop stops once the step index reaches NUM_STEPS.
NUM_STEPS = 500
target_shape = [BATCH_SIZE, 1, 1, 1]  # Targets are fed as 4-D tensors.
for step, batch in enumerate(tfe.Iterator(mnist_train_dataset)):
    batch["targets"] = tf.reshape(batch["targets"], target_shape)
    loss, grads_and_vars = loss_fn(batch)
    optimizer.apply_gradients(grads_and_vars)
    if not step % 50:
        print("Step: %d, Loss: %.3f" % (step, loss.numpy()))
    if step >= NUM_STEPS:
        break

# Switch the trained model to evaluation mode and load the eval split.
model.set_mode(Modes.EVAL)
mnist_eval_dataset = mnist_problem.dataset(Modes.EVAL, data_dir)

# Accumulators for accuracy (ACC) and top-5 accuracy (ACC_TOP5).
tracked_metrics = [metrics.Metrics.ACC, metrics.Metrics.ACC_TOP5]
metrics_accum, metrics_result = metrics.create_eager_metrics(tracked_metrics)

# Evaluate on the first 200 examples, one example at a time.
for index, sample in enumerate(tfe.Iterator(mnist_eval_dataset)):
    if index >= 200:
        break
    # The model expects 4-D inputs and targets.
    sample["inputs"] = tf.reshape(sample["inputs"], [1, 28, 28, 1])
    sample["targets"] = tf.reshape(sample["targets"], [1, 1, 1, 1])
    predictions, _ = model(sample)
    metrics_accum(predictions, sample["targets"])

# Report the averaged value of each metric over the evaluated examples.
for metric_name, metric_value in metrics_result().items():
    print("%s: %.2f" % (metric_name, metric_value))

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

01.nmt.py

Latest commit

History

01.nmt.py

File metadata and controls