
Commit 894b0c8

test
1 parent d0e37f5 commit 894b0c8

2 files changed: 190 additions & 185 deletions


Dockerfile

Lines changed: 2 additions & 2 deletions
@@ -1,8 +1,8 @@
-FROM python:3.6
+FROM tensorflow/tensorflow
 ENV PROJECT_DIR="/app"
 WORKDIR /app
 COPY . .
-RUN pip install 'https://github.com/mind/wheels/releases/download/tf1.2.1-cpu/tensorflow-1.2.1-cp36-cp36m-linux_x86_64.whl'
+# RUN pip install 'https://github.com/mind/wheels/releases/download/tf1.2.1-cpu/tensorflow-1.2.1-cp36-cp36m-linux_x86_64.whl'
 RUN pip install -r requirements.txt
 EXPOSE 5000
 CMD ["python", "nlp.py"]

nlp.py

Lines changed: 188 additions & 183 deletions
@@ -1,188 +1,193 @@
+print('before import ')
+
+import tensorflow as tf
+
+print('after import')
+
+# from keras.models import Sequential, Model
+# from keras.layers.embeddings import Embedding
+# from keras.layers import Input, Activation, Dense, Permute, Dropout, add, dot, concatenate
+# from keras.layers import LSTM
+# from keras.utils.data_utils import get_file
+# from keras.preprocessing.sequence import pad_sequences
+# from keras.models import load_model
+# from sklearn.metrics import confusion_matrix
+# from sklearn import metrics
+# from functools import reduce
+# import pickle
+# import tarfile
+# import numpy as np
+# import re
+# import os
+# import time
+# import flask
+# from flask import request
+# from flask import render_template_string, render_template
+# from flask import Flask, jsonify
+
+# app = Flask(__name__, static_url_path='')
+# app.config["DEBUG"] = True
+
+# def tokenize(sent):
+#     '''Return the tokens of a sentence including punctuation.
+#     >>> tokenize('Bob dropped the apple. Where is the apple?')
+#     ['Bob', 'dropped', 'the', 'apple', '.', 'Where', 'is', 'the', 'apple', '?']
+#     '''
+#     return [x.strip() for x in re.split('(\W+)', sent) if x.strip()]
+
+
+# def parse_stories(lines, only_supporting=False):
+#     '''Parse stories provided in the bAbi tasks format
+#     If only_supporting is true, only the sentences
+#     that support the answer are kept.
+#     '''
+#     data = []
+#     story = []
+#     for line in lines:
+#         line = line.decode('utf-8').strip()
+#         nid, line = line.split(' ', 1)
+#         nid = int(nid)
+#         if nid == 1:
+#             story = []
+#         if '\t' in line:
+#             q, a, supporting = line.split('\t')
+#             q = tokenize(q)
+#             substory = None
+#             if only_supporting:
+#                 # Only select the related substory
+#                 supporting = map(int, supporting.split())
+#                 substory = [story[i - 1] for i in supporting]
+#             else:
+#                 # Provide all the substories
+#                 substory = [x for x in story if x]
+#             data.append((substory, q, a))
+#             story.append('')
+#         else:
+#             sent = tokenize(line)
+#             story.append(sent)
+#     return data
+
+
+# def get_stories(f, only_supporting=False, max_length=None):
+#     '''Given a file name, read the file,
+#     retrieve the stories,
+#     and then convert the sentences into a single story.
+#     If max_length is supplied,
+#     any stories longer than max_length tokens will be discarded.
+#     '''
+#     data = parse_stories(f.readlines(), only_supporting=only_supporting)
+#     flatten = lambda data: reduce(lambda x, y: x + y, data)
+#     data = [(flatten(story), q, answer) for story, q, answer in data if not max_length or len(flatten(story)) < max_length]
+#     return data
+
+
+# def vectorize_stories(data):
+#     inputs, queries, answers = [], [], []
+#     for story, query, answer in data:
+#         inputs.append([word_idx[w] for w in story])
+#         queries.append([word_idx[w] for w in query])
+#         answers.append(word_idx[answer])
+#     return (pad_sequences(inputs, maxlen=story_maxlen),
+#             pad_sequences(queries, maxlen=query_maxlen),
+#             np.array(answers))
+
+# # ----------- PATHS ---------------------
+# filepath = os.path.dirname(__file__)
+# try:
+#     filepath = os.environ["PROJECT_DIR"]
+# except KeyError:
+#     pass
+
+# print("Current Directory: " + filepath)
+# # Load the model, if it exists, load vocab too
+# modelpath = os.path.join(filepath,"chatbot.h5")
+# print("Model Path: " + modelpath)
+# model = load_model(modelpath)
+
+# pickle_vocabpath = os.path.join(filepath,"vocab.pkl")
+# print("Pickle Vocab Path: " + pickle_vocabpath)
+# vocab = pickle.load( open(pickle_vocabpath, "rb" ))
+# try:
+#     path = os.path.join(filepath, 'babi-tasks-v1-2.tar10.gz')
+#     print("Babi Tasks Path: " +path)
+# except:
+#     # print('Error downloading dataset, please download it manually:\n'
+#     # '$ wget http://www.thespermwhale.com/jaseweston/babi/babi-tasks-v1-2.tar8.gz\n'
+#     # '$ mv tasks_1-20_v1-2.tar.gz ~/.keras/datasets/babi-tasks-v1-2.tar.gz')
+#     raise
+# tar = tarfile.open(path)
+
+# challenges = {
+#     # QA1 with 10,000 samples
+#     'single_supporting_fact_10k': 'tasks_1-20_v1-2/en-10k/qa1_single-supporting-fact_{}.txt',
+#     # QA2 with 10,000 samples
+#     'two_supporting_facts_10k': 'tasks_1-20_v1-2/en-10k/qa2_two-supporting-facts_{}.txt',
+# }
+
+
+
+# challenge_type = 'single_supporting_fact_10k'
+# challenge = challenges[challenge_type]
+
+# #print('Extracting stories for the challenge:', challenge_type)
+# train_stories = get_stories(tar.extractfile(challenge.format('train')))
+# test_stories = get_stories(tar.extractfile(challenge.format('test')))
+
+# vocab = set()
+# for story, q, answer in train_stories + test_stories:
+#     vocab |= set(story + q + [answer])
+# vocab = sorted(vocab)
+
+
+# # Reserve 0 for masking via pad_sequences
+# vocab_size = len(vocab) + 1
+# story_maxlen = max(map(len, (x for x, _, _ in train_stories + test_stories)))
+# query_maxlen = max(map(len, (x for _, x, _ in train_stories + test_stories)))
+
+# word_idx = dict((c, i + 1) for i, c in enumerate(vocab))
+# inputs_train, queries_train, answers_train = vectorize_stories(train_stories)
+# inputs_test, queries_test, answers_test = vectorize_stories(test_stories)
+
+# pred = model.predict([inputs_test, queries_test])
+# # See what the predictions look like, they are just probabilities of each class.
+# #print(pred)
+
+# pred = np.argmax(pred,axis=1)
+# #print(pred)
+
+# score = metrics.accuracy_score(answers_test, pred)
+# #print("Final accuracy: {}".format(score))
+
+
+# #print("Remember, I only know these words: {}".format(vocab))
+# #print()
+# story = "Task today is tasktoday. Task for yesterday was taskyesterday. Task for tomorrow is tasktomorrow."
+
+
+
 
-from keras.models import Sequential, Model
-from keras.layers.embeddings import Embedding
-from keras.layers import Input, Activation, Dense, Permute, Dropout, add, dot, concatenate
-from keras.layers import LSTM
-from keras.utils.data_utils import get_file
-from keras.preprocessing.sequence import pad_sequences
-from keras.models import load_model
-from sklearn.metrics import confusion_matrix
-from sklearn import metrics
-from functools import reduce
-import pickle
-import tarfile
-import numpy as np
-import re
-import os
-import time
-import flask
-from flask import request
-from flask import render_template_string, render_template
-from flask import Flask, jsonify
-
-app = Flask(__name__, static_url_path='')
-app.config["DEBUG"] = True
-
-def tokenize(sent):
-    '''Return the tokens of a sentence including punctuation.
-    >>> tokenize('Bob dropped the apple. Where is the apple?')
-    ['Bob', 'dropped', 'the', 'apple', '.', 'Where', 'is', 'the', 'apple', '?']
-    '''
-    return [x.strip() for x in re.split('(\W+)', sent) if x.strip()]
-
-
-def parse_stories(lines, only_supporting=False):
-    '''Parse stories provided in the bAbi tasks format
-    If only_supporting is true, only the sentences
-    that support the answer are kept.
-    '''
-    data = []
-    story = []
-    for line in lines:
-        line = line.decode('utf-8').strip()
-        nid, line = line.split(' ', 1)
-        nid = int(nid)
-        if nid == 1:
-            story = []
-        if '\t' in line:
-            q, a, supporting = line.split('\t')
-            q = tokenize(q)
-            substory = None
-            if only_supporting:
-                # Only select the related substory
-                supporting = map(int, supporting.split())
-                substory = [story[i - 1] for i in supporting]
-            else:
-                # Provide all the substories
-                substory = [x for x in story if x]
-            data.append((substory, q, a))
-            story.append('')
-        else:
-            sent = tokenize(line)
-            story.append(sent)
-    return data
-
-
-def get_stories(f, only_supporting=False, max_length=None):
-    '''Given a file name, read the file,
-    retrieve the stories,
-    and then convert the sentences into a single story.
-    If max_length is supplied,
-    any stories longer than max_length tokens will be discarded.
-    '''
-    data = parse_stories(f.readlines(), only_supporting=only_supporting)
-    flatten = lambda data: reduce(lambda x, y: x + y, data)
-    data = [(flatten(story), q, answer) for story, q, answer in data if not max_length or len(flatten(story)) < max_length]
-    return data
-
-
-def vectorize_stories(data):
-    inputs, queries, answers = [], [], []
-    for story, query, answer in data:
-        inputs.append([word_idx[w] for w in story])
-        queries.append([word_idx[w] for w in query])
-        answers.append(word_idx[answer])
-    return (pad_sequences(inputs, maxlen=story_maxlen),
-            pad_sequences(queries, maxlen=query_maxlen),
-            np.array(answers))
-
-# ----------- PATHS ---------------------
-filepath = os.path.dirname(__file__)
-try:
-    filepath = os.environ["PROJECT_DIR"]
-except KeyError:
-    pass
-
-print("Current Directory: " + filepath)
-# Load the model, if it exists, load vocab too
-modelpath = os.path.join(filepath,"chatbot.h5")
-print("Model Path: " + modelpath)
-model = load_model(modelpath)
-
-pickle_vocabpath = os.path.join(filepath,"vocab.pkl")
-print("Pickle Vocab Path: " + pickle_vocabpath)
-vocab = pickle.load( open(pickle_vocabpath, "rb" ))
-try:
-    path = os.path.join(filepath, 'babi-tasks-v1-2.tar10.gz')
-    print("Babi Tasks Path: " +path)
-except:
-    # print('Error downloading dataset, please download it manually:\n'
-    # '$ wget http://www.thespermwhale.com/jaseweston/babi/babi-tasks-v1-2.tar8.gz\n'
-    # '$ mv tasks_1-20_v1-2.tar.gz ~/.keras/datasets/babi-tasks-v1-2.tar.gz')
-    raise
-tar = tarfile.open(path)
-
-challenges = {
-    # QA1 with 10,000 samples
-    'single_supporting_fact_10k': 'tasks_1-20_v1-2/en-10k/qa1_single-supporting-fact_{}.txt',
-    # QA2 with 10,000 samples
-    'two_supporting_facts_10k': 'tasks_1-20_v1-2/en-10k/qa2_two-supporting-facts_{}.txt',
-}
-
-
-
-challenge_type = 'single_supporting_fact_10k'
-challenge = challenges[challenge_type]
-
-#print('Extracting stories for the challenge:', challenge_type)
-train_stories = get_stories(tar.extractfile(challenge.format('train')))
-test_stories = get_stories(tar.extractfile(challenge.format('test')))
-
-vocab = set()
-for story, q, answer in train_stories + test_stories:
-    vocab |= set(story + q + [answer])
-vocab = sorted(vocab)
-
-
-# Reserve 0 for masking via pad_sequences
-vocab_size = len(vocab) + 1
-story_maxlen = max(map(len, (x for x, _, _ in train_stories + test_stories)))
-query_maxlen = max(map(len, (x for _, x, _ in train_stories + test_stories)))
-
-word_idx = dict((c, i + 1) for i, c in enumerate(vocab))
-inputs_train, queries_train, answers_train = vectorize_stories(train_stories)
-inputs_test, queries_test, answers_test = vectorize_stories(test_stories)

-pred = model.predict([inputs_test, queries_test])
-# See what the predictions look like, they are just probabilities of each class.
-#print(pred)
-
-pred = np.argmax(pred,axis=1)
-#print(pred)
-
-score = metrics.accuracy_score(answers_test, pred)
-#print("Final accuracy: {}".format(score))
-
-
-#print("Remember, I only know these words: {}".format(vocab))
-#print()
-story = "Task today is tasktoday. Task for yesterday was taskyesterday. Task for tomorrow is tasktomorrow."
-
-
-
-
-@app.route('/api', methods=['GET'])
-def home():
+# @app.route('/api', methods=['GET'])
+# def home():
 
-    query = request.args['query']
-    #query = "Where is Daniel?"
-    adhoc_stories = (tokenize(story), tokenize(query), '?')
+#     query = request.args['query']
+#     #query = "Where is Daniel?"
+#     adhoc_stories = (tokenize(story), tokenize(query), '?')
 
-    adhoc_train, adhoc_query, adhoc_answer = vectorize_stories([adhoc_stories])
-    pred = model.predict([adhoc_train, adhoc_query])
-    pred = np.argmax(pred,axis=1)
-    answer = "Answer: {}({})".format(vocab[pred[0]-1],pred)
-    return jsonify({'tasks': answer})
+#     adhoc_train, adhoc_query, adhoc_answer = vectorize_stories([adhoc_stories])
+#     pred = model.predict([adhoc_train, adhoc_query])
+#     pred = np.argmax(pred,axis=1)
+#     answer = "Answer: {}({})".format(vocab[pred[0]-1],pred)
+#     return jsonify({'tasks': answer})
 
 
-@app.route('/' , methods=['GET'])
-def render_static():
-    return render_template("NLP_API_CALL.html", title = '')
-    #return render_template('%NLP_API_CALL.html' % page_name)
-
-app_port = 5000
-try:
-    app_port = os.environ["PORT"]
-except KeyError:
-    pass
-app.run(port=app_port, host='0.0.0.0')
+# @app.route('/' , methods=['GET'])
+# def render_static():
+#     return render_template("NLP_API_CALL.html", title = '')
+#     #return render_template('%NLP_API_CALL.html' % page_name)
+
+# app_port = 5000
+# try:
+#     app_port = os.environ["PORT"]
+# except KeyError:
+#     pass
+# app.run(port=app_port, host='0.0.0.0')
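
The routes this commit comments out served the model over HTTP: GET /api read a query parameter, vectorized it against the hard-coded story, and returned the predicted answer as JSON, while / rendered NLP_API_CALL.html. A minimal sketch of exercising those endpoints against a pre-commit build, assuming the container above is listening on localhost:5000 and that the example question is purely illustrative:

    # /api responds with JSON of the form {"tasks": "Answer: <word>(<pred>)"}
    curl 'http://localhost:5000/api?query=What%20is%20the%20task%20today%3F'
    # the root route renders the NLP_API_CALL.html template
    curl 'http://localhost:5000/'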
