# Progress markers bracketing the TensorFlow import, which can take a while.
print('before import ')

import tensorflow as tf

print('after import')
6+
7+ # from keras.models import Sequential, Model
8+ # from keras.layers.embeddings import Embedding
9+ # from keras.layers import Input, Activation, Dense, Permute, Dropout, add, dot, concatenate
10+ # from keras.layers import LSTM
11+ # from keras.utils.data_utils import get_file
12+ # from keras.preprocessing.sequence import pad_sequences
13+ # from keras.models import load_model
14+ # from sklearn.metrics import confusion_matrix
15+ # from sklearn import metrics
16+ # from functools import reduce
17+ # import pickle
18+ # import tarfile
19+ # import numpy as np
20+ # import re
21+ # import os
22+ # import time
23+ # import flask
24+ # from flask import request
25+ # from flask import render_template_string, render_template
26+ # from flask import Flask, jsonify
27+
28+ # app = Flask(__name__, static_url_path='')
29+ # app.config["DEBUG"] = True
30+
31+ # def tokenize(sent):
32+ # '''Return the tokens of a sentence including punctuation.
33+ # >>> tokenize('Bob dropped the apple. Where is the apple?')
34+ # ['Bob', 'dropped', 'the', 'apple', '.', 'Where', 'is', 'the', 'apple', '?']
35+ # '''
36+ # return [x.strip() for x in re.split('(\W+)', sent) if x.strip()]
37+
38+
39+ # def parse_stories(lines, only_supporting=False):
40+ # '''Parse stories provided in the bAbi tasks format
41+ # If only_supporting is true, only the sentences
42+ # that support the answer are kept.
43+ # '''
44+ # data = []
45+ # story = []
46+ # for line in lines:
47+ # line = line.decode('utf-8').strip()
48+ # nid, line = line.split(' ', 1)
49+ # nid = int(nid)
50+ # if nid == 1:
51+ # story = []
52+ # if '\t' in line:
53+ # q, a, supporting = line.split('\t')
54+ # q = tokenize(q)
55+ # substory = None
56+ # if only_supporting:
57+ # # Only select the related substory
58+ # supporting = map(int, supporting.split())
59+ # substory = [story[i - 1] for i in supporting]
60+ # else:
61+ # # Provide all the substories
62+ # substory = [x for x in story if x]
63+ # data.append((substory, q, a))
64+ # story.append('')
65+ # else:
66+ # sent = tokenize(line)
67+ # story.append(sent)
68+ # return data
69+
70+
71+ # def get_stories(f, only_supporting=False, max_length=None):
72+ # '''Given a file name, read the file,
73+ # retrieve the stories,
74+ # and then convert the sentences into a single story.
75+ # If max_length is supplied,
76+ # any stories longer than max_length tokens will be discarded.
77+ # '''
78+ # data = parse_stories(f.readlines(), only_supporting=only_supporting)
79+ # flatten = lambda data: reduce(lambda x, y: x + y, data)
80+ # data = [(flatten(story), q, answer) for story, q, answer in data if not max_length or len(flatten(story)) < max_length]
81+ # return data
82+
83+
84+ # def vectorize_stories(data):
85+ # inputs, queries, answers = [], [], []
86+ # for story, query, answer in data:
87+ # inputs.append([word_idx[w] for w in story])
88+ # queries.append([word_idx[w] for w in query])
89+ # answers.append(word_idx[answer])
90+ # return (pad_sequences(inputs, maxlen=story_maxlen),
91+ # pad_sequences(queries, maxlen=query_maxlen),
92+ # np.array(answers))
93+
94+ # # ----------- PATHS ---------------------
95+ # filepath = os.path.dirname(__file__)
96+ # try:
97+ # filepath = os.environ["PROJECT_DIR"]
98+ # except KeyError:
99+ # pass
100+
101+ # print("Current Directory: " + filepath)
102+ # # Load the model, if it exists, load vocab too
103+ # modelpath = os.path.join(filepath,"chatbot.h5")
104+ # print("Model Path: " + modelpath)
105+ # model = load_model(modelpath)
106+
107+ # pickle_vocabpath = os.path.join(filepath,"vocab.pkl")
108+ # print("Pickle Vocab Path: " + pickle_vocabpath)
109+ # vocab = pickle.load( open(pickle_vocabpath, "rb" ))
110+ # try:
111+ # path = os.path.join(filepath, 'babi-tasks-v1-2.tar10.gz')
112+ # print("Babi Tasks Path: " +path)
113+ # except:
114+ # # print('Error downloading dataset, please download it manually:\n'
115+ # # '$ wget http://www.thespermwhale.com/jaseweston/babi/babi-tasks-v1-2.tar8.gz\n'
116+ # # '$ mv tasks_1-20_v1-2.tar.gz ~/.keras/datasets/babi-tasks-v1-2.tar.gz')
117+ # raise
118+ # tar = tarfile.open(path)
119+
120+ # challenges = {
121+ # # QA1 with 10,000 samples
122+ # 'single_supporting_fact_10k': 'tasks_1-20_v1-2/en-10k/qa1_single-supporting-fact_{}.txt',
123+ # # QA2 with 10,000 samples
124+ # 'two_supporting_facts_10k': 'tasks_1-20_v1-2/en-10k/qa2_two-supporting-facts_{}.txt',
125+ # }
126+
127+
128+
129+ # challenge_type = 'single_supporting_fact_10k'
130+ # challenge = challenges[challenge_type]
131+
132+ # #print('Extracting stories for the challenge:', challenge_type)
133+ # train_stories = get_stories(tar.extractfile(challenge.format('train')))
134+ # test_stories = get_stories(tar.extractfile(challenge.format('test')))
135+
136+ # vocab = set()
137+ # for story, q, answer in train_stories + test_stories:
138+ # vocab |= set(story + q + [answer])
139+ # vocab = sorted(vocab)
140+
141+
142+ # # Reserve 0 for masking via pad_sequences
143+ # vocab_size = len(vocab) + 1
144+ # story_maxlen = max(map(len, (x for x, _, _ in train_stories + test_stories)))
145+ # query_maxlen = max(map(len, (x for _, x, _ in train_stories + test_stories)))
146+
147+ # word_idx = dict((c, i + 1) for i, c in enumerate(vocab))
148+ # inputs_train, queries_train, answers_train = vectorize_stories(train_stories)
149+ # inputs_test, queries_test, answers_test = vectorize_stories(test_stories)
150+
151+ # pred = model.predict([inputs_test, queries_test])
152+ # # See what the predictions look like, they are just probabilities of each class.
153+ # #print(pred)
154+
155+ # pred = np.argmax(pred,axis=1)
156+ # #print(pred)
157+
158+ # score = metrics.accuracy_score(answers_test, pred)
159+ # #print("Final accuracy: {}".format(score))
160+
161+
162+ # #print("Remember, I only know these words: {}".format(vocab))
163+ # #print()
164+ # story = "Task today is tasktoday. Task for yesterday was taskyesterday. Task for tomorrow is tasktomorrow."
165+
166+
167+
# 1168 -- stray line-number residue from the diff-page scrape (commented out so the file parses)
2- from keras .models import Sequential , Model
3- from keras .layers .embeddings import Embedding
4- from keras .layers import Input , Activation , Dense , Permute , Dropout , add , dot , concatenate
5- from keras .layers import LSTM
6- from keras .utils .data_utils import get_file
7- from keras .preprocessing .sequence import pad_sequences
8- from keras .models import load_model
9- from sklearn .metrics import confusion_matrix
10- from sklearn import metrics
11- from functools import reduce
12- import pickle
13- import tarfile
14- import numpy as np
15- import re
16- import os
17- import time
18- import flask
19- from flask import request
20- from flask import render_template_string , render_template
21- from flask import Flask , jsonify
22-
# Flask application; an empty static_url_path serves static files from the root.
app = Flask(__name__, static_url_path='')
app.config["DEBUG"] = True
25-
def tokenize(sent):
    '''Return the tokens of a sentence including punctuation.

    >>> tokenize('Bob dropped the apple. Where is the apple?')
    ['Bob', 'dropped', 'the', 'apple', '.', 'Where', 'is', 'the', 'apple', '?']
    '''
    # FIX: raw string for the regex -- a bare '\W' is an invalid escape and
    # raises a SyntaxWarning on Python 3.12+.  The capturing group keeps the
    # punctuation runs as tokens; empty/whitespace-only pieces are dropped.
    return [tok.strip() for tok in re.split(r'(\W+)', sent) if tok.strip()]
32-
33-
def parse_stories(lines, only_supporting=False):
    '''Parse stories provided in the bAbI tasks format.

    Each element of *lines* is a UTF-8 byte string "<nid> <text>"; question
    lines additionally carry "\\t<answer>\\t<supporting ids>".  A new story
    starts whenever the line id resets to 1.  If only_supporting is true,
    only the sentences that support the answer are kept.

    Returns a list of (substory, question_tokens, answer) tuples.
    '''
    data = []
    story = []
    for line in lines:
        line = line.decode('utf-8').strip()
        nid, line = line.split(' ', 1)
        nid = int(nid)
        if nid == 1:
            story = []
        # FIX: the delimiter had been corrupted to '\t ' (tab + space); the
        # bAbI format separates question/answer/support with a bare tab.
        if '\t' in line:
            q, a, supporting = line.split('\t')
            q = tokenize(q)
            substory = None
            if only_supporting:
                # Only select the related substory
                supporting = map(int, supporting.split())
                substory = [story[i - 1] for i in supporting]
            else:
                # Provide all the substories
                substory = [x for x in story if x]
            data.append((substory, q, a))
            # Placeholder keeps sentence indices aligned with line ids.
            story.append('')
        else:
            sent = tokenize(line)
            story.append(sent)
    return data
64-
65-
def get_stories(f, only_supporting=False, max_length=None):
    '''Read a bAbI task file and collapse each story into a single token list.

    If max_length is supplied, any stories whose flattened token count is not
    below max_length are discarded.
    '''
    def flatten(sentences):
        # Concatenate the per-sentence token lists into one flat list.
        return reduce(lambda left, right: left + right, sentences)

    parsed = parse_stories(f.readlines(), only_supporting=only_supporting)
    result = []
    for story, question, answer in parsed:
        if max_length and len(flatten(story)) >= max_length:
            continue
        result.append((flatten(story), question, answer))
    return result
77-
78-
def vectorize_stories(data):
    '''Turn (story, query, answer) token triples into padded index arrays.

    NOTE(review): relies on the module-level word_idx, story_maxlen and
    query_maxlen being populated by the data-preparation code below.
    '''
    inputs, queries, answers = [], [], []
    for story, query, answer in data:
        inputs.append([word_idx[token] for token in story])
        queries.append([word_idx[token] for token in query])
        answers.append(word_idx[answer])
    padded_inputs = pad_sequences(inputs, maxlen=story_maxlen)
    padded_queries = pad_sequences(queries, maxlen=query_maxlen)
    return (padded_inputs, padded_queries, np.array(answers))
88-
# ----------- PATHS ---------------------
# Resolve the project directory: default to this file's location, but allow
# the PROJECT_DIR environment variable to override it (e.g. in deployment).
filepath = os.path.dirname(__file__)
try:
    filepath = os.environ["PROJECT_DIR"]
except KeyError:
    pass

print("Current Directory: " + filepath)
# Load the model, if it exists, load vocab too
modelpath = os.path.join(filepath, "chatbot.h5")
print("Model Path: " + modelpath)
model = load_model(modelpath)

pickle_vocabpath = os.path.join(filepath, "vocab.pkl")
print("Pickle Vocab Path: " + pickle_vocabpath)
# FIX: close the vocab file handle -- the original pickle.load(open(...))
# leaked it.  NOTE(review): unpickling is only safe because vocab.pkl is
# shipped with the app, not user-supplied.
with open(pickle_vocabpath, "rb") as vocab_file:
    vocab = pickle.load(vocab_file)
try:
    path = os.path.join(filepath, 'babi-tasks-v1-2.tar10.gz')
    print("Babi Tasks Path: " + path)
except:
    # Left over from the original download logic; kept so any failure still
    # surfaces.  Manual download instructions:
    #   $ wget http://www.thespermwhale.com/jaseweston/babi/babi-tasks-v1-2.tar8.gz
    #   $ mv tasks_1-20_v1-2.tar.gz ~/.keras/datasets/babi-tasks-v1-2.tar.gz
    raise
tar = tarfile.open(path)
114-
# Challenge files inside the bAbI tarball, keyed by task name.
challenges = {
    # QA1 with 10,000 samples
    'single_supporting_fact_10k': 'tasks_1-20_v1-2/en-10k/qa1_single-supporting-fact_{}.txt',
    # QA2 with 10,000 samples
    'two_supporting_facts_10k': 'tasks_1-20_v1-2/en-10k/qa2_two-supporting-facts_{}.txt',
}

challenge_type = 'single_supporting_fact_10k'
challenge = challenges[challenge_type]

# Extract the train/test stories for the selected challenge.
train_stories = get_stories(tar.extractfile(challenge.format('train')))
test_stories = get_stories(tar.extractfile(challenge.format('test')))

# Rebuild the vocabulary from the data (this overwrites the pickled vocab
# loaded above).
_all_stories = train_stories + test_stories
vocab = set()
for story, q, answer in _all_stories:
    vocab |= set(story + q + [answer])
vocab = sorted(vocab)

# Reserve 0 for masking via pad_sequences
vocab_size = len(vocab) + 1
story_maxlen = max(len(s) for s, _, _ in _all_stories)
query_maxlen = max(len(q) for _, q, _ in _all_stories)

# Word -> index mapping; indices start at 1 because 0 is the padding value.
word_idx = {word: i + 1 for i, word in enumerate(vocab)}
inputs_train, queries_train, answers_train = vectorize_stories(train_stories)
inputs_test, queries_test, answers_test = vectorize_stories(test_stories)

# Sanity-check the loaded model on the test split.
# model.predict returns per-class probabilities; argmax picks the class.
pred = model.predict([inputs_test, queries_test])
pred = np.argmax(pred, axis=1)
score = metrics.accuracy_score(answers_test, pred)
# print("Final accuracy: {}".format(score))

# Fixed context fed to every /api query (see the home() route below).
story = "Task today is tasktoday. Task for yesterday was taskyesterday. Task for tomorrow is tasktomorrow."
@app.route('/api', methods=['GET'])
def home():
    '''Answer the ?query= question against the fixed module-level story.

    Returns JSON of the form {"tasks": "Answer: <word>(<raw prediction>)"}.
    '''
    query = request.args['query']
    # query = "Where is Daniel?"
    adhoc_stories = (tokenize(story), tokenize(query), '?')

    adhoc_train, adhoc_query, adhoc_answer = vectorize_stories([adhoc_stories])
    prediction = model.predict([adhoc_train, adhoc_query])
    prediction = np.argmax(prediction, axis=1)
    # word_idx is 1-based (0 is the padding index), hence the -1 into vocab.
    answer = "Answer: {}({})".format(vocab[prediction[0] - 1], prediction)
    return jsonify({'tasks': answer})
# 176181 / 177182 -- stray line-number residue from the diff-page scrape (commented out so the file parses)
@app.route('/', methods=['GET'])
def render_static():
    '''Serve the single-page UI for the chatbot.'''
    # return render_template('%NLP_API_CALL.html' % page_name)
    return render_template("NLP_API_CALL.html", title='')
# ---- Entry point: bind to $PORT when provided, default to 5000. ----
app_port = 5000
try:
    # FIX: environment values are strings; werkzeug's server requires an
    # integer port, so convert here.  A malformed PORT raises ValueError
    # immediately instead of failing obscurely inside the socket layer.
    app_port = int(os.environ["PORT"])
except KeyError:
    pass
app.run(port=app_port, host='0.0.0.0')
# "0 commit comments" -- GitHub page residue from the scrape (commented out so the file parses)