-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTicketActivityClassify.py
More file actions
168 lines (152 loc) · 6.33 KB
/
TicketActivityClassify.py
File metadata and controls
168 lines (152 loc) · 6.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
import os
import pandas
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
import json
import numpy as np
from keras.utils import np_utils
from keras.layers import Dense
from keras.models import model_from_json
from keras.preprocessing.text import Tokenizer
from sklearn.preprocessing import LabelBinarizer
from datetime import datetime
import pandas as pd
import logging
import sys
from config import myapp_config
# Configure file logging for the classifier (DEBUG level so model-loading
# steps are traceable), using the log location from the project config.
logging.basicConfig(
    filename=os.path.join(myapp_config.LOG_PATH, myapp_config.CLASSIFIER_LOG_FILE),
    level=logging.DEBUG,
    format="%(asctime)s %(message)s",
    datefmt="%m/%d/%Y %I:%M:%S %p",
)
# Mirror all log records to stdout so interactive/console runs show progress
# without tailing the log file.
logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
# self.load_model(*args, **kwargs)
# It's a dataframe prediction
# if "ShortDescription"in kwargs:
# self.predict_text(*args, **kwargs)
# else:
# self.predict_dataset(*args, **kwargs)
class TicketActivityPredict:
    """Serves ticket-activity predictions from a trained Keras classifier.

    On construction it loads the model architecture (JSON), weights (H5),
    the label classes file, and the fitted tokenizer from
    ``myapp_config.MODEL_PATH``, then answers either single-ticket
    predictions (``predict_text``) or whole-dataset predictions
    (``predict_dataset``).
    """

    def __init__(self, *args, **kwargs):
        """Initialize placeholders and load the model artifacts.

        Keyword arguments are forwarded to :meth:`load_model`, so callers
        may override ``class_file`` / ``model_name``; calling with no
        arguments behaves exactly as before.
        """
        # Plain placeholders; load_model() replaces them. (The original
        # created throwaway `type(...)` objects, which served no purpose.)
        self._tokenizer = None
        self.labels = []
        self._model = None
        self.load_model(*args, **kwargs)

    def activity_predict(self, *args, **kwargs):
        """Dispatch a prediction request.

        If ``ShortDescription`` is passed, this is a single-ticket request
        and is routed to :meth:`predict_text`. Otherwise the default
        prediction input file is loaded and routed to
        :meth:`predict_dataset`.

        Returns:
            The routed method's result (dict for a single ticket, output
            CSV path for a dataset).
        """
        # Single classification: one ShortDescription (Category optional).
        if "ShortDescription" in kwargs:
            return self.predict_text(*args, **kwargs)
        # Dataset classification: read the default prediction input file.
        with open(
            os.path.join(
                myapp_config.OUTPUT_PATH, myapp_config.DEFAULT_PREDICT_INPUT_FILE
            ),
            "r",
        ) as fp:
            input_data = json.load(fp)
        # NOTE(review): assumes the input JSON maps column names
        # ("ShortDescription", optionally "Category") to lists of values —
        # TODO confirm against the file producer.
        merged = dict(kwargs)
        if isinstance(input_data, dict):
            merged.update(input_data)
        return self.predict_dataset(*args, **merged)

    def load_model(self, *args, **kwargs):
        """Load label classes, model architecture + weights, and tokenizer.

        Keyword Args:
            class_file: name of the classes file (default ``"classes.txt"``).
            model_name: base name of the model artifacts
                (default ``"Activity_Classifier"``).
        """
        logging.info("Loading Model")
        # Load classification class labels, one per line.
        class_file = kwargs.pop("class_file", "classes.txt")
        logging.info("Loading classes file: " + class_file)
        # 'with' guarantees the handle is closed (the original leaked it).
        with open(os.path.join(myapp_config.MODEL_PATH, class_file), "r") as text_file:
            for line in text_file:
                self.labels.append(line.replace("\n", ""))
        # Load the serialized model architecture and its trained weights.
        model_name = kwargs.pop("model_name", "Activity_Classifier")
        model_file_name = "model_" + model_name + ".json"
        logging.info("Loading model called: " + model_file_name)
        with open(os.path.join(myapp_config.MODEL_PATH, model_file_name)) as json_file:
            loaded_model_json = json_file.read()
        model = model_from_json(loaded_model_json)
        model.load_weights(
            os.path.join(myapp_config.MODEL_PATH, "model_" + model_name + ".h5")
        )
        self._model = model
        # Load the tokenizer fitted at training time (required so that
        # prediction-time text vectorization matches training).
        token_model_file = "tokenizer" + model_name + ".pickle"
        logging.info("Loading model tokenizer: " + token_model_file)
        import pickle

        with open(
            os.path.join(myapp_config.MODEL_PATH, token_model_file), "rb"
        ) as handle:
            # Single assignment (the original assigned the tokenizer twice).
            self._tokenizer = pickle.load(handle)
        logging.info("Loading model tokenizer loaded")

    def predict_dataset(self, *args, **kwargs):
        """Predict activities for an entire dataset and save a CSV.

        Keyword Args:
            ShortDescription: list of ticket short-description texts.
            Category: optional list of ticket categories (parallel to
                ``ShortDescription``).
            filename: optional source file path; its base name is reused
                for the output CSV.

        Returns:
            Absolute path of the CSV written to ``myapp_config.OUTPUT_PATH``
            with one ``Prediction`` and ``Score`` per input row.
        """
        logging.info("Running Entire Dataset Prediction")
        # The original referenced x_pred / model_predictions / labels that
        # were never defined (guaranteed NameError). They are computed here
        # the same way predict_text() does.
        short_descriptions = list(kwargs.get("ShortDescription", []))
        categories = list(kwargs.get("Category", [])) or [None] * len(
            short_descriptions
        )
        x_pred = self._tokenizer.texts_to_matrix(short_descriptions, mode="tfidf")
        y_pred = self._tokenizer.texts_to_matrix(categories, mode="tfidf")
        model_predictions = self._model.predict(
            {"main_input": x_pred, "cat_input": y_pred}
        )
        df = pandas.DataFrame()
        predictions = []
        scores = []
        # Keep the highest-probability label and its score for each row.
        for prediction in model_predictions:
            ihighest_score = np.argmax(prediction)
            predictions.append(self.labels[ihighest_score])
            scores.append(prediction[ihighest_score])
        df["Prediction"] = predictions
        df["Score"] = scores
        # Timestamp the output so repeated runs never overwrite each other.
        timestamp = datetime.timestamp(datetime.now())
        if "filename" in kwargs:
            # Reuse the source file's base name (absolute paths accepted).
            filename = kwargs.pop("filename")
            newfilename = os.path.split(filename)[1]
            newfilename = os.path.splitext(newfilename)[0] + str(timestamp) + ".csv"
        else:
            newfilename = "Predictions_" + str(timestamp) + ".csv"
        # Save the predictions file.
        newfilename = os.path.join(myapp_config.OUTPUT_PATH, newfilename)
        df.to_csv(newfilename)
        logging.info("File saved to {}".format(newfilename))
        return newfilename

    def predict_text(self, *args, **kwargs):
        """Predict the activity of a single ticket.

        Keyword Args:
            ShortDescription: the ticket's short-description text (required).
            Category: the ticket's category (optional).

        Returns:
            Dict with the input fields plus ``top5_pred_probs``, the five
            most likely labels with percentage scores; also written to
            ``activity_predict_output.json`` in ``OUTPUT_PATH``.
        """
        # Single classification
        short_description = [kwargs["ShortDescription"]]
        # Category is optional
        category = [kwargs.get("Category")]
        logging.info("ShortDescription " + str(short_description))
        logging.info("Category " + str(category))
        top5_pred_probs = []
        # Vectorize both model inputs with the training-time tokenizer.
        x_pred = self._tokenizer.texts_to_matrix(short_description, mode="tfidf")
        y_pred = self._tokenizer.texts_to_matrix(category, mode="tfidf")
        model_predictions = self._model.predict(
            {"main_input": x_pred, "cat_input": y_pred}
        )
        logging.info("Running Individual Ticket Prediction")
        # Take the five highest-probability classes, best first.
        sorting = (-model_predictions).argsort()
        sorted_ = sorting[0][:5]
        for value in sorted_:
            predicted_label = self.labels[value]
            # Convert to a percentage rounded to two decimals.
            prob = (model_predictions[0][value]) * 100
            prob = "%.2f" % round(prob, 2)
            top5_pred_probs.append([prob, predicted_label])
        output = {
            "short_description": short_description[0],
            "category": category[0],
            "top5_pred_probs": top5_pred_probs,
        }
        with open(
            os.path.join(myapp_config.OUTPUT_PATH, "activity_predict_output.json"), "w"
        ) as fp:
            json.dump(output, fp)
        logging.info(output)
        return output