Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -26,116 +26,116 @@ import en_core_web_sm
|
|
26 |
nlp = en_core_web_sm.load()
|
27 |
|
28 |
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
|
46 |
|
47 |
-
#
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
#
|
55 |
-
|
56 |
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
|
62 |
-
|
63 |
-
|
64 |
|
65 |
|
66 |
|
67 |
|
68 |
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
|
94 |
-
|
95 |
-
|
96 |
|
97 |
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
|
106 |
-
|
107 |
|
108 |
-
|
109 |
-
|
110 |
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
|
118 |
-
|
119 |
|
120 |
-
#
|
121 |
-
#
|
122 |
-
#
|
123 |
-
#
|
124 |
|
125 |
-
|
126 |
-
|
127 |
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
|
132 |
|
133 |
-
|
134 |
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
|
140 |
def app(operacao, resultado, dados):
|
141 |
data = pd.read_excel(dados)
|
|
|
26 |
nlp = en_core_web_sm.load()
|
27 |
|
28 |
|
29 |
+
def recall_m(y_true, y_pred):
    """Batch-wise recall metric for Keras training.

    Counts a prediction as positive when it rounds to 1 after clipping
    to [0, 1]; K.epsilon() guards against division by zero when the
    batch contains no positive labels.
    """
    tp = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return tp / (actual_positives + K.epsilon())
|
34 |
|
35 |
+
def precision_m(y_true, y_pred):
    """Batch-wise precision metric for Keras training.

    Mirrors recall_m: rounds clipped predictions to {0, 1} and divides
    true positives by predicted positives, with K.epsilon() preventing
    a zero denominator.
    """
    tp = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return tp / (predicted_positives + K.epsilon())
|
40 |
|
41 |
+
def f1_m(y_true, y_pred):
    """Batch-wise F1 score: harmonic mean of precision_m and recall_m."""
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    return 2 * ((p * r) / (p + r + K.epsilon()))
|
45 |
|
46 |
|
47 |
+
# Callback that reports Word2Vec training loss during gensim training.
class callback(CallbackAny2Vec):
    """
    Print the loss value after each epoch
    """

    def __init__(self):
        self.epoch = 0
        # gensim reports a cumulative loss, so the previous reading is
        # kept in order to print the per-interval delta.
        self.loss_previous_step = 0

    def on_epoch_end(self, model):
        cumulative_loss = model.get_latest_training_loss()
        # Only log every 100th epoch to keep the output readable.
        if self.epoch % 100 == 0:
            print('Loss after epoch {}: {}'.format(self.epoch, cumulative_loss - self.loss_previous_step))
        self.epoch += 1
        self.loss_previous_step = cumulative_loss
|
64 |
|
65 |
|
66 |
|
67 |
|
68 |
|
69 |
+
def spacy_lemmatize_text(text):
    """Lemmatize *text* with the module-level spaCy pipeline ``nlp``.

    Pronouns (lemma '-PRON-' in older spaCy models) are kept as their
    surface form instead of the placeholder lemma.
    """
    doc = nlp(text)
    lemmas = [tok.text if tok.lemma_ == '-PRON-' else tok.lemma_ for tok in doc]
    return ' '.join(lemmas)
|
73 |
|
74 |
+
def remove_accented_chars(text):
    """Transliterate accented characters to their closest ASCII letter.

    Bug fix: the original normalized with 'NFC' (composed form), so an
    accented character stayed a single non-ASCII code point and the
    ascii/ignore encode step deleted the whole letter ('café' -> 'caf').
    'NFKD' decomposes it into base letter + combining mark, so only the
    mark is discarded ('café' -> 'cafe').
    """
    text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('utf-8', 'ignore')
    return text
|
77 |
|
78 |
+
def remove_special_characters(text, remove_digits=False):
    """Delete every character that is not a letter, whitespace, or
    (unless ``remove_digits`` is True) a digit."""
    if remove_digits:
        pattern = r'[^a-zA-Z\s]'
    else:
        pattern = r'[^a-zA-Z0-9\s]'
    return re.sub(pattern, '', text)
|
82 |
|
83 |
+
def remove_stopwords(text, is_lower_case=False, stopwords=None):
    """Drop stopwords from *text* and return the remaining tokens joined
    by single spaces.

    When ``is_lower_case`` is False (the default) tokens are lowercased
    only for the membership test, so original casing is preserved in the
    output. Falls back to NLTK's English stopword list when none is given.
    """
    if not stopwords:
        stopwords = nltk.corpus.stopwords.words('english')

    tokens = [tok.strip() for tok in nltk.word_tokenize(text)]

    if is_lower_case:
        kept = [tok for tok in tokens if tok not in stopwords]
    else:
        kept = [tok for tok in tokens if tok.lower() not in stopwords]

    return ' '.join(kept)
|
96 |
|
97 |
|
98 |
+
def pre_process():
    """Run the full text-cleaning pipeline and return the token list.

    NOTE(review): reads the module-level variable ``sentence`` rather
    than taking a parameter — confirm it is set before this is called.
    Pipeline: strip accents -> expand contractions -> drop punctuation
    and digits -> lowercase -> lemmatize -> remove stopwords -> tokenize.
    """
    without_accents = remove_accented_chars(sentence)
    expanded = contractions.fix(without_accents)
    letters_only = remove_special_characters(expanded, remove_digits=True)
    lowered = letters_only.lower()
    lemmatized = spacy_lemmatize_text(lowered)
    without_stopwords = remove_stopwords(lemmatized, is_lower_case=False)
    return nltk.word_tokenize(without_stopwords)
|
107 |
|
108 |
+
def classify(new_column=True):
    """Vectorize the module-level ``sentence`` with Word2Vec and run the
    CNN classifier on it.

    NOTE(review): ``sentence`` is expected to hold a string repr of a
    token list (single quotes), hence the quote swap before json.loads.
    NOTE(review): ``value`` is computed but never returned — the return
    statements below were left commented out in the original.
    """
    words = json.loads(sentence.replace("'", '"'))

    # Look up each token's embedding; unknown tokens would raise KeyError.
    embedded = []
    for word in words:
        embedded.append(reloaded_w2v_model.wv[word])

    # Wrap as a single-sample batch and pad to the model's input length.
    batch = [embedded]
    MCTIinput_vector = pad_sequences(batch, maxlen=2726, padding='pre')

    value = reconstructed_model_CNN.predict(MCTIinput_vector)[0]

    # if value >= 0.5:
    #     return Image.open(r"elegivel.png")
    # else:
    #     return Image.open(r"inelegivel.png")
|
124 |
|
125 |
+
# NOTE(review): indentation was lost in the diff view — these assignments may
# belong inside classify() rather than at module level; verify placement.
# They attach the preprocessed token lists / sentences (defined elsewhere in
# the file) as new columns of the dataMCTI frame.
dataMCTI['opo_pre_tkn'] = sentencesMCTIList_xp8
dataMCTI['opo_pre'] = sentencesMCTIList_xp8_sentences
|
127 |
|
128 |
+
def gen_output(data):
    """Write *data* (a DataFrame) to output.xlsx and return the filename."""
    out_path = "output.xlsx"
    data.to_excel(out_path, index=False)
    return out_path
|
131 |
|
132 |
|
133 |
+
# Trained gensim Word2Vec embeddings used by classify() to vectorize tokens.
reloaded_w2v_model = Word2Vec.load('word2vec_xp8.model')

# Trained Keras CNN classifier. The custom metric functions defined above
# must be passed via custom_objects so Keras can deserialize the compiled
# model that was saved with them.
reconstructed_model_CNN = keras.models.load_model("best weights CNN.h5",
                                                  custom_objects={'f1_m':f1_m,
                                                                  "precision_m":precision_m,
                                                                  "recall_m":recall_m})
|
139 |
|
140 |
def app(operacao, resultado, dados):
|
141 |
data = pd.read_excel(dados)
|