chap0lin committed on
Commit
84b2666
1 Parent(s): f91b404

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -32
app.py CHANGED
@@ -64,8 +64,6 @@ class callback(CallbackAny2Vec):
64
 
65
 
66
 
67
-
68
-
69
  def spacy_lemmatize_text(text):
70
  text = nlp(text)
71
  text = ' '.join([word.lemma_ if word.lemma_ != '-PRON-' else word.text for word in text])
@@ -105,25 +103,34 @@ def pre_process():
105
 
106
  return nltk.word_tokenize(sentenceLemStopped)
107
 
108
- def classify(new_column = True):
109
- sentenceWords = json.loads(sentence.replace("'",'"'))
110
-
111
- aux_vector = []
112
- for word in sentenceWords:
113
- aux_vector.append(reloaded_w2v_model.wv[word])
114
- w2vWords = []
115
- w2vWords.append(aux_vector)
116
- MCTIinput_vector = pad_sequences(w2vWords, maxlen=2726, padding='pre')
 
 
 
 
 
 
 
 
 
117
 
118
- value = reconstructed_model_CNN.predict(MCTIinput_vector)[0]
119
-
120
- # if value >= 0.5:
121
- # return Image.open(r"elegivel.png")
122
- # else:
123
- # return Image.open(r"inelegivel.png")
124
 
125
- dataMCTI['opo_pre_tkn'] = sentencesMCTIList_xp8
126
- dataMCTI['opo_pre'] = sentencesMCTIList_xp8_sentences
 
 
 
 
 
 
127
 
128
  def gen_output(data):
129
  data.to_excel("output.xlsx", index=False)
@@ -148,22 +155,22 @@ def app(operacao, resultado, dados):
148
  data.to_excel("output.xlsx")
149
  return "output.xlsx"
150
 
151
- # if operacao == "Pré-processamento + Classificação" :
152
- # pre_process()
153
- # classify(resultado == "Nova Coluna")
154
- # output = gen_output()
155
 
156
- # return output
157
- # elif operacao == "Apenas Pré-processamento" :
158
- # pre_process()
159
- # output = gen_output()
160
 
161
- # return output
162
- # elif operacao == "Apenas Classificação" :
163
- # classify(resultado == "Nova Coluna")
164
- # output = gen_output()
165
 
166
- # return output
167
 
168
  iface = gr.Interface(
169
  fn=app,
 
64
 
65
 
66
 
 
 
67
  def spacy_lemmatize_text(text):
68
  text = nlp(text)
69
  text = ' '.join([word.lemma_ if word.lemma_ != '-PRON-' else word.text for word in text])
 
103
 
104
  return nltk.word_tokenize(sentenceLemStopped)
105
 
106
+ def classify(df, new_column = True):
107
+ sentencesMCTIList_xp8 = df['opo_pre_tkn']
108
+
109
+ formatted_sentences = []
110
+ for sentence in sentencesMCTIList_xp8:
111
+ formatted_sentences.append(json.loads(sentence.replace("'",'"')))
112
+
113
+ words = list(reloaded_w2v_model.wv.vocab)
114
+ item_shape = np.shape(reloaded_w2v_model.wv[words[0]])
115
+ MCTIinput_vector = []
116
+ for sentence in formatted_sentences:
117
+ aux_vector = []
118
+ for word in sentence:
119
+ try:
120
+ aux_vector.append(reloaded_w2v_model.wv[word])
121
+ except:
122
+ aux_vector.append(np.zeros(item_shape))
123
+ MCTIinput_vector.append(aux_vector)
124
 
 
 
 
 
 
 
125
 
126
+ MCTIinput_padded = pad_sequences(MCTIinput_vector, maxlen=2726, padding='pre')
127
+
128
+ predictions = reconstructed_model_CNN.predict(MCTIinput_padded)
129
+ cleaned_up_predictions = []
130
+ for prediction in predictions:
131
+ cleaned_up_predictions.append(1 if prediction >= 0.5 else 0);
132
+ df['classification'] = cleaned_up_predictions
133
+ return df
134
 
135
  def gen_output(data):
136
  data.to_excel("output.xlsx", index=False)
 
155
  data.to_excel("output.xlsx")
156
  return "output.xlsx"
157
 
158
+ if operacao == "Pré-processamento + Classificação" :
159
+ pre_process()
160
+ classify(resultado == "Nova Coluna")
161
+ output = gen_output()
162
 
163
+ return output
164
+ elif operacao == "Apenas Pré-processamento" :
165
+ pre_process()
166
+ output = gen_output()
167
 
168
+ return output
169
+ elif operacao == "Apenas Classificação" :
170
+ df = classify(data, resultado == "Nova Coluna")
171
+ output = gen_output(df)
172
 
173
+ return output
174
 
175
  iface = gr.Interface(
176
  fn=app,