Spaces:

unb-lamfo-nlp-mcti
/

NLP-W2V-CNN-Multi

Runtime error

App Files Files Community

chap0lin committed on Dec 6, 2022

Commit

84b2666

•

1 Parent(s): f91b404

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -32

app.py CHANGED Viewed

@@ -64,8 +64,6 @@ class callback(CallbackAny2Vec):
 def spacy_lemmatize_text(text):
   text = nlp(text)
   text = ' '.join([word.lemma_ if word.lemma_ != '-PRON-' else word.text for word in text])
@@ -105,25 +103,34 @@ def pre_process():
   return nltk.word_tokenize(sentenceLemStopped)
-def classify(new_column = True):
-  sentenceWords = json.loads(sentence.replace("'",'"'))
-  aux_vector = []
-  for word in sentenceWords:
-    aux_vector.append(reloaded_w2v_model.wv[word])
-  w2vWords = []
-  w2vWords.append(aux_vector)
-  MCTIinput_vector = pad_sequences(w2vWords, maxlen=2726, padding='pre')
-  value = reconstructed_model_CNN.predict(MCTIinput_vector)[0]
-  # if value >= 0.5:
-  #   return Image.open(r"elegivel.png")
-  # else:
-  #   return Image.open(r"inelegivel.png")
-  dataMCTI['opo_pre_tkn'] = sentencesMCTIList_xp8
-  dataMCTI['opo_pre'] = sentencesMCTIList_xp8_sentences
 def gen_output(data):
   data.to_excel("output.xlsx", index=False)
@@ -148,22 +155,22 @@ def app(operacao, resultado, dados):
   data.to_excel("output.xlsx")
   return "output.xlsx"
-  # if operacao == "Pré-processamento + Classificação" :
-  #   pre_process()
-  #   classify(resultado == "Nova Coluna")
-  #   output = gen_output()
-  #   return output
-  # elif operacao == "Apenas Pré-processamento" :
-  #   pre_process()
-  #   output = gen_output()
-  #   return output
-  # elif operacao == "Apenas Classificação" :
-  #   classify(resultado == "Nova Coluna")
-  #   output = gen_output()
-  #   return output
 iface = gr.Interface(
     fn=app,

 def spacy_lemmatize_text(text):
   text = nlp(text)
   text = ' '.join([word.lemma_ if word.lemma_ != '-PRON-' else word.text for word in text])
   return nltk.word_tokenize(sentenceLemStopped)
def _parse_token_list(sentence):
  """Decode one stringified token list (e.g. "['a', 'b']") into a Python list.

  The quote-swapping JSON path is tried first so every input that used to
  parse still yields the same result; inputs it cannot handle (typically a
  token that itself contains an apostrophe) fall back to a literal parse.
  """
  try:
    return json.loads(sentence.replace("'", '"'))
  except json.JSONDecodeError:
    import ast
    return ast.literal_eval(sentence)


def classify(df, new_column = True):
  """Classify every row of ``df`` with the Word2Vec + CNN models.

  Reads the stringified token lists in ``df['opo_pre_tkn']``, turns each
  token into its Word2Vec vector (a zero vector for tokens the model does
  not know), pads the per-row sequences to length 2726 and feeds them to
  ``reconstructed_model_CNN``. Scores >= 0.5 become 1, everything else 0.

  Args:
    df: DataFrame with an ``opo_pre_tkn`` column of stringified token lists.
    new_column: accepted for interface compatibility; currently unused.

  Returns:
    The same ``df`` object, with a ``classification`` column added or
    overwritten (one 0/1 label per row).
  """
  wv = reloaded_w2v_model.wv
  # `vector_size` is the embedding width; it replaces probing `wv.vocab`,
  # which is not available on Gensim 4 keyed vectors.
  unknown_word_vector = np.zeros((wv.vector_size,))

  MCTIinput_vector = []
  for sentence in df['opo_pre_tkn']:
    aux_vector = []
    for word in _parse_token_list(sentence):
      try:
        aux_vector.append(wv[word])
      except KeyError:
        # Out-of-vocabulary token: keep its position with a zero vector.
        aux_vector.append(unknown_word_vector)
    # One entry per sentence (not per word), so that the number of
    # predictions lines up with the number of rows in `df`.
    MCTIinput_vector.append(aux_vector)

  # NOTE(review): pad_sequences is called with its default dtype; confirm
  # against the Keras docs that this matches how the CNN was trained.
  MCTIinput_padded = pad_sequences(MCTIinput_vector, maxlen=2726, padding='pre')
  predictions = reconstructed_model_CNN.predict(MCTIinput_padded)

  df['classification'] = [1 if prediction >= 0.5 else 0 for prediction in predictions]
  return df
 def gen_output(data):
   data.to_excel("output.xlsx", index=False)
   data.to_excel("output.xlsx")
   return "output.xlsx"
+  if operacao == "Pré-processamento + Classificação" :
+    pre_process()
+    classify(resultado == "Nova Coluna")
+    output = gen_output()
+    return output
+  elif operacao == "Apenas Pré-processamento" :
+    pre_process()
+    output = gen_output()
+    return output
+  elif operacao == "Apenas Classificação" :
+    df = classify(data, resultado == "Nova Coluna")
+    output = gen_output(df)
+    return output
 iface = gr.Interface(
     fn=app,