chap0lin committed
Commit 6202c7b
1 Parent(s): 9baf651

Update app.py

Files changed (1)
  1. app.py +150 -2
app.py CHANGED
@@ -1,15 +1,163 @@
+import os
+import re
+import contractions
+import unicodedata
+import spacy
+import keras
+import requests
+import shutil
+import json
 import gradio as gr
 import pandas as pd
+import numpy as np
+from PIL import Image
+from keras import backend as K
+from keras.utils.data_utils import pad_sequences
+from gensim.models import Word2Vec
+from gensim.models.callbacks import CallbackAny2Vec
+
+import nltk
+nltk.download('punkt')
+nltk.download('stopwords')
+
+# Fetch the spaCy model at startup so the import below succeeds
+os.system('python -m spacy download en_core_web_sm')
+
+import en_core_web_sm
+nlp = en_core_web_sm.load()
+
+
+def recall_m(y_true, y_pred):
+    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
+    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
+    recall = true_positives / (possible_positives + K.epsilon())
+    return recall
+
+def precision_m(y_true, y_pred):
+    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
+    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
+    precision = true_positives / (predicted_positives + K.epsilon())
+    return precision
+
+def f1_m(y_true, y_pred):
+    precision = precision_m(y_true, y_pred)
+    recall = recall_m(y_true, y_pred)
+    return 2 * ((precision * recall) / (precision + recall + K.epsilon()))
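+
+# Keras cannot deserialise custom metric functions on its own; the three
+# metrics above are therefore registered by name via `custom_objects` when
+# the saved CNN model is reloaded further down in this file.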
+
+
+# Initialise callback class
+class callback(CallbackAny2Vec):
+    """
+    Print the loss value after each epoch
+    """
+    def __init__(self):
+        self.epoch = 0
+        # gensim loss is cumulative, so we record previous values to print
+        self.loss_previous_step = 0
+
+    def on_epoch_end(self, model):
+        loss = model.get_latest_training_loss()
+        if self.epoch % 100 == 0:
+            print('Loss after epoch {}: {}'.format(self.epoch, loss - self.loss_previous_step))
+
+        self.epoch += 1
+        self.loss_previous_step = loss
+
+
+def spacy_lemmatize_text(text):
+    text = nlp(text)
+    text = ' '.join([word.lemma_ if word.lemma_ != '-PRON-' else word.text for word in text])
+    return text
+
+def remove_accented_chars(text):
+    # NFKD decomposition splits an accented character into base letter plus
+    # accent, so the ascii encode keeps the base letter instead of dropping
+    # the whole character
+    text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('utf-8', 'ignore')
+    return text
+
+def remove_special_characters(text, remove_digits=False):
+    pattern = r'[^a-zA-Z0-9\s]' if not remove_digits else r'[^a-zA-Z\s]'
+    text = re.sub(pattern, '', text)
+    return text
+
+def remove_stopwords(text, is_lower_case=False, stopwords=None):
+    if not stopwords:
+        stopwords = nltk.corpus.stopwords.words('english')
+    tokens = nltk.word_tokenize(text)
+    tokens = [token.strip() for token in tokens]
+
+    if is_lower_case:
+        filtered_tokens = [token for token in tokens if token not in stopwords]
+    else:
+        filtered_tokens = [token for token in tokens if token.lower() not in stopwords]
+
+    filtered_text = ' '.join(filtered_tokens)
+    return filtered_text
+
+
+def pre_process(sentence):
+    opo_texto_sem_caracteres_especiais = remove_accented_chars(sentence)
+    sentenceExpanded = contractions.fix(opo_texto_sem_caracteres_especiais)
+    sentenceWithoutPunctuation = remove_special_characters(sentenceExpanded, remove_digits=True)
+    sentenceLowered = sentenceWithoutPunctuation.lower()
+    sentenceLemmatized = spacy_lemmatize_text(sentenceLowered)
+    sentenceLemStopped = remove_stopwords(sentenceLemmatized, is_lower_case=False)
+
+    return nltk.word_tokenize(sentenceLemStopped)
+
+def classify(sentence, new_column=True):
+    sentenceWords = json.loads(sentence.replace("'", '"'))
+
+    aux_vector = []
+    for word in sentenceWords:
+        aux_vector.append(reloaded_w2v_model.wv[word])
+    w2vWords = []
+    w2vWords.append(aux_vector)
+    # maxlen must match the input length the CNN was trained with
+    MCTIinput_vector = pad_sequences(w2vWords, maxlen=2726, padding='pre')
+
+    value = reconstructed_model_CNN.predict(MCTIinput_vector)[0]
+
+    if value >= 0.5:
+        return Image.open(r"elegivel.png")
+    else:
+        return Image.open(r"inelegivel.png")
+
+def gen_output(data):
+    return "output.xlsx"
+
+
+reloaded_w2v_model = Word2Vec.load('word2vec_xp8.model')
+
+reconstructed_model_CNN = keras.models.load_model("best weights CNN.h5",
+                                                  custom_objects={'f1_m': f1_m,
+                                                                  'precision_m': precision_m,
+                                                                  'recall_m': recall_m})
 
 def app(operacao, resultado, dados):
+
     boxes = {'Color': ['Green','Green','Green','Blue','Blue','Red','Red','Red'],
              'Shape': ['Rectangle','Rectangle','Square','Rectangle','Square','Square','Square','Rectangle'],
              'Price': [10,15,5,5,10,15,15,5]
             }
     df = pd.DataFrame(boxes, columns= ['Color','Shape','Price'])
     df.to_excel("output.xlsx")
-
-    return "output.xlsx"
+
+    if operacao == "Pré-processamento + Classificação":
+        pre_process(dados)
+        classify(dados, resultado == "Nova Coluna")
+        output = gen_output(dados)
+
+        return output
+    elif operacao == "Apenas Pré-processamento":
+        pre_process(dados)
+        output = gen_output(dados)
+
+        return output
+    elif operacao == "Apenas Classificação":
+        classify(dados, resultado == "Nova Coluna")
+        output = gen_output(dados)
+
+        return output
 
 iface = gr.Interface(
     fn=app,
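
The hunk ends inside the gr.Interface( call, so the interface's inputs and outputs are unchanged from the previous revision and are not shown in this diff. As a point of reference only, a minimal sketch of how the call might be completed and launched is below; every component choice and label here is a guess based on how app() uses its arguments, not part of this commit:

    iface = gr.Interface(
        fn=app,
        # Hypothetical components; the real ones live in the unchanged part of app.py.
        inputs=[
            gr.Radio(["Pré-processamento + Classificação",
                      "Apenas Pré-processamento",
                      "Apenas Classificação"], label="operacao"),
            gr.Radio(["Nova Coluna", "Mesma Coluna"], label="resultado"),  # "Mesma Coluna" is a placeholder choice
            gr.Textbox(label="dados"),
        ],
        outputs=gr.File(),  # app() returns the path of the spreadsheet it writes
    )
    iface.launch()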