HMPhuoc committed on
Commit f5d4e06
1 Parent(s): cf845e8

add tokenize function

Files changed (1)
  1. app.py +10 -60
app.py CHANGED
@@ -16,98 +16,56 @@ from underthesea import word_tokenize
 
 from phoBERT import BERT_predict
 
-# Load tokenizer
-# fp = Path(__file__).with_name('tokenizer.pkl')
-# with open(fp,mode="rb") as f:
-#   tokenizer = pickle.load(f)
 
-#Load LSTM
-#fp = Path(__file__).with_name('lstm_model.h5')
 LSTM_model = tf.keras.models.load_model('lstm_model.tf')
 
-#Load GRU
-#fp = Path(__file__).with_name('gru_model.h5')
 GRU_model = tf.keras.models.load_model('gru_model.tf')
 
-
-def tokenizer_pad(tokenizer,comment_text,max_length=200):
-
-  comment_text = word_tokenize(comment_text, format="text")
-  comment_text = [comment_text]
-  tokenized_text = tokenizer.texts_to_sequences(comment_text)
-
-  padded_sequences = pad_sequences(sequences=tokenized_text,maxlen=max_length,padding="post",truncating="post")
-
-  return padded_sequences
-
 def LSTM_predict(x):
-  # x = tokenizer_pad(tokenizer=tokenizer,comment_text=x)
-
 
   pred_proba = LSTM_model.predict([x])[0]
 
   pred_proba = [round(i,2) for i in pred_proba]
 
-  #print(pred_proba)
-
   return pred_proba
 
 def GRU_predict(x):
-  # x = tokenizer_pad(tokenizer=tokenizer,comment_text=x)
-
 
   pred_proba = GRU_model.predict([x])[0]
 
   pred_proba = [round(i,2) for i in pred_proba]
 
-  #print(pred_proba)
-
   return pred_proba
 
-def plot(result):
-  label = ['độc hại', 'cực kì độc hại', 'tục tĩu', 'đe dọa', 'xúc phạm', 'thù ghét cá nhân']
-  data = pd.DataFrame()
-  data['Nhãn'] = label
-  data['Điểm'] = result
-
-  #print(data)
+def tokenize(x):
+  x = ud.normalize('NFKC', x)
+  x = word_tokenize(x, format="text")
+  return x
 
-  p = px.bar(data, x='Nhãn', y='Điểm', color='Nhãn', range_y=[0, 1] )
-  return p
-  pass
 
 def judge(x):
 
-  label = ['độc hại', 'cực kì độc hại', 'tục tĩu', 'đe dọa', 'xúc phạm', 'thù ghét cá nhân']
   result = []
-  judge_result = []
 
-  x = ud.normalize('NFKC', x)
-  x = word_tokenize(x, format="text")
+  x = tokenize(x)
 
   lstm_pred = LSTM_predict(x)
   gru_pred = GRU_predict(x)
-  #bert_pred = BERT_predict(x)
-  #print(result)
 
-  return_result = 'Result'
   result_lstm = np.round(lstm_pred, 2)
   result_gru = np.round(gru_pred, 2)
-  #result_bert = np.round(bert_pred, 2)
 
   for i in range(6):
     result.append((result_lstm[i]+result_gru[i])/2)
 
   return (result)
 
+
 def judgePlus(x):
 
-  label = ['độc hại', 'cực kì độc hại', 'tục tĩu', 'đe dọa', 'xúc phạm', 'thù ghét cá nhân']
   result = []
-  judge_result = []
 
-  x = ud.normalize('NFKC', x)
-  x = word_tokenize(x, format="text")
+  x = tokenize(x)
 
   lstm_pred = LSTM_predict(x)
   gru_pred = GRU_predict(x)
@@ -117,11 +75,10 @@ def judgePlus(x):
   bert_pred = np.average([lstm_pred, gru_pred], axis=0)
 
 
-  return_result = 'Result'
   result_lstm = np.round(lstm_pred, 2)
   result_gru = np.round(gru_pred, 2)
   result_bert = np.round(bert_pred, 2)
-  #result_bert = np.round(bert_pred, 2)
+
   if((result_lstm[0]+result_gru[0])<(result_bert[0]*2)):
     for i in range(6):
       result.append((result_bert[i])/1)
@@ -131,26 +88,19 @@
 
   return (result)
 
+
 def judgeBert(x):
 
-  label = ['độc hại', 'cực kì độc hại', 'tục tĩu', 'đe dọa', 'xúc phạm', 'thù ghét cá nhân']
   result = []
-  judge_result = []
 
-  x = ud.normalize('NFKC', x)
-  x = word_tokenize(x, format="text")
+  x = tokenize(x)
 
-
   try:
     bert_pred = BERT_predict(x)
   except:
     bert_pred = np.zeros(6, dtype=float)
 
-
-  return_result = 'Result'
-
   result_bert = np.round(bert_pred, 2)
-  #result_bert = np.round(bert_pred, 2)
 
   for i in range(6):
     result.append((result_bert[i])/1)
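
For context on the change: the commit replaces the inline NFKC normalization and word segmentation repeated in judge(), judgePlus() and judgeBert() with a single tokenize() helper, and drops the unused Keras tokenizer_pad and plot code. Below is a minimal, runnable sketch of that helper in isolation, assuming ud is the standard-library unicodedata module and word_tokenize comes from underthesea, as the imports around this hunk suggest.

import unicodedata as ud              # assumed binding for `ud` in app.py
from underthesea import word_tokenize

def tokenize(x: str) -> str:
    # Normalize Unicode to NFKC so visually identical Vietnamese characters
    # compare equal, then word-segment the text into an underscore-joined
    # string, which is the form the predictors receive.
    x = ud.normalize('NFKC', x)
    x = word_tokenize(x, format="text")
    return x

# Hypothetical usage: the segmented string is what judge()/judgePlus()/judgeBert()
# now pass on to LSTM_predict, GRU_predict and BERT_predict.
print(tokenize("Bình luận này thật tệ"))   # e.g. "Bình_luận này thật tệ"

Centralizing the preprocessing in one helper keeps the three judge* functions consistent and accounts for most of the +10/-60 line delta shown above.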