MINHCT committed
Commit 3c09b95 • 1 Parent(s): db06d58

Update app.py

Files changed (1):
  1. app.py +25 -1
app.py CHANGED
 
@@ -8,6 +8,8 @@ from tensorflow.keras.models import load_model
 from tensorflow.keras.preprocessing.text import Tokenizer
 from tensorflow.keras.preprocessing.sequence import pad_sequences
 import numpy as np
+from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
+import torch
 
 # load all the models and vectorizer (global vocabulary)
 Seq_model = load_model("LSTM.h5") # Sequential
@@ -18,6 +20,11 @@ svm_model = joblib.load('svm_model.joblib')
 vectorizer = joblib.load("vectorizer.joblib") # global vocabulary (used for Logistic, SVC)
 tokenizer = joblib.load("tokenizer.joblib") # used for LSTM
 
+tokenizer1 = DistilBertTokenizer.from_pretrained("tokenizer_bert")
+model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=5)
+model.load_state_dict(torch.load("fine_tuned_bert_model1.pth", map_location=torch.device('cpu')))  # valid torch device strings are 'cpu'/'cuda', not 'gpu'
+model = model.to('cpu')
+
 # Decode label function
 # {'business': 0, 'entertainment': 1, 'health': 2, 'politics': 3, 'sport': 4}
 def decodedLabel(input_number):
@@ -86,6 +93,13 @@ def process_api(text):
     processed_text = vectorizer.transform([text])
     sequence = tokenizer.texts_to_sequences([text])
     padded_sequence = pad_sequences(sequence, maxlen=1000, padding='post')
+
+    new_encoding = tokenizer1([text], truncation=True, padding=True, return_tensors="pt")
+    input_ids = new_encoding['input_ids']
+    attention_mask = new_encoding['attention_mask']
+    with torch.no_grad():  # inference only, no gradient tracking
+        output = model(input_ids, attention_mask=attention_mask)
+    logits = output.logits
 
     # Get the predicted result from models
     Logistic_Predicted = logistic_model.predict(processed_text).tolist() # Logistic Model
@@ -97,8 +111,11 @@ def process_api(text):
     Logistic_Predicted_proba = logistic_model.predict_proba(processed_text)
     svm_new_probs = SVM_model.decision_function(processed_text)
     svm_probs = svm_model.predict_proba(svm_new_probs)
-
     predicted_label_index = np.argmax(Seq_Predicted)
+
+    bert_probabilities = torch.softmax(logits, dim=1)  # logits -> class probabilities
+    max_probability = torch.max(bert_probabilities).item()
+    predicted_label_bert = torch.argmax(logits, dim=1).item()
     # ----------- Debug Logs -----------
     logistic_debug = decodedLabel(int(Logistic_Predicted[0]))
     svc_debug = decodedLabel(int(SVM_Predicted[0]))
@@ -115,6 +132,9 @@ def process_api(text):
 
         'predicted_label_lstm': decodedLabel(int(predicted_label_index)),
         'probability_lstm': f"{int(float(np.max(Seq_Predicted))*10000//100)}%",
+
+        'predicted_label_bert': decodedLabel(int(predicted_label_bert)),  # decoded to a label name, matching the other models
+        'probability_bert': f"{int(float(max_probability)*10000//100)}%",
 
         'Article_Content': text
     }
@@ -234,6 +254,10 @@ if url:
             "predicted_label": result.get("predicted_label_lstm"),
             "probability": result.get("probability_lstm")
-        }
+        },
+        "BERT": {
+            "predicted_label": result.get("predicted_label_bert"),
+            "probability": result.get("probability_bert")
+        }
     })
 
 st.divider() # 👈 Draws a horizontal rule
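
Pieced together outside the diff, the DistilBERT path this commit adds looks like the minimal sketch below. It is a reconstruction, not the committed file: it assumes the checkpoint artifacts named in the diff ("tokenizer_bert" and "fine_tuned_bert_model1.pth") are present, the LABELS mapping is taken from the decodedLabel comment in app.py, and the device selection and classify wrapper are illustrative additions that do not appear in the commit.

import torch
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification

# Mapping copied from the decodedLabel comment in app.py
LABELS = {0: 'business', 1: 'entertainment', 2: 'health', 3: 'politics', 4: 'sport'}

# Use 'cuda' only when a GPU is actually available; otherwise fall back to CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

tokenizer1 = DistilBertTokenizer.from_pretrained("tokenizer_bert")
model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=5)
model.load_state_dict(torch.load("fine_tuned_bert_model1.pth", map_location=device))
model = model.to(device)
model.eval()  # inference mode (disables dropout)

def classify(text):
    # Tokenize and keep the tensors on the same device as the model
    encoding = tokenizer1([text], truncation=True, padding=True, return_tensors="pt").to(device)
    with torch.no_grad():
        logits = model(**encoding).logits
    probs = torch.softmax(logits, dim=1)
    label = LABELS[int(torch.argmax(probs, dim=1).item())]
    confidence = float(torch.max(probs).item())
    return label, f"{int(confidence * 10000 // 100)}%"  # the app's percent format

Note that the percent format truncates rather than rounds: a probability of 0.98765 becomes 9876.5, floor-divides to 98, and renders as "98%".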