Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -8,6 +8,8 @@ from tensorflow.keras.models import load_model
|
|
8 |
from tensorflow.keras.preprocessing.text import Tokenizer
|
9 |
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
10 |
import numpy as np
|
|
|
|
|
11 |
|
12 |
# load all the models and vectorizer (global vocabulary)
|
13 |
Seq_model = load_model("LSTM.h5") # Sequential
|
@@ -18,6 +20,11 @@ svm_model = joblib.load('svm_model.joblib')
|
|
18 |
vectorizer = joblib.load("vectorizer.joblib") # global vocabulary (used for Logistic, SVC)
|
19 |
tokenizer = joblib.load("tokenizer.joblib") # used for LSTM
|
20 |
|
|
|
|
|
|
|
|
|
|
|
21 |
# Decode label function
|
22 |
# {'business': 0, 'entertainment': 1, 'health': 2, 'politics': 3, 'sport': 4}
|
23 |
def decodedLabel(input_number):
|
@@ -86,6 +93,13 @@ def process_api(text):
|
|
86 |
processed_text = vectorizer.transform([text])
|
87 |
sequence = tokenizer.texts_to_sequences([text])
|
88 |
padded_sequence = pad_sequences(sequence, maxlen=1000, padding='post')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
|
90 |
# Get the predicted result from models
|
91 |
Logistic_Predicted = logistic_model.predict(processed_text).tolist() # Logistic Model
|
@@ -97,8 +111,11 @@ def process_api(text):
|
|
97 |
Logistic_Predicted_proba = logistic_model.predict_proba(processed_text)
|
98 |
svm_new_probs = SVM_model.decision_function(processed_text)
|
99 |
svm_probs = svm_model.predict_proba(svm_new_probs)
|
100 |
-
|
101 |
predicted_label_index = np.argmax(Seq_Predicted)
|
|
|
|
|
|
|
|
|
102 |
# ----------- Debug Logs -----------
|
103 |
logistic_debug = decodedLabel(int(Logistic_Predicted[0]))
|
104 |
svc_debug = decodedLabel(int(SVM_Predicted[0]))
|
@@ -115,6 +132,9 @@ def process_api(text):
|
|
115 |
|
116 |
'predicted_label_lstm': decodedLabel(int(predicted_label_index)),
|
117 |
'probability_lstm': f"{int(float(np.max(Seq_Predicted))*10000//100)}%",
|
|
|
|
|
|
|
118 |
|
119 |
'Article_Content': text
|
120 |
}
|
@@ -234,6 +254,10 @@ if url:
|
|
234 |
"predicted_label": result.get("predicted_label_lstm"),
|
235 |
"probability": result.get("probability_lstm")
|
236 |
}
|
|
|
|
|
|
|
|
|
237 |
})
|
238 |
|
239 |
st.divider() # Draws a horizontal rule
|
|
|
8 |
from tensorflow.keras.preprocessing.text import Tokenizer
|
9 |
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
10 |
import numpy as np
|
11 |
+
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
|
12 |
+
import torch
|
13 |
|
14 |
# load all the models and vectorizer (global vocabulary)
|
15 |
Seq_model = load_model("LSTM.h5") # Sequential
|
|
|
20 |
vectorizer = joblib.load("vectorizer.joblib") # global vocabulary (used for Logistic, SVC)
|
21 |
tokenizer = joblib.load("tokenizer.joblib") # used for LSTM
|
22 |
|
23 |
+
tokenizer1 = DistilBertTokenizer.from_pretrained("tokenizer_bert")
|
24 |
+
model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=5)
|
25 |
+
model.load_state_dict(torch.load("fine_tuned_bert_model1.pth", map_location=torch.device('cuda')))
|
26 |
+
model = model.to('cuda')
|
27 |
+
|
28 |
# Decode label function
|
29 |
# {'business': 0, 'entertainment': 1, 'health': 2, 'politics': 3, 'sport': 4}
|
30 |
def decodedLabel(input_number):
|
|
|
93 |
processed_text = vectorizer.transform([text])
|
94 |
sequence = tokenizer.texts_to_sequences([text])
|
95 |
padded_sequence = pad_sequences(sequence, maxlen=1000, padding='post')
|
96 |
+
|
97 |
+
new_encoding = tokenizer1([text], truncation=True, padding=True, return_tensors="pt")
|
98 |
+
input_ids = new_encoding['input_ids']
|
99 |
+
attention_mask = new_encoding['attention_mask']
|
100 |
+
with torch.no_grad():
|
101 |
+
output = model(input_ids, attention_mask=attention_mask)
|
102 |
+
logits = output.logits
|
103 |
|
104 |
# Get the predicted result from models
|
105 |
Logistic_Predicted = logistic_model.predict(processed_text).tolist() # Logistic Model
|
|
|
111 |
Logistic_Predicted_proba = logistic_model.predict_proba(processed_text)
|
112 |
svm_new_probs = SVM_model.decision_function(processed_text)
|
113 |
svm_probs = svm_model.predict_proba(svm_new_probs)
|
|
|
114 |
predicted_label_index = np.argmax(Seq_Predicted)
|
115 |
+
|
116 |
+
bert_probabilities = torch.softmax(logits, dim=1)
|
117 |
+
max_probability = torch.max(bert_probabilities).item()
|
118 |
+
predicted_label_bert = torch.argmax(logits, dim=1).item()
|
119 |
# ----------- Debug Logs -----------
|
120 |
logistic_debug = decodedLabel(int(Logistic_Predicted[0]))
|
121 |
svc_debug = decodedLabel(int(SVM_Predicted[0]))
|
|
|
132 |
|
133 |
'predicted_label_lstm': decodedLabel(int(predicted_label_index)),
|
134 |
'probability_lstm': f"{int(float(np.max(Seq_Predicted))*10000//100)}%",
|
135 |
+
|
136 |
+
'predicted_label_bert': int(predicted_label_bert),
|
137 |
+
'probability_bert': f"{int(float(max_probability)*10000//100)}%",
|
138 |
|
139 |
'Article_Content': text
|
140 |
}
|
|
|
254 |
"predicted_label": result.get("predicted_label_lstm"),
|
255 |
"probability": result.get("probability_lstm")
|
256 |
},
|
257 |
+
"BERT": {
|
258 |
+
"predicted_label": result.get("predicted_label_bert"),
|
259 |
+
"probability": result.get("probability_bert")
|
260 |
+
}
|
261 |
})
|
262 |
|
263 |
st.divider() # Draws a horizontal rule
|