Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -8,6 +8,8 @@ from tensorflow.keras.models import load_model
|
|
8 |
from tensorflow.keras.preprocessing.text import Tokenizer
|
9 |
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
10 |
import numpy as np
|
|
|
|
|
11 |
|
12 |
# load all the models and vectorizer (global vocabulary)
|
13 |
Seq_model = load_model("LSTM.h5") # Sequential
|
@@ -18,6 +20,11 @@ svm_model = joblib.load('svm_model.joblib')
|
|
18 |
vectorizer = joblib.load("vectorizer.joblib") # global vocabulary (used for Logistic, SVC)
|
19 |
tokenizer = joblib.load("tokenizer.joblib") # used for LSTM
|
20 |
|
|
|
|
|
|
|
|
|
|
|
21 |
# Decode label function
|
22 |
# {'business': 0, 'entertainment': 1, 'health': 2, 'politics': 3, 'sport': 4}
|
23 |
def decodedLabel(input_number):
|
@@ -86,6 +93,13 @@ def process_api(text):
|
|
86 |
processed_text = vectorizer.transform([text])
|
87 |
sequence = tokenizer.texts_to_sequences([text])
|
88 |
padded_sequence = pad_sequences(sequence, maxlen=1000, padding='post')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
|
90 |
# Get the predicted result from models
|
91 |
Logistic_Predicted = logistic_model.predict(processed_text).tolist() # Logistic Model
|
@@ -97,8 +111,11 @@ def process_api(text):
|
|
97 |
Logistic_Predicted_proba = logistic_model.predict_proba(processed_text)
|
98 |
svm_new_probs = SVM_model.decision_function(processed_text)
|
99 |
svm_probs = svm_model.predict_proba(svm_new_probs)
|
100 |
-
|
101 |
predicted_label_index = np.argmax(Seq_Predicted)
|
|
|
|
|
|
|
|
|
102 |
# ----------- Debug Logs -----------
|
103 |
logistic_debug = decodedLabel(int(Logistic_Predicted[0]))
|
104 |
svc_debug = decodedLabel(int(SVM_Predicted[0]))
|
@@ -115,6 +132,9 @@ def process_api(text):
|
|
115 |
|
116 |
'predicted_label_lstm': decodedLabel(int(predicted_label_index)),
|
117 |
'probability_lstm': f"{int(float(np.max(Seq_Predicted))*10000//100)}%",
|
|
|
|
|
|
|
118 |
|
119 |
'Article_Content': text
|
120 |
}
|
@@ -234,6 +254,10 @@ if url:
|
|
234 |
"predicted_label": result.get("predicted_label_lstm"),
|
235 |
"probability": result.get("probability_lstm")
|
236 |
}
|
|
|
|
|
|
|
|
|
237 |
})
|
238 |
|
239 |
st.divider() # Draws a horizontal rule
|
|
|
8 |
from tensorflow.keras.preprocessing.text import Tokenizer
|
9 |
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
10 |
import numpy as np
|
11 |
+
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
|
12 |
+
import torch
|
13 |
|
14 |
# load all the models and vectorizer (global vocabulary)
|
15 |
Seq_model = load_model("LSTM.h5") # Sequential
|
|
|
20 |
vectorizer = joblib.load("vectorizer.joblib") # global vocabulary (used for Logistic, SVC)
|
21 |
tokenizer = joblib.load("tokenizer.joblib") # used for LSTM
|
22 |
|
23 |
+
tokenizer1 = DistilBertTokenizer.from_pretrained("tokenizer_bert")
|
24 |
+
model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=5)
|
25 |
+
model.load_state_dict(torch.load("fine_tuned_bert_model1.pth", map_location=torch.device('cuda')))
|
26 |
+
model = model.to('cuda')
|
27 |
+
|
28 |
# Decode label function
|
29 |
# {'business': 0, 'entertainment': 1, 'health': 2, 'politics': 3, 'sport': 4}
|
30 |
def decodedLabel(input_number):
|
|
|
93 |
processed_text = vectorizer.transform([text])
|
94 |
sequence = tokenizer.texts_to_sequences([text])
|
95 |
padded_sequence = pad_sequences(sequence, maxlen=1000, padding='post')
|
96 |
+
|
97 |
+
new_encoding = tokenizer1([text], truncation=True, padding=True, return_tensors="pt")
|
98 |
+
input_ids = new_encoding['input_ids']
|
99 |
+
attention_mask = new_encoding['attention_mask']
|
100 |
+
with torch.no_grad():
|
101 |
+
output = model(input_ids, attention_mask=attention_mask)
|
102 |
+
logits = output.logits
|
103 |
|
104 |
# Get the predicted result from models
|
105 |
Logistic_Predicted = logistic_model.predict(processed_text).tolist() # Logistic Model
|
|
|
111 |
Logistic_Predicted_proba = logistic_model.predict_proba(processed_text)
|
112 |
svm_new_probs = SVM_model.decision_function(processed_text)
|
113 |
svm_probs = svm_model.predict_proba(svm_new_probs)
|
|
|
114 |
predicted_label_index = np.argmax(Seq_Predicted)
|
115 |
+
|
116 |
+
bert_probabilities = torch.softmax(logits, dim=1)
|
117 |
+
max_probability = torch.max(bert_probabilities).item()
|
118 |
+
predicted_label_bert = torch.argmax(logits, dim=1).item()
|
119 |
# ----------- Debug Logs -----------
|
120 |
logistic_debug = decodedLabel(int(Logistic_Predicted[0]))
|
121 |
svc_debug = decodedLabel(int(SVM_Predicted[0]))
|
|
|
132 |
|
133 |
'predicted_label_lstm': decodedLabel(int(predicted_label_index)),
|
134 |
'probability_lstm': f"{int(float(np.max(Seq_Predicted))*10000//100)}%",
|
135 |
+
|
136 |
+
'predicted_label_bert': int(predicted_label_bert),
|
137 |
+
'probability_bert': f"{int(float(max_probability)*10000//100)}%",
|
138 |
|
139 |
'Article_Content': text
|
140 |
}
|
|
|
254 |
"predicted_label": result.get("predicted_label_lstm"),
|
255 |
"probability": result.get("probability_lstm")
|
256 |
},
|
257 |
+
"BERT": {
|
258 |
+
"predicted_label": result.get("predicted_label_bert"),
|
259 |
+
"probability": result.get("probability_bert")
|
260 |
+
}
|
261 |
})
|
262 |
|
263 |
st.divider() # Draws a horizontal rule
|