Freakdivi committed on
Commit 4de07cc · verified · 1 Parent(s): 5ab220d

Upload 5 files

app.py ADDED
@@ -0,0 +1,78 @@
+ import gradio as gr
+ import torch
+ import joblib
+ import numpy as np
+ from transformers import BertTokenizer, BertModel
+
+ # ----------------- 1. Setup Device -----------------
+ # HF Spaces (Free) usually runs on CPU, but this keeps it robust
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+ print(f"Using device: {device}")
+
+ # ----------------- 2. Load BERT -----------------
+ print("Loading BERT model...")
+ tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+ bert_model = BertModel.from_pretrained('bert-base-uncased')
+ bert_model.to(device)
+ bert_model.eval()
+
+ # ----------------- 3. Load MLP + Scaler + LabelEncoder -----------------
+ # Ensure these files are uploaded to your HF Space Files tab!
+ print("Loading classification components...")
+ try:
+     mlp = joblib.load("mlp_query_classifier.joblib")
+     scaler = joblib.load("scaler_query_classifier.joblib")
+     le = joblib.load("label_encoder_query_classifier.joblib")
+     print("Loaded MLP, scaler, and label encoder.")
+ except FileNotFoundError as e:
+     print(f"Error: {e}. Please make sure you uploaded the .joblib files to the Space.")
+
+ # ----------------- 4. Embedding Function -----------------
+ def get_bert_embeddings(text_list):
+     inputs = tokenizer(
+         text_list,
+         padding=True,
+         truncation=True,
+         max_length=128,
+         return_tensors="pt"
+     ).to(device)
+
+     with torch.no_grad():
+         outputs = bert_model(**inputs)
+
+     cls_embeddings = outputs.last_hidden_state[:, 0, :]
+     return cls_embeddings.cpu().numpy()
+
+ # ----------------- 5. Prediction Function -----------------
+ def predict_new_query(text):
+     # 1) BERT embedding
+     embedding = get_bert_embeddings([text])
+
+     # 2) Scale with the same scaler used during training
+     embedding_scaled = scaler.transform(embedding)
+
+     # 3) MLP prediction -> class index
+     prediction_index = mlp.predict(embedding_scaled)[0]
+
+     # 4) Map the index back to its string label
+     label = le.inverse_transform([prediction_index])[0]
+
+     # Optional: report the probability if the MLP supports it
+     try:
+         probs = mlp.predict_proba(embedding_scaled)[0]
+         confidence = np.max(probs)
+         return f"Label: {label} (Confidence: {confidence:.2f})"
+     except AttributeError:
+         return f"Label: {label}"
+
+ # ----------------- 6. Launch Gradio Interface -----------------
+ # This creates the web UI
+ iface = gr.Interface(
+     fn=predict_new_query,
+     inputs=gr.Textbox(lines=2, placeholder="Enter your query here..."),
+     outputs="text",
+     title="BERT Query Classifier",
+     description="Enter a text query to classify it using the custom BERT+MLP model."
+ )
+
+ iface.launch()
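
Once the Space builds, the gr.Interface above can also be queried programmatically. Below is a minimal sketch using gradio_client; the Space id "Freakdivi/bert-query-classifier" is a placeholder for the actual repo id, and the sample query and output are made up for illustration.

# Minimal sketch: call the deployed Gradio interface from another machine.
# The Space id below is a placeholder -- replace it with this repo's actual id.
from gradio_client import Client

client = Client("Freakdivi/bert-query-classifier")  # hypothetical Space id
result = client.predict("How do I reset my password?", api_name="/predict")
print(result)  # e.g. "Label: account (Confidence: 0.87)"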
label_encoder_query_classifier.joblib ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a3aa7e324c30cf231a4d1024b079027fd0b757d04bce5b50aa49d02d55d4841d
+ size 561
mlp_query_classifier.joblib ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b6451080153abeeb9a38e3030d68f426d5d939fec81bf1dd0de8e6a3f54249d8
+ size 5723034
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ gradio
+ torch
+ transformers
+ scikit-learn
+ joblib
+ numpy
scaler_query_classifier.joblib ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:02355072620b8db29e0397b73264905b25c0b3d6d7aba8f8d68e5f2335f94791
+ size 31575
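
The three .joblib files in this commit are Git LFS pointers to the fitted scikit-learn artifacts that app.py loads by filename. For context, here is a minimal sketch of the assumed training-side export; the feature source, hidden-layer size, and dummy data are assumptions and are not taken from this repo.

# Hypothetical training-side export (not part of this commit): fit the scaler,
# label encoder, and MLP on BERT [CLS] embeddings, then persist them with joblib
# under the exact filenames that app.py expects.
import joblib
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neural_network import MLPClassifier

# Placeholders: X would be (n_samples, 768) BERT embeddings, y the string labels.
X = np.random.rand(100, 768)
y = np.array(["billing", "support"] * 50)

le = LabelEncoder()
y_enc = le.fit_transform(y)

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

mlp = MLPClassifier(hidden_layer_sizes=(256,), max_iter=300)  # sizes are an assumption
mlp.fit(X_scaled, y_enc)

joblib.dump(mlp, "mlp_query_classifier.joblib")
joblib.dump(scaler, "scaler_query_classifier.joblib")
joblib.dump(le, "label_encoder_query_classifier.joblib")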