# Viewer metadata captured together with this file (not part of the program):
#   Spaces status: Sleeping
#   File size: 2,483 Bytes
#   Revision hashes: 6d4fefb b5394e7 6d4fefb dd273fe 6d4fefb
import torch
import torch.nn as nn
from transformers import AutoModel, AutoTokenizer
import gradio as gr
from sklearn.preprocessing import LabelEncoder
import pandas as pd
# ===== Load Label Encoder =====
# Read the semicolon-separated CSV and fit a LabelEncoder on its "label"
# column; `le` is later used to turn predicted class indices back into labels.
df = pd.read_csv("Dataset_new.csv", sep=";")
le = LabelEncoder().fit(df["label"])
# ===== Define Model Class =====
class IndoBERTClassifier(nn.Module):
    """Pretrained encoder followed by dropout and a linear classification head.

    The attribute names ``bert``, ``dropout`` and ``classifier`` determine the
    keys of the module's ``state_dict``; keep them stable so existing
    checkpoints continue to load.

    Args:
        model_name: Identifier or path passed to ``AutoModel.from_pretrained``.
        num_labels: Number of output classes (size of the logits' last dim).
        dropout: Dropout probability applied to the pooled vector before the
            linear head. Defaults to 0.3.
    """

    def __init__(self, model_name, num_labels, dropout=0.3):
        super().__init__()
        self.bert = AutoModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(dropout)
        # Head width follows the encoder's configured hidden size.
        self.classifier = nn.Linear(self.bert.config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask, token_type_ids=None):
        """Return unnormalised class logits for a batch of tokenised inputs.

        Args:
            input_ids: Token id tensor forwarded to the encoder.
            attention_mask: Attention mask tensor forwarded to the encoder.
            token_type_ids: Optional segment ids; ``None`` is forwarded as-is.

        Returns:
            The output of the linear head applied to the (dropout-regularised)
            hidden state at sequence position 0.
        """
        outputs = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )
        # Pool by taking the hidden state of the first token of each sequence.
        first_token_state = outputs.last_hidden_state[:, 0]
        return self.classifier(self.dropout(first_token_state))
# ===== Load Model and Tokenizer =====
# A single checkpoint id feeds both the tokenizer and the encoder so the two
# cannot silently drift apart.
MODEL_NAME = "indobenchmark/indobert-base-p1"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# NOTE(review): num_labels is fixed at 5 while predictions are decoded through
# the label encoder fitted on Dataset_new.csv -- confirm that file has exactly
# 5 distinct labels, otherwise decoding a predicted index can fail or mislabel.
model = IndoBERTClassifier(MODEL_NAME, num_labels=5)

# torch.load unpickles the file; weights_only=True limits that to tensors and
# plain containers, which is all a state_dict needs.
state_dict = torch.load(
    "pytorch_model.bin",
    map_location=torch.device("cpu"),
    weights_only=True,
)
model.load_state_dict(state_dict)
# Inference only: switch dropout (and any other train-time layers) off.
model.eval()
# ===== Prediction Function =====
def predict(text):
    """Classify one message and return a formatted result string.

    Args:
        text: The message to classify. ``None``, empty and whitespace-only
            values are not sent to the model.

    Returns:
        A string containing the decoded label and its numeric class index, or
        a short prompt asking for input when ``text`` is empty.
    """
    # Nothing to classify: an empty message would otherwise still be assigned
    # a category, which is meaningless to the user.
    if text is None or not str(text).strip():
        return "Masukkan kalimat terlebih dahulu."

    encoded = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        # truncation=True can only cut when a maximum length is known; state
        # it explicitly so over-long messages never exceed the encoder's
        # position-embedding table.
        max_length=model.bert.config.max_position_embeddings,
    )
    with torch.no_grad():
        logits = model(
            input_ids=encoded["input_ids"],
            attention_mask=encoded["attention_mask"],
            token_type_ids=encoded.get("token_type_ids"),
        )
    # Single input -> single row of logits; pick the highest-scoring class.
    pred = torch.argmax(logits, dim=1).item()
    label = le.inverse_transform([pred])[0]
    # NOTE(review): the leading "π¨" / "π" characters look like emoji bytes
    # decoded with the wrong charset -- confirm the intended glyphs.
    return f"π¨ Kategori Deteksi:\n\nπ {label} (Label {pred})"
# ===== Gradio UI =====
# Layout: heading and instructions, one row with the input box, one row with
# the output box, then a button that runs predict() on the input text.
# NOTE(review): the original indentation was lost; the button is placed at
# Blocks level (below both rows) -- confirm against the intended layout.
# NOTE(review): the "π..." prefixes in the labels look like mis-decoded emoji;
# they are kept unchanged here -- confirm the intended glyphs.
with gr.Blocks() as demo:
    gr.Markdown("## π€ Deteksi Spam Penipuan Berbahasa Indonesia")
    gr.Markdown("Masukkan kalimat pesan yang ingin diperiksa apakah termasuk penipuan, permintaan data diri, tautan mencurigakan, atau tawaran kerja palsu.")
    with gr.Row():
        input_text = gr.Textbox(lines=3, placeholder="Contoh: Selamat! Anda mendapatkan hadiah. Klik link ini.", label="π¬ Masukkan Kalimat")
    with gr.Row():
        output_text = gr.Textbox(label="π€ Hasil Klasifikasi")
    run_button = gr.Button("π Deteksi")
    # Event listeners must be registered while the Blocks context is open.
    run_button.click(fn=predict, inputs=input_text, outputs=output_text)

# Start the server only when executed as a script, so importing this module
# (e.g. to reuse predict) does not launch the app as a side effect.
if __name__ == "__main__":
    demo.launch()