File size: 898 Bytes
4da7379 357cf66 4da7379 357cf66 4da7379 357cf66 4da7379 9638edb 4da7379 b9cf3e2 ab2a4bd 4da7379 ab2a4bd b9cf3e2 ab2a4bd b9cf3e2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
from transformers import BertTokenizer, BertModel, AutoTokenizer, AutoModel
from sklearn.ensemble import IsolationForest
from tqdm import tqdm
import torch
import gradio as gr
import numpy as np
# Hugging Face checkpoint id shared by the tokenizer and the encoder.
_CHECKPOINT = "neuralmind/bert-base-portuguese-cased"

tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
# Module.eval() returns the module itself, so loading and switching to
# evaluation mode can be written as a single expression.
model = AutoModel.from_pretrained(_CHECKPOINT).eval()

# Training matrix the outlier detector is fitted on at import time.
data = np.load("x_train.npy")
# fit() returns the fitted estimator, so construction and fitting are chained.
iso_forest = IsolationForest(
    contamination=0.15,
    random_state=42,
).fit(data)
def classify_email(text):
    """Score an e-mail text with the fitted isolation forest.

    The text is tokenized (truncated to at most 256 tokens), passed through
    the module-level ``model``, and the hidden state at sequence position 0
    is handed to the module-level ``iso_forest`` for prediction.

    Args:
        text: Raw e-mail text to classify.

    Returns:
        The forest's prediction for ``text`` as a built-in ``int``. Per the
        scikit-learn documentation, ``IsolationForest.predict`` yields ``1``
        for inliers and ``-1`` for outliers.
    """
    # Inference only: no gradients are needed, so skip autograd tracking.
    with torch.no_grad():
        inputs = tokenizer(
            text,
            return_tensors='pt',
            truncation=True,
            padding=True,
            max_length=256,
        )
        outputs = model(**inputs)
        # First position along the sequence axis (the [CLS] slot for BERT
        # tokenizers), converted to a NumPy array for scikit-learn.
        cls_embedding = outputs.last_hidden_state[:, 0, :].cpu().numpy()

    pred = iso_forest.predict(cls_embedding)[0]
    # predict() returns a NumPy array; convert the element to a plain int so
    # the caller (Gradio's "number" output) gets a JSON-native value.
    return int(pred)
# Web UI: one free-text input, one numeric output showing the value
# returned by classify_email.
demo = gr.Interface(
    fn=classify_email,
    inputs="text",
    outputs="number",
)
# Start the Gradio server as soon as the script is executed.
demo.launch()
|