autou / app.py
joaocansi
feat: change contamination to 0.15
9638edb
raw
history blame contribute delete
898 Bytes
from transformers import BertTokenizer, BertModel, AutoTokenizer, AutoModel
from sklearn.ensemble import IsolationForest
from tqdm import tqdm
import torch
import gradio as gr
import numpy as np
# Single checkpoint name shared by the tokenizer and the encoder so the two
# always come from the same pretrained model.
MODEL_NAME = "neuralmind/bert-base-portuguese-cased"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME)
model.eval()  # inference only: put the network in evaluation mode

# Reference feature matrix the outlier detector is fitted on at startup.
# NOTE(review): presumably embeddings produced the same way as in
# classify_email (same checkpoint, first-token hidden state) -- confirm.
data = np.load("x_train.npy")

# `fit` returns the estimator itself, so construction and fitting can be chained.
# contamination=0.15 sets the proportion of the training data treated as
# outliers when the decision threshold is chosen; random_state pins the forest.
iso_forest = IsolationForest(contamination=0.15, random_state=42).fit(data)
def classify_email(text):
    """Label an e-mail text as inlier or outlier using its encoder embedding.

    The text is tokenized (truncated to at most 256 tokens), passed through
    the module-level ``model``, and the hidden state at the first token
    position is scored by the module-level ``iso_forest``.

    Args:
        text: The e-mail text to classify.

    Returns:
        The Isolation Forest prediction as a built-in ``int``. Per the
        scikit-learn API, ``1`` denotes an inlier and ``-1`` an outlier.
    """
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=256,
    )

    # No gradients are needed for inference; skipping autograd bookkeeping
    # also guarantees the result can be converted to NumPy directly.
    with torch.no_grad():
        outputs = model(**inputs)
        # Keep only position 0 of the sequence axis for each batch item
        # (the leading special token added by the tokenizer).
        cls_embedding = outputs.last_hidden_state[:, 0, :].cpu().numpy()

    # `predict` returns a NumPy array; convert the single label to a plain
    # Python int so the UI layer receives a native, serializable number
    # instead of a numpy.int64 scalar.
    return int(iso_forest.predict(cls_embedding)[0])
# Minimal web UI: one text box in, the numeric label from classify_email out.
demo = gr.Interface(
    fn=classify_email,
    inputs="text",
    outputs="number",
)

# Start the Gradio server when the script runs.
demo.launch()