File size: 898 Bytes
4da7379
357cf66
 
4da7379
 
 
357cf66
 
4da7379
 
 
357cf66
4da7379
9638edb
4da7379
b9cf3e2
ab2a4bd
4da7379
 
 
 
 
ab2a4bd
b9cf3e2
ab2a4bd
b9cf3e2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
from transformers import BertTokenizer, BertModel, AutoTokenizer, AutoModel
from sklearn.ensemble import IsolationForest
from tqdm import tqdm

import torch
import gradio as gr
import numpy as np

# Load the Portuguese BERT tokenizer and encoder from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained("neuralmind/bert-base-portuguese-cased")
# .eval() returns the module itself, so the encoder is put in evaluation
# mode as part of the same expression that loads it.
model = AutoModel.from_pretrained("neuralmind/bert-base-portuguese-cased").eval()

# Fit the outlier detector once, at import time, on the array in x_train.npy.
# NOTE(review): presumably these rows are embeddings with the same feature
# dimension that classify_email passes to iso_forest.predict — confirm.
data = np.load("x_train.npy")
iso_forest = IsolationForest(contamination=0.15, random_state=42).fit(data)

def classify_email(text: str) -> int:
    """Score a piece of text with the BERT encoder and the Isolation Forest.

    The text is tokenized (truncated to at most 256 tokens), run through the
    module-level ``model`` with gradient tracking disabled, and the hidden
    state at sequence position 0 is passed to the module-level
    ``iso_forest`` for prediction.

    Args:
        text: The text to classify.

    Returns:
        The detector's prediction for the text as a built-in ``int``
        (scikit-learn's ``IsolationForest.predict`` yields ``1`` for
        inliers and ``-1`` for outliers).
    """
    with torch.no_grad():
        inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=256)
        outputs = model(**inputs)
        # Keep only the first position of the sequence for each batch row.
        cls_embedding = outputs.last_hidden_state[:, 0, :].cpu().numpy()
    pred = iso_forest.predict(cls_embedding)[0]
    # predict() yields a NumPy integer scalar; convert it so the Gradio
    # "number" output receives a plain Python int.
    return int(pred)

# Wrap classify_email in a Gradio interface (text input, numeric output)
# and start serving it.
demo = gr.Interface(
    fn=classify_email,
    inputs="text",
    outputs="number",
)
demo.launch()