In [1]:
#|default_exp app

In [2]:
#|export
import numpy as np
import pandas as pd
import gradio as gr
from datasets import Dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer

In [3]:
#|export
import logging
import warnings

# Keep the app's console output quiet: ignore every Python warning and
# drop all log records at WARNING severity or below (ERROR and CRITICAL
# records still get through).
warnings.simplefilter(action='ignore')
logging.disable(logging.WARNING)

In [4]:
#|export
# Directory holding the saved checkpoint; both the model weights and the
# tokenizer files are read from this one location.
MODEL_DIR = "./spam_model/"

tokz = AutoTokenizer.from_pretrained(MODEL_DIR)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR)

# In this notebook the Trainer is only used for its `predict` method; no
# training arguments or datasets are passed at construction time.
trainer = Trainer(model, tokenizer=tokz)

In [5]:
# Bare expression: shows the Trainer object as the cell output, as a quick
# check that construction in the previous cell succeeded.
trainer



In [6]:
#|export
def tok_func(x):
    """Tokenize the ``"input"`` field of ``x`` with the module-level tokenizer.

    Intended as the callback for ``Dataset.map``; returns whatever ``tokz``
    produces for the given text(s).
    """
    texts = x["input"]
    return tokz(texts)

In [7]:
# Smoke-test the prediction pipeline on one hand-written message before
# wrapping it in a function: build a one-row dataset, tokenize, predict.
document = 'Send this message to 5 more people ASAP'
input_df = pd.DataFrame([document], columns=['input'])
input_ds = Dataset.from_pandas(input_df).map(tok_func, batched=True)
raw_predictions = trainer.predict(input_ds).predictions
# First value of the first (only) row is the score for `document`.
raw_predictions.astype(float)[0, 0]

Map: 0%| | 0/1 [00:00

0.8317995071411133

In [8]:
#|export
def classify_message(text):
    """Return a human-readable spam score for a single message.

    Wraps ``text`` in a one-row dataset, tokenizes it via ``tok_func``, runs
    ``trainer.predict`` on it, and formats the first prediction value
    (clamped to the range [0, 1]) as a percentage string.
    """
    frame = pd.DataFrame([text], columns=['input'])
    dataset = Dataset.from_pandas(frame).map(tok_func, batched=True)
    raw_scores = trainer.predict(dataset).predictions.astype(float)
    # Clamp before formatting so the reported percentage stays within 0-100.
    # NOTE(review): presumably the model emits an unbounded score rather than
    # a true probability - confirm against the training notebook.
    spam_prob = np.clip(raw_scores, 0, 1)[0, 0]
    return f'{100*spam_prob:.1f}% probability being Spam'

In [9]:
#|export
# Plain text in, plain text out: the UI is a thin wrapper around
# `classify_message`.
intf = gr.Interface(
    fn=classify_message,
    inputs='text',
    outputs='text',
)

# inline=False: serve the app at its local URL instead of embedding it in
# the notebook's cell output.
intf.launch(inline=False)

Running on local URL: http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




In [10]:
# This cell carries no `#|export` directive, so the nbdev import is kept here
# rather than in the imports cell at the top.
# NOTE(review): presumably this keeps nbdev out of the generated module's
# dependencies - confirm.
from nbdev.export import nb_export
# Export this notebook ('app.ipynb') with the current directory as the target.
nb_export('app.ipynb', '.')

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
