Ngadou's picture
Create app.py
396fa06
raw
history blame
1.21 kB
import gradio as gr
import time
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
# Stage 1 - speech recognition: turns an audio input into a transcript.
asr_pipeline = pipeline(
    task="automatic-speech-recognition",
    model="facebook/wav2vec2-base-960h",
)

# Stage 2 - text classification: the tokenizer and the sequence-classification
# model are loaded from the same checkpoint so their vocabularies match.
# NOTE(review): the id is reproduced exactly as written (including the
# "dectector" spelling); presumably it matches the published repository name.
_CLASSIFIER_CHECKPOINT = "Ngadou/bert-sms-spam-dectector"
classifier_tokenizer = AutoTokenizer.from_pretrained(_CLASSIFIER_CHECKPOINT)
classifier_model = AutoModelForSequenceClassification.from_pretrained(
    _CLASSIFIER_CHECKPOINT
)
def classify_audio(audio):
    """Transcribe an audio file and label the transcript as scam or safe.

    Args:
        audio: The audio input forwarded to ``asr_pipeline`` (the interface
            in this file is configured to pass a file path). ``None`` is
            accepted and means there is no audio to process.

    Returns:
        A ``(transcription, label)`` tuple of two strings. ``label`` is
        ``"Scam"`` when the classifier's highest-scoring class is 1 and
        ``"Safe Message"`` otherwise. Both strings are empty when ``audio``
        is ``None``.
    """
    # No input (e.g. the audio widget was cleared): return blank outputs
    # rather than letting the ASR pipeline fail on a missing file.
    if audio is None:
        return "", ""

    # Imported locally because this file has no module-level torch import;
    # torch is already required by the "pt" tensors requested below.
    import torch

    # Speech -> text.
    text = asr_pipeline(audio)["text"]

    # Text -> model inputs. Truncation keeps long transcripts within the
    # tokenizer's configured maximum length instead of overflowing the model.
    inputs = classifier_tokenizer(text, return_tensors="pt", truncation=True)

    # Pure inference: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = classifier_model(**inputs)

    # Index of the highest logit for the single input (0 = ham, 1 = spam).
    prediction = outputs.logits.argmax(dim=1).item()

    # Transcription first, verdict second.
    return text, "Scam" if prediction == 1 else "Safe Message"
# Input widget: a single uploaded audio clip, handed to the callback as a
# file path.
audio_input = gr.inputs.Audio(source="upload", type="filepath")

# Output widgets, in the same order as the tuple returned by classify_audio:
# the transcript first, then the scam/safe verdict.
result_boxes = [
    gr.outputs.Textbox(label="Transcription"),
    gr.outputs.Textbox(label="Classification"),
]

# NOTE(review): gr.inputs / gr.outputs look like Gradio's legacy component
# namespaces - confirm against the installed Gradio version before upgrading.
demo = gr.Interface(
    fn=classify_audio,
    inputs=audio_input,
    outputs=result_boxes,
    live=True,
)

# Start the app immediately at import time, with sharing enabled.
demo.launch(share=True)