import time

import gradio as gr
import torch
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer

# Checkpoint identifiers, named once so each model is referenced from a
# single place. The spelling of the classifier id is kept exactly as given.
ASR_CHECKPOINT = "facebook/wav2vec2-base-960h"
SPAM_CHECKPOINT = "Ngadou/bert-sms-spam-dectector"

# Speech-to-text pipeline used to transcribe the incoming audio.
asr_pipeline = pipeline(
    "automatic-speech-recognition",
    model=ASR_CHECKPOINT,
)

# Sequence classifier and its tokenizer, both loaded from the same checkpoint.
classifier_model = AutoModelForSequenceClassification.from_pretrained(SPAM_CHECKPOINT)
classifier_tokenizer = AutoTokenizer.from_pretrained(SPAM_CHECKPOINT)

def classify_audio(audio):
    """Transcribe an audio clip and classify the transcript as scam or safe.

    Args:
        audio: The audio input handed to the ASR pipeline (the interface in
            this file passes a file path), or ``None`` when no clip is
            available.

    Returns:
        A ``(transcription, label)`` tuple. ``label`` is ``"Scam"`` when the
        classifier predicts class 1 and ``"Safe Message"`` otherwise. Both
        elements are empty strings when ``audio`` is ``None``.
    """
    # Gradio passes None when the audio input is empty (for example after the
    # upload is cleared); return blank outputs instead of failing in the ASR step.
    if audio is None:
        return "", ""

    # Transcribe the audio to text
    text = asr_pipeline(audio)["text"]

    # Tokenize the transcript. Calling the tokenizer directly replaces the
    # deprecated encode_plus, and truncation keeps long transcripts within the
    # tokenizer's configured maximum length so the classifier does not reject them.
    inputs = classifier_tokenizer(text, return_tensors="pt", truncation=True)

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        outputs = classifier_model(**inputs)

    # Get the prediction (0 = ham, 1 = spam)
    prediction = outputs.logits.argmax(dim=1).item()

    # Return the transcription and the human-readable label as a tuple
    return text, "Scam" if prediction == 1 else "Safe Message"

# Input widget: a single uploaded audio file, delivered to the callback as a path.
audio_input = gr.inputs.Audio(source="upload", type="filepath")

# Output widgets: one text box per element of classify_audio's return tuple.
result_boxes = [
    gr.outputs.Textbox(label=caption)
    for caption in ("Transcription", "Classification")
]

# Build the interface in live mode, then launch it with sharing enabled.
demo = gr.Interface(
    fn=classify_audio,
    inputs=audio_input,
    outputs=result_boxes,
    live=True,
)
demo.launch(share=True)