"""Gradio demo: transcribe an uploaded audio file and flag the text as scam or safe.

The audio is transcribed by a wav2vec2 ASR pipeline, and the transcript is then
scored by a BERT sequence classifier fine-tuned for SMS spam detection.
"""
import time
from typing import Optional, Tuple

import gradio as gr
import torch
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer

# Hub id shared by the classifier weights and its tokenizer (spelling is the
# published repository name; do not "correct" it).
CLASSIFIER_MODEL_ID = "Ngadou/bert-sms-spam-dectector"

# ASR pipeline
asr_pipeline = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")

# Load classifier model and tokenizer
classifier_model = AutoModelForSequenceClassification.from_pretrained(CLASSIFIER_MODEL_ID)
classifier_tokenizer = AutoTokenizer.from_pretrained(CLASSIFIER_MODEL_ID)


def classify_audio(audio: Optional[str]) -> Tuple[str, str]:
    """Transcribe *audio* and classify the transcript.

    Args:
        audio: Path to the uploaded audio file (the input component is
            configured with ``type="filepath"``), or ``None`` when no file
            is present.

    Returns:
        A ``(transcription, label)`` tuple where ``label`` is ``"Scam"`` or
        ``"Safe Message"``. Both elements are empty strings when ``audio``
        is ``None``.
    """
    # In live mode the callback can fire with no file (e.g. after the upload
    # is cleared); return blank outputs instead of crashing in the pipeline.
    if audio is None:
        return "", ""

    # Transcribe the audio to text
    text = asr_pipeline(audio)["text"]

    # Tokenize the text; truncate to the model's maximum input length so a
    # long transcript cannot overflow the classifier.
    inputs = classifier_tokenizer(text, return_tensors="pt", truncation=True)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = classifier_model(**inputs)

    # Get the prediction (0 = ham, 1 = spam)
    prediction = outputs.logits.argmax(dim=1).item()

    # Return the transcription and the label as a tuple, one per output box
    return text, "Scam" if prediction == 1 else "Safe Message"


# NOTE(review): `gr.inputs` / `gr.outputs` are the legacy component namespaces;
# newer Gradio releases expose `gr.Audio` / `gr.Textbox` instead. Kept as-is
# because the installed Gradio version is not visible here - confirm and migrate.
demo = gr.Interface(
    fn=classify_audio,
    inputs=gr.inputs.Audio(source="upload", type="filepath"),
    outputs=[
        gr.outputs.Textbox(label="Transcription"),
        gr.outputs.Textbox(label="Classification"),
    ],
    live=True,
)

demo.launch(share=True)