"""Gradio demo: transcribe speech and classify the banking intent (MINDS-14).

Pipeline: Whisper (fine-tuned on MINDS-14 en-US) for ASR, then a RoBERTa
text classifier fine-tuned on the same dataset for intent prediction.
"""
import gradio as gr
import torch
from transformers import pipeline

# Model checkpoints for ASR and intent classification.
model_asr = "kairaamilanii/whisper-mind14-enUS"
model_class = "kairaamilanii/RoBERTa-minds14-en"

transcriber = pipeline(
    "automatic-speech-recognition",
    model=model_asr,
    chunk_length_s=30,  # long-form audio is processed in 30 s chunks
    device="cuda:0" if torch.cuda.is_available() else "cpu",
)
classifier = pipeline("text-classification", model=model_class)

# Maps the numeric suffix of the classifier's "LABEL_<n>" output to a
# human-readable MINDS-14 intent name.
intent_classes = {
    0: 'abroad',
    1: 'address',
    2: 'app_error',
    3: 'atm_limit',
    4: 'balance',
    5: 'business_loan',
    6: 'card_issues',
    7: 'cash_deposit',
    8: 'direct_debit',
    9: 'freeze',
    10: 'high_value_payment',
    11: 'joint_account',
    12: 'latest_transactions',
    13: 'pay_bill',
}


def process_audio(audio):
    """Transcribe an audio file and classify the speaker's intent.

    Args:
        audio: Filesystem path to an audio file (Gradio supplies a filepath
            because the input component uses type="filepath").

    Returns:
        Tuple of (transcription text, intent name). If the classifier's
        label is not in the expected "LABEL_<n>" form the raw label is
        returned; if the index is unmapped, "Unknown" is returned.
    """
    # Transcribe the audio.
    text_asr = transcriber(audio)["text"]

    # Classify the intent of the transcription; the pipeline returns a
    # list of dicts like [{"label": "LABEL_3", "score": ...}].
    prediction = classifier(text_asr)[0]
    label = prediction["label"]
    try:
        label_index = int(label.rsplit("_", 1)[-1])
    except (ValueError, IndexError):
        # Checkpoint emitted a human-readable label instead of "LABEL_<n>";
        # surface it as-is rather than crashing.
        return text_asr, label

    return text_asr, intent_classes.get(label_index, "Unknown")


# Gradio interface: one audio input, two text outputs
# (transcription, predicted intent).
iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath"),
    outputs=["text", "text"],
    title="ASR and Intent Classification",
    description="Upload an audio file to get transcription and intent classification.",
)

if __name__ == "__main__":
    iface.launch()