# Spaces: Sleeping
# (Appears to be page-status text captured along with the source; it is not part of the program.)
import gradio as gr
import torch
from transformers import pipeline
# Initialize ASR and classifier pipelines
model_asr = "kairaamilanii/whisper-mind14-enUS"
model_class = "kairaamilanii/RoBERTa-minds14-en"

# Resolve the device once and hand it to both pipelines, so the classifier is
# not left on the CPU when a GPU is available for the ASR model.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

transcriber = pipeline(
    "automatic-speech-recognition",
    model=model_asr,
    chunk_length_s=30,
    device=device,
)
classifier = pipeline("text-classification", model=model_class, device=device)
# Maps the numeric class index parsed from the classifier's label to a
# human-readable intent name (looked up in process_audio).
intent_classes = {
    0: 'abroad',
    1: 'address',
    2: 'app_error',
    3: 'atm_limit',
    4: 'balance',
    5: 'business_loan',
    6: 'card_issues',
    7: 'cash_deposit',
    8: 'direct_debit',
    9: 'freeze',
    10: 'high_value_payment',
    11: 'joint_account',
    12: 'latest_transactions',
    13: 'pay_bill',
}
# Function to process audio
def process_audio(audio):
    """Transcribe an audio clip and classify the intent of the transcription.

    Args:
        audio: Audio input forwarded unchanged to the ASR pipeline. The
            Gradio interface in this file is configured with
            ``type="filepath"``; ``None`` is accepted and means "no audio".

    Returns:
        A ``(transcription, intent_name)`` tuple of strings. When ``audio``
        is ``None`` the result is ``("", "Unknown")``.
    """
    # Submitting the form without a recording yields None; bail out early
    # instead of letting the ASR pipeline fail on it.
    if audio is None:
        return "", "Unknown"

    # Transcribe the audio
    text_asr = transcriber(audio)['text']

    # Classify the intent
    intent_class = classifier(text_asr)
    label = intent_class[0]['label']

    # Labels are expected to look like "LABEL_<index>". If the label does not
    # follow that pattern, return it as-is rather than crashing on the parse.
    try:
        label_index = int(label.split('_')[1])
    except (IndexError, ValueError):
        return text_asr, label

    intent_name = intent_classes.get(label_index, "Unknown")
    return text_asr, intent_name
# Create Gradio interface
iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath"),
    # Two text outputs, in the order process_audio returns them:
    # the transcription, then the intent name.
    outputs=["text", "text"],
    title="ASR and Intent Classification",
    description="Upload an audio file to get transcription and intent classification.",
)

# Start the web app as soon as the script is executed.
iface.launch()