import spaces
import gradio as gr
import torch
from transformers import pipeline

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pipelines do not expose a .to() method; pass the device at construction time.
# Speech recognition pipeline (Whisper fine-tuned for Bengali).
asr = pipeline(model="asif00/whisper-bangla", device=device)
# Seq2seq pipeline that post-corrects errors in the raw transcription.
ser = pipeline(
    "text2text-generation",
    model="asif00/mbart_bn_error_correction",
    device=device,
)


@spaces.GPU
def transcribe(audio):
    text = asr(audio)["text"]
    return text


@spaces.GPU
def correction(text):
    # The text2text-generation pipeline returns a list of dicts;
    # extract the corrected string from the first result.
    corrected_text = ser(text)[0]["generated_text"]
    return corrected_text


def transcribe_and_correct(audio):
    text = transcribe(audio)
    corrected_text = correction(text)
    return corrected_text


iface = gr.Interface(
    fn=transcribe_and_correct,
    # `sources` expects a list of input sources in current Gradio versions.
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs="text",
    title="Whisper Bangla",
    description="Realtime demo for Bengali speech recognition using a fine-tuned Whisper small model.",
)

iface.launch()
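# A minimal sanity check, as a sketch: since iface.launch() blocks, you could
# call the combined function directly before launching the server. The audio
# path below is hypothetical; substitute any local recording.
#
#   print(transcribe_and_correct("sample_bn.wav"))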