"""Gradio demo: Tibetan speech recognition with a fine-tuned Whisper model.

Audio comes from the microphone or from an uploaded file, is transcribed by
a Hugging Face ``pipeline``, de-duplicated, and then passed through
``pyewts`` to obtain Tibetan Unicode text.
"""

from itertools import groupby
from typing import Optional

import gradio as gr
import pyewts
from transformers import pipeline

# EWTS converter; ``toUnicode`` is applied to the model's transcription.
converter = pyewts.pyewts()

# Change the model id to "your-username/the-name-you-picked" to use your own
# checkpoint.  NOTE: ``device='cuda'`` requires a CUDA-capable GPU.
pipe = pipeline(model="spsither/whisper-small-r2-70k-2ep", device='cuda')


def remove_repeated_words(text: str) -> str:
    """Collapse runs of identical consecutive words into a single word.

    Words are whitespace-separated tokens; the result is re-joined with
    single spaces, so surrounding/duplicate whitespace is normalised too.

    >>> remove_repeated_words("bkra bkra shis shis shis bde legs")
    'bkra shis bde legs'
    """
    return " ".join(word for word, _ in groupby(text.split()))


def transcribe(microphone: Optional[str], upload: Optional[str]) -> str:
    """Transcribe one audio clip and return it as Tibetan Unicode text.

    Args:
        microphone: File path of the microphone recording, or a falsy value
            when nothing was recorded.  Takes precedence over ``upload``.
        upload: File path of the uploaded audio file, used only when there
            is no microphone recording.

    Returns:
        The converted transcription, or an empty string when neither input
        was provided.
    """
    audio = microphone if microphone else upload
    if not audio:
        # Nothing to transcribe; avoid handing ``None`` to the pipeline.
        return ""

    text = pipe(audio)["text"]
    text = remove_repeated_words(text)
    return converter.toUnicode(text)


# Two audio inputs (live recording and file upload) feed ``transcribe``;
# the result is shown as plain text.
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(source="microphone", type="filepath"),
        gr.Audio(source="upload", type="filepath"),
    ],
    outputs="text",
    title="Whisper Small Tibetan",
    description=(
        "Realtime demo for Tibetan speech recognition using a fine-tuned "
        "Whisper small model."
    ),
)

iface.launch(share=True, debug=True)