"""Gradio demo that transcribes microphone recordings with ``mms_infer.py``."""

import sys
from subprocess import PIPE, Popen

import gradio as gr
from transformers import AutoModelForPreTraining, AutoProcessor

# NOTE(review): these objects are loaded at import time but nothing in this
# file uses them -- transcription is delegated to the ``mms_infer.py``
# subprocess below. They are kept so the module's public names are unchanged.
processor = AutoProcessor.from_pretrained("patrickvonplaten/mms-1b")
model = AutoModelForPreTraining.from_pretrained("patrickvonplaten/mms-1b")


def transcribe(audio):
    """Run ``mms_infer.py`` on a recorded audio file and return its output.

    Args:
        audio: Path of the recording to transcribe. The interface below is
            configured with ``type="filepath"``, so this is a file path; it
            is falsy when no recording is available.

    Returns:
        The text the inference script wrote to standard output, stripped of
        surrounding whitespace, or an empty string when ``audio`` is falsy.

    Raises:
        RuntimeError: If the inference script exits with a non-zero status.
    """
    if not audio:
        # Nothing recorded yet (or the input was cleared): nothing to run.
        return ""

    # An argument list (no shell) passes the path through verbatim, so spaces
    # or shell metacharacters in it cannot break or alter the command.
    command = [
        # Prefer the running interpreter so the child uses the same
        # environment; fall back to whatever ``python`` is on PATH.
        sys.executable or "python",
        "mms_infer.py",
        # NOTE(review): "model" and "eng" are passed as literal values exactly
        # as before; "model" is presumably a checkpoint path -- confirm.
        "--model", "model",
        "--lang", "eng",
        "--audio", str(audio),
    ]

    # communicate() waits for the child and drains both pipes, so the result
    # is available before returning and the process is not left running.
    with Popen(command, stdout=PIPE, stderr=PIPE, text=True) as process:
        stdout, stderr = process.communicate()

    if process.returncode != 0:
        raise RuntimeError(
            f"mms_infer.py exited with status {process.returncode}: "
            f"{stderr.strip()}"
        )
    return stdout.strip()


# Built and launched at import time, as in the original script.
gr.Interface(
    title='MetaAI (Facebook Research) MMS (Massively Multilingual Speech) ASR',
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath")
    ],
    outputs=[
        "textbox"
    ],
    live=True).launch()