import gradio as gr
from scipy.io.wavfile import write
from huggingsound import SpeechRecognitionModel

# Load a Chinese (zh-CN) wav2vec2 model for speech recognition
model = SpeechRecognitionModel("jonatasgrosman/wav2vec2-large-xlsr-53-chinese-zh-cn")

# The model can also transcribe existing audio files directly:
# audio_paths = ["/path/to/file.mp3", "/path/to/another_file.wav"]
# transcriptions = model.transcribe(audio_paths)


def func(input_audio):
    # Gradio's microphone component returns a (sample_rate, numpy_data) tuple;
    # write it to a temporary WAV file so huggingsound can read it from disk.
    print(input_audio)
    write('./audio.wav', input_audio[0], input_audio[1])
    out_txt = model.transcribe(['./audio.wav'])
    return out_txt[0]['transcription']


inp = [gr.Audio(source='microphone')]
out = [gr.Textbox()]

demo = gr.Interface(func,
                    inputs=inp,
                    outputs=out)
demo.launch()