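# Page-header residue from the repository viewer (not part of the program):
#   author avatar: WXM2000
#   commit message: Create app.py
#   commit: 04934a2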
import gradio as gr
from transformers import pipeline
# from huggingsound import SpeechRecognitionModel
from scipy.io.wavfile import write
from huggingsound import SpeechRecognitionModel
# Identifier of the pretrained checkpoint handed to SpeechRecognitionModel.
MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-chinese-zh-cn"

# Built once at module load; func() below reuses this single instance.
model = SpeechRecognitionModel(MODEL_ID)

# Usage note: model.transcribe() takes a list of audio file paths, e.g.
#   model.transcribe(["/path/to/file.mp3", "/path/to/another_file.wav"])
def func(input_audio):
    """Transcribe one recording received from the audio input component.

    Args:
        input_audio: Indexable pair whose first item is passed to
            ``scipy.io.wavfile.write`` as the sample rate and whose second
            item is passed as the sample data, or ``None`` when no audio
            was supplied.

    Returns:
        The ``'transcription'`` entry of the first result returned by
        ``model.transcribe``, or an empty string when ``input_audio`` is
        ``None``.
    """
    # Imported locally so this function is self-contained with respect to the
    # file's existing top-level imports.
    import os
    import tempfile

    # Submitting without a recording yields None; indexing it would raise
    # TypeError, so report "nothing transcribed" instead.
    if input_audio is None:
        return ""

    sample_rate, samples = input_audio[0], input_audio[1]

    # A per-call scratch directory keeps overlapping requests from
    # overwriting each other's WAV file and removes the file afterwards.
    with tempfile.TemporaryDirectory() as scratch_dir:
        wav_path = os.path.join(scratch_dir, "audio.wav")
        write(wav_path, sample_rate, samples)
        results = model.transcribe([wav_path])

    return results[0]["transcription"]
# UI wiring: a single microphone recorder feeds func(), and the returned
# text is shown in a single text box.
inp = [gr.Audio(source='microphone')]
out = [gr.Textbox()]

demo = gr.Interface(fn=func, inputs=inp, outputs=out)

# Start the web app as soon as this script runs.
demo.launch()