import time

import gradio as gr
from transformers import (
    AutoModelForCTC,
    Wav2Vec2Processor,
    Wav2Vec2ProcessorWithLM,
    pipeline,
)

# Per-language registry of large wav2vec2 checkpoints.
# NOTE(review): not referenced anywhere below — presumably kept for future
# multi-language support; confirm before deleting.
LARGE_MODEL_BY_LANGUAGE = {
    "Korean": {"model_id": "kresnik/wav2vec2-large-xlsr-korean", "has_lm": True},
}

# Load the ASR pipeline once at import time (downloads the model on first run,
# which can be slow).
p = pipeline(
    "automatic-speech-recognition",
    model="kresnik/wav2vec2-large-xlsr-korean",
)


def transcribe(audio, state=""):
    """Transcribe one audio clip and append the text to the running transcript.

    Parameters
    ----------
    audio : str
        Filepath of the recorded clip (the Audio input uses ``type="filepath"``).
    state : str
        Transcript accumulated over previous calls in this session.

    Returns
    -------
    tuple[str, str]
        ``(textbox_value, new_state)`` — both are the full accumulated
        transcript, so the textbox always shows everything heard so far.
    """
    # NOTE(review): fixed 2 s delay looks like a throttle for live-microphone
    # mode so chunks don't pile up — confirm before removing.
    time.sleep(2)
    text = p(audio)["text"]
    state += text + " "  # pipeline output "text" is already a str; no cast needed
    return state, state


# NOTE(review): gr.inputs.Audio, source=, allow_screenshot and theme="grass"
# are legacy Gradio (<3.x) API — kept as-is to match the installed version.
gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath", label="Record something..."),
        "state",
    ],
    outputs=[
        "textbox",
        "state",
    ],
    title="Automatic Speech Recognition",
    description="",
    css="""
    .result {display:flex;flex-direction:column}
    .result_item {padding:15px;margin-bottom:8px;border-radius:15px;width:100%}
    .result_item_success {background-color:mediumaquamarine;color:white;align-self:start}
    .result_item_error {background-color:#ff7070;color:white;align-self:start}
    """,
    allow_screenshot=False,
    allow_flagging="never",
    theme="grass",
    live=True,
).launch()