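# NOTE(review): the three items below look like web-page residue captured
# together with the file, not program text; kept here as comments.
#   kresnik's picture / updated / 8ca1fee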
import gradio as gr
import time
from transformers import pipeline, AutoModelForCTC, Wav2Vec2Processor, Wav2Vec2ProcessorWithLM
# Registry of wav2vec2 checkpoints keyed by language name. The "has_lm" flag
# is stored here but is not read anywhere in the visible part of this file.
LARGE_MODEL_BY_LANGUAGE = {
    "Korean": {"model_id": "kresnik/wav2vec2-large-xlsr-korean", "has_lm": True},
}

# Build the speech-recognition pipeline once at import time so that every
# request reuses the same object. The checkpoint id is taken from the registry
# above instead of being repeated as a second string literal, so the two
# cannot drift apart.
p = pipeline(
    "automatic-speech-recognition",
    model=LARGE_MODEL_BY_LANGUAGE["Korean"]["model_id"],
)
def transcribe(audio, state=""):
    """Transcribe one audio clip and append it to the running transcript.

    Args:
        audio: Value delivered by the audio input; the interface in this file
            configures it with ``type="filepath"``. ``None`` is treated as
            "nothing recorded" and leaves the transcript untouched.
        state: Transcript accumulated by previous calls. ``None`` is treated
            as an empty transcript.

    Returns:
        A ``(transcript, transcript)`` tuple: the same string twice, one for
        each of the two outputs wired to this callback.
    """
    if state is None:
        state = ""

    # Nothing to recognise: return early instead of handing None to the
    # pipeline (and skip the delay below, which would serve no purpose).
    if audio is None:
        return state, state

    # NOTE(review): fixed 2 s pause kept from the original; presumably it
    # paces calls while the interface runs in live mode. Confirm before
    # changing.
    time.sleep(2)

    text = p(audio)["text"]
    state += str(text) + " "
    return state, state
# Custom CSS handed to the interface. The classes it targets are not
# referenced anywhere else in the visible part of this file.
RESULT_CSS = """
.result {display:flex;flex-direction:column}
.result_item {padding:15px;margin-bottom:8px;border-radius:15px;width:100%}
.result_item_success {background-color:mediumaquamarine;color:white;align-self:start}
.result_item_error {background-color:#ff7070;color:white;align-self:start}
"""

# Microphone input; the recording reaches the callback as a file path.
microphone = gr.inputs.Audio(
    source="microphone",
    type="filepath",
    label="Record something...",
)

# Wire the callback to (microphone, state) -> (textbox, state). The "state"
# entries carry the accumulated transcript from one call to the next.
demo = gr.Interface(
    fn=transcribe,
    inputs=[microphone, "state"],
    outputs=["textbox", "state"],
    title="Automatic Speech Recognition",
    description="",
    css=RESULT_CSS,
    allow_screenshot=False,
    allow_flagging="never",
    theme="grass",
    live=True,
)

# Start serving the demo.
demo.launch()