File size: 1,171 Bytes
10b9192
cc28349
10b9192
 
 
 
 
 
 
 
82782ac
10b9192
14855d6
 
 
8ca1fee
cc28349
 
10b9192
 
14855d6
10b9192
 
 
 
 
14855d6
10b9192
 
 
 
 
 
 
 
 
 
 
 
0b38351
cc28349
ed41b08
10b9192
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import gradio as gr
import time
from transformers import pipeline, AutoModelForCTC, Wav2Vec2Processor, Wav2Vec2ProcessorWithLM



# Speech-recognition checkpoints keyed by the language they cover.
# "model_id" holds the checkpoint identifier (the same string this file hands
# to pipeline(model=...)).
# NOTE(review): "has_lm" presumably marks checkpoints that ship with a
# language model; nothing in the visible code reads it -- confirm.
LARGE_MODEL_BY_LANGUAGE = {
    "Korean": {
        "model_id": "kresnik/wav2vec2-large-xlsr-korean",
        "has_lm": True,
    },
}

# ASR pipeline built once at import time and called by transcribe().
# The checkpoint id comes from LARGE_MODEL_BY_LANGUAGE so the model name is
# declared in exactly one place instead of being repeated as a literal.
p = pipeline(
    "automatic-speech-recognition",
    model=LARGE_MODEL_BY_LANGUAGE["Korean"]["model_id"],
)

def transcribe(audio, state=""):
    """Transcribe one recording and append it to the running transcript.

    Args:
        audio: Audio input forwarded to the ASR pipeline ``p`` (the UI is
            configured to supply a file path). ``None`` means there is
            nothing to transcribe.
        state: Transcript accumulated by earlier calls. ``None`` is treated
            as an empty transcript.

    Returns:
        A ``(transcript, transcript)`` tuple: the updated transcript twice,
        matching the two outputs (textbox and state) wired to this function.
    """
    transcript = "" if state is None else state

    if audio is None:
        # No recording available: leave the transcript untouched rather than
        # sleeping and then handing None to the pipeline.
        return transcript, transcript

    # Pause kept from the original implementation.
    # NOTE(review): presumably throttles live-mode calls -- confirm.
    time.sleep(2)
    text = p(audio)["text"]
    transcript += str(text) + " "
    return transcript, transcript

# Stylesheet passed to the interface through its ``css`` argument.
_INTERFACE_CSS = """
    .result {display:flex;flex-direction:column}
    .result_item {padding:15px;margin-bottom:8px;border-radius:15px;width:100%}
    .result_item_success {background-color:mediumaquamarine;color:white;align-self:start}
    .result_item_error {background-color:#ff7070;color:white;align-self:start}
    """

# Microphone recorder configured with type="filepath".
_microphone_input = gr.inputs.Audio(
    source="microphone",
    type="filepath",
    label="Record something...",
)

# The "state" entries line up with transcribe's ``state`` parameter and its
# second return value; "textbox" receives the first return value.
demo = gr.Interface(
    fn=transcribe,
    inputs=[_microphone_input, "state"],
    outputs=["textbox", "state"],
    title="Automatic Speech Recognition",
    description="",
    css=_INTERFACE_CSS,
    allow_screenshot=False,
    allow_flagging="never",
    theme="grass",
    live=True,
)
demo.launch()