File size: 1,031 Bytes
07d71a1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
from transformers import pipeline, AutoFeatureExtractor, AutoTokenizer, Wav2Vec2ForCTC
import gradio as gr
import time

# Hugging Face Hub checkpoint shared by every component loaded below.
model_id = "comodoro/wav2vec2-xls-r-300m-cs-250"

# Load the tokenizer, feature extractor and CTC model from the same
# checkpoint so they can be handed to the pipeline explicitly.
tokenizer = AutoTokenizer.from_pretrained(model_id)
feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
model = Wav2Vec2ForCTC.from_pretrained(model_id)

# Module-level speech-recognition pipeline used by `transcribe`.
# chunk_length_s=5 asks the pipeline to process the audio in 5-second chunks.
p = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=tokenizer,
    feature_extractor=feature_extractor,
    chunk_length_s=5,
)

def transcribe(audio, state=""):
    """Transcribe one recording and append the text to ``state``.

    Args:
        audio: Input forwarded to the module-level speech-recognition
            pipeline ``p``. As wired in this file it comes from a
            ``gr.Audio(type="filepath")`` component, i.e. the path of the
            recorded file, or ``None`` when nothing has been recorded yet.
        state: Previously accumulated transcript; defaults to an empty string.

    Returns:
        ``state`` followed by the newly recognised text and a single trailing
        space. When ``audio`` is ``None`` the unchanged ``state`` is returned.
    """
    # Clicking the button before recording passes None; handing that to the
    # pipeline would raise, so return the transcript unchanged instead.
    if audio is None:
        return state

    text = p(audio)["text"]
    return state + text + " "

# User-facing (Czech) instruction text shown on the recorder component.
# The two consecutive spaces after the comma reproduce the original literal
# exactly (it was written with a backslash line continuation).
_MIC_LABEL = (
    'Pokud je to třeba, povolte mikrofon pro tuto stránku, '
    ' klikněte na Record from microphone, po dokončení nahrávání na Stop recording a poté na Rozpoznat'
)

# Layout: microphone recorder, a "Rozpoznat" button, and an unlabeled textbox
# that receives the transcript returned by `transcribe`.
# NOTE(review): `source=` and `enable_queue=` look like Gradio 3.x-era
# arguments - confirm the pinned Gradio version before upgrading.
with gr.Blocks() as blocks:
    audio = gr.Audio(source="microphone", type="filepath", label=_MIC_LABEL)
    btn = gr.Button('Rozpoznat')
    output = gr.Textbox(show_label=False)
    # Only the audio component is passed, so `transcribe` runs with its
    # default `state` on every click.
    btn.click(fn=transcribe, inputs=[audio], outputs=[output])

blocks.launch(enable_queue=True, debug=True)