|
from transformers import pipeline, AutoFeatureExtractor, AutoTokenizer, Wav2Vec2ForCTC |
|
import gradio as gr |
|
import time |
|
|
|
# Checkpoint identifier shared by every from_pretrained call below.
model_id = "comodoro/wav2vec2-xls-r-300m-cs-250"

# Load the three components separately so the pipeline is assembled from
# these exact objects rather than resolved implicitly from the identifier.
tokenizer = AutoTokenizer.from_pretrained(model_id)
feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
model = Wav2Vec2ForCTC.from_pretrained(model_id)

# Speech-recognition pipeline used by the transcription callback.
p = pipeline(
    task="automatic-speech-recognition",
    model=model,
    tokenizer=tokenizer,
    feature_extractor=feature_extractor,
    chunk_length_s=5,
)
|
|
|
def transcribe(audio, state=""):
    """Transcribe one recording and append the text to the transcript.

    Args:
        audio: Input for the speech-recognition pipeline ``p``; ``None``
            when no recording was provided.
        state: Transcript text accumulated so far.

    Returns:
        ``state`` followed by the recognized text and a single trailing
        space, or ``state`` unchanged when ``audio`` is ``None``.
    """
    # Triggering recognition before anything was recorded yields no audio;
    # the pipeline cannot process a missing input, so keep the transcript
    # as it is instead of failing the request.
    if audio is None:
        return state

    text = p(audio)["text"]
    return state + text + " "
|
|
|
with gr.Blocks() as blocks:
    # Microphone recorder; type="filepath" hands the callback a file path.
    audio = gr.Audio(
        source="microphone",
        type="filepath",
        label=(
            'Pokud je to třeba, povolte mikrofon pro tuto stránku, '
            'klikněte na Record from microphone, po dokončení nahrávání '
            'na Stop recording a poté na Rozpoznat'
        ),
    )
    btn = gr.Button('Rozpoznat')
    output = gr.Textbox(show_label=False)

    # Clicking the button sends the recording to transcribe and writes the
    # returned text into the textbox.
    btn.click(fn=transcribe, inputs=[audio], outputs=[output])

blocks.launch(enable_queue=True, debug=True)