|
from stt import Model |
|
import gradio as gr |
|
import numpy as np |
|
|
|
# File names of the STT model and of the external scorer enabled on it below.
# The model file name gets its own name so that `model` only ever refers to
# the loaded Model instance (it used to be rebound from str to Model).
model_path = 'stt-comodoro-czech-2022-05-31.tflite'
scorer = 'czech-large-vocab.scorer'

# Decoder settings: beam width and the alpha/beta values handed to the scorer.
beam_width = 512
lm_alpha = 0.94
lm_beta = 2.52

# Load the model once at import time and configure it; `transcribe` uses this
# shared instance for every request.
model = Model(model_path)
model.enableExternalScorer(scorer)
model.setScorerAlphaBeta(lm_alpha, lm_beta)
model.setBeamWidth(beam_width)
|
|
|
def reformat_freq(sr, y):
    """Bring a recording to a 16 kHz sample rate.

    Args:
        sr: Sample rate of ``y`` in Hz; only 48000 and 16000 are accepted.
        y: NumPy array of audio samples.

    Returns:
        A ``(sample_rate, samples)`` tuple. Input at 16000 Hz is returned
        unchanged. Input at 48000 Hz is peak-normalised to the int16 range,
        reduced by averaging each run of three consecutive samples, cast to
        ``int16``, and reported with a rate of 16000.

    Raises:
        ValueError: If ``sr`` is neither 48000 nor 16000.
    """
    if sr not in (48000, 16000):
        raise ValueError("Unsupported rate", sr)

    if sr == 48000:
        # Work in float64 so that taking abs() of int16 input cannot overflow.
        # NOTE(review): multi-dimensional input is flattened, as the original
        # reshape did, not downmixed — confirm recordings are mono.
        samples = np.asarray(y, dtype=np.float64).ravel()

        # Groups of three are averaged below; drop the 1-2 leftover samples
        # instead of letting reshape() raise on lengths not divisible by 3.
        usable = samples.size - samples.size % 3
        samples = samples[:usable]

        if usable == 0:
            # Nothing to normalise; np.max() would raise on an empty array.
            y = samples.astype("int16")
        else:
            # Normalise by the largest magnitude, not the largest value, so a
            # dominant negative peak cannot exceed the int16 range and wrap.
            # The floor of 1 keeps data already within [-1, 1] from being
            # amplified by the division and guards against dividing by zero.
            peak = max(np.max(np.abs(samples)), 1)
            y = (
                (samples / peak * 32767)
                .reshape((-1, 3))
                .mean(axis=1)
                .astype("int16")
            )
        sr = 16000

    return sr, y
|
|
|
def transcribe(speech):
    """Run speech-to-text on one recording and return the recognised text.

    Args:
        speech: ``(sample_rate, samples)`` pair that is unpacked into
            ``reformat_freq``, or ``None`` when there is no recording.

    Returns:
        The decoded text, or an empty string when ``speech`` is ``None``.

    Raises:
        ValueError: Propagated from ``reformat_freq`` for an unsupported
            sample rate.
    """
    # Presumably the UI passes None when the button is pressed before
    # anything was recorded; unpacking None would raise a TypeError.
    if speech is None:
        return ""

    _, y = reformat_freq(*speech)
    stream = model.createStream()
    stream.feedAudioContent(y)
    # The whole recording has been fed, so finish the stream to get the final
    # decoding rather than an intermediate one. The stream must not be used
    # after this call.
    return stream.finishStream()
|
|
|
# Page layout: a microphone recorder, a button that triggers recognition,
# and a text box that shows the transcript.
with gr.Blocks() as blocks:
    audio = gr.Audio(
        source="microphone",
        type="numpy",
        streaming=False,
        # Czech usage instructions displayed as the recorder's label.
        label=(
            'Pokud je to třeba, povolte mikrofon pro tuto stránku, '
            'klikněte na Record from microphone, po dokončení nahrávání '
            'na Stop recording a poté na Rozpoznat'
        ),
    )
    btn = gr.Button('Rozpoznat')
    output = gr.Textbox(show_label=False)

    # Clicking the button sends the recording to transcribe() and writes the
    # returned text into the text box.
    btn.click(
        fn=transcribe,
        inputs=[audio],
        outputs=[output],
    )

# Start the app at import time, with queueing, debug mode and sharing
# switched on.
blocks.launch(share=True, debug=True, enable_queue=True)