comodoro committed on
Commit
94d47c9
1 Parent(s): 656e769
.gitattributes CHANGED
@@ -25,3 +25,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ czech-large-vocab.scorer filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
1
+ /venv
app.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from stt import Model
2
+ import gradio as gr
3
+ import numpy as np
4
+
5
# Relative paths of the acoustic model and the external scorer.
scorer = "czech-large-vocab.scorer"
model = "stt-comodoro-czech-2022-05-31.tflite"

# Decoder settings handed to the model below.
lm_alpha, lm_beta = 0.94, 2.52
beam_width = 512

# NOTE: ``model`` is rebound here from the file path to the loaded Model
# instance; the rest of the script uses it as the instance.
model = Model(model)
# The scorer is enabled before its alpha/beta weights are set, as in the
# original statement order.
model.enableExternalScorer(scorer)
model.setScorerAlphaBeta(lm_alpha, lm_beta)
model.setBeamWidth(beam_width)
15
+
16
def reformat_freq(sr, y):
    """Bring recorded audio to a 16 kHz sample rate.

    Args:
        sr: Sample rate of ``y`` in Hz. Only 16000 and 48000 are accepted.
        y: NumPy array of audio samples (assumed mono -- TODO confirm
            against what the audio component actually delivers).

    Returns:
        A ``(sample_rate, samples)`` tuple. 16 kHz input is returned
        unchanged. 48 kHz input is peak-normalised to the int16 range,
        averaged in groups of three consecutive samples and returned as an
        int16 array with ``sample_rate`` set to 16000.

    Raises:
        ValueError: If ``sr`` is neither 16000 nor 48000.
    """
    if sr not in (48000, 16000):
        raise ValueError("Unsupported rate", sr)
    if sr == 48000:
        # Work in float64 so that taking the absolute value of -32768 (which
        # int16 cannot represent) is safe. ravel() mirrors the implicit
        # flattening that reshape((-1, 3)) performs.
        samples = np.asarray(y, dtype=np.float64).ravel()
        # Drop up to two trailing samples so the length is a multiple of 3;
        # otherwise the reshape below raises for arbitrary recordings.
        usable = samples.size - samples.size % 3
        samples = samples[:usable]
        # Normalise by the absolute peak: using only the positive maximum
        # lets a louder negative excursion overflow the int16 cast. The
        # floor of 1 avoids dividing by zero on silence; an empty input has
        # no peak at all.
        peak = max(np.abs(samples).max(), 1) if usable else 1
        y = (
            (samples / peak * 32767)
            .reshape((-1, 3))
            .mean(axis=1)
            .astype("int16")
        )
        sr = 16000
    return sr, y
31
+
32
def transcribe(speech):
    """Transcribe one recording with the module-level ``model``.

    Args:
        speech: A ``(sample_rate, samples)`` pair that is unpacked into
            ``reformat_freq``, or ``None`` (presumably what the UI passes
            when nothing has been recorded -- verify against the caller).

    Returns:
        The decoded text, or an empty string when ``speech`` is ``None``.

    Raises:
        ValueError: Propagated from ``reformat_freq`` for unsupported
            sample rates.
    """
    if speech is None:
        # Nothing to decode; avoid the TypeError from unpacking None.
        return ""
    _, samples = reformat_freq(*speech)
    stream = model.createStream()
    stream.feedAudioContent(samples)
    # finishStream() ends the streaming inference and returns the final
    # decoding; intermediateDecode() only reports a partial result and
    # leaves the stream open.
    return stream.finishStream()
38
+
39
# Build the page: a microphone recorder, a button and a text box that shows
# the transcription. Components are created in display order.
with gr.Blocks() as blocks:
    audio = gr.Audio(
        source="microphone",
        type="numpy",
        streaming=False,
        # User-facing instructions (Czech), kept verbatim.
        label=(
            "Pokud je to třeba, povolte mikrofon pro tuto stránku, "
            "klikněte na Record from microphone, po dokončení nahrávání "
            "na Stop recording a poté na Rozpoznat"
        ),
    )
    btn = gr.Button("Rozpoznat")
    output = gr.Textbox(show_label=False)
    # Clicking the button sends the recording to transcribe() and writes
    # the returned text into the text box.
    btn.click(
        fn=transcribe,
        inputs=[audio],
        outputs=[output],
    )

blocks.launch(enable_queue=True, debug=True)
czech-large-vocab.scorer ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b1656ffc605f8df51e6bb5593ec9faee3610c7a7b9933d5d84f48bb307568d4
3
+ size 484210096
packages.txt ADDED
@@ -0,0 +1 @@
 
1
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ stt
2
+ torch
3
+ transformers
stt-comodoro-czech-2022-05-31.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc51b2380d0362cc5613935aacf848ad84805e9644ba3e733cef7962ba8336e4
3
+ size 47360928