baasfasfasfasf committed on
Commit
daa3dea
1 Parent(s): 1edec07
.gitattributes CHANGED
@@ -25,3 +25,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ *.arpa filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ language: en
4
+ tags:
5
+ - speech-to-text
6
+ ---
7
+ # Exported [NeMo](https://github.com/NVIDIA/NeMo) speech-to-text models with the [OpenSLR 11](https://www.openslr.org/11/) LibriSpeech 3-gram language model
8
+
9
+ This model is intended to be used with [npc-engine](https://github.com/npc-engine/npc-engine).
config.yml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_type: "NemoSTT"
2
+
3
+ # Frame size in ms for incremental transcription
4
+ frame_size: 1000
5
+
6
+ # Parameters from https://github.com/NVIDIA/NeMo/blob/stable/tutorials/asr/Online_ASR_Microphone_Demo.ipynb
7
+ frame_overlap: 2
8
+ offset: 4
9
+ # timestep_duration = model._cfg.preprocessor['window_stride']
10
+ # for block in model._cfg.encoder['jasper']:
11
+ # timestep_duration *= block['stride'][0] ** block['repeat']
12
+ timestep_duration: 0.02
13
+
14
+ # Sample rate
15
+ sample_rate: 16000
16
+
17
+ # Minimum duration in ms of a speech section that VAD can detect
18
+ min_speech_duration: 400
19
+
20
+ # Silence timeout in ms after which results are flushed even if the speech was not semantically finished
21
+ max_silence_duration: 1000
22
+
23
+ # VAD frame size in ms
24
+ vad_frame_ms: 20
25
+
26
+ transcribe_realtime: False
27
+ predict_punctuation: False
28
+ alpha: 0.0253813572180912
29
+ beta: 0.08
ctc.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0d72eb87f56c10bc51952ee327d45f76ebfbd30e21f828cd3fab18ff3212f10
3
+ size 75577820
lowercase_3-gram.pruned.1e-7.arpa ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61dd499d412fb7493b093d846e055587985272373808b4b0316ee76dee5805bb
3
+ size 40314194
punctuation.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3c64263783470c92264dcd2d9279c7ba2e45818b5d9ff0bbed987def578ae0f
3
+ size 265505964
sentence_prediction.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa2fb314443b18f1f0d82f9845f0cef49fd92d98a6a314c535e6717127f21500
3
+ size 90894457
sentence_tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff