Spaces:

theodotus
/

streaming-asr-uk

Sleeping

theodotus committed on Sep 24, 2022

Commit

c2163fe

•

1 Parent(s): f5f9215

Used NeMo streaming logic

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ import numpy as np
 import resampy
 import torch
-from math import floor,ceil
 import nemo.collections.asr as nemo_asr
@@ -17,9 +17,17 @@ asr_model.encoder.freeze()
 asr_model.decoder.freeze()
-total_buffer = asr_model.cfg["sample_rate"] * 19 // 10
-overhead_len = total_buffer * 5 // 8
-model_stride = 4
@@ -39,19 +47,13 @@ def model(audio_16k):
 def decode_predictions(logits_list):
-    # calc overhead
-    logits_overhead = logits_list[0].shape[1] * overhead_len / total_buffer / 2
-    if (logits_overhead * 2 != int(logits_overhead * 2)):
-        raise ValueError("Wrong total_buffer")
     # cut overhead
     cutted_logits = []
     for idx in range(len(logits_list)):
-        start_cut = 0 if (idx==0) else floor(logits_overhead)
-        end_cut = 1 if (idx==len(logits_list)-1) else ceil(logits_overhead)
-        if (logits_overhead == int(logits_overhead)) and (end_cut != 1):
-            end_cut +=1
-        logits = logits_list[idx][:, start_cut:-end_cut]
         cutted_logits.append(logits)
     # join

 import resampy
 import torch
+from math import ceil
 import nemo.collections.asr as nemo_asr
 asr_model.decoder.freeze()
+buffer_len = 1.6
+chunk_len = 0.8
+total_buffer = round(buffer_len * asr_model.cfg.sample_rate)
+overhead_len = round((buffer_len - chunk_len) *  asr_model.cfg.sample_rate)
+model_stride = 8
+model_stride_in_secs = asr_model.cfg.preprocessor.window_stride * model_stride
+tokens_per_chunk = ceil(chunk_len / model_stride_in_secs)
+mid_delay = ceil((chunk_len + (buffer_len - chunk_len) / 2) / model_stride_in_secs)
 def decode_predictions(logits_list):
+    logits_len = logits_list[0].shape[1]
     # cut overhead
     cutted_logits = []
     for idx in range(len(logits_list)):
+        start_cut = 0 if (idx==0) else logits_len - 1 - mid_delay
+        end_cut = -1 if (idx==len(logits_list)-1) else logits_len - 1 - mid_delay + tokens_per_chunk
+        logits = logits_list[idx][:, start_cut:end_cut]
         cutted_logits.append(logits)
     # join