Update app.py
Browse files
app.py
CHANGED
|
@@ -25,10 +25,10 @@ from nemo.utils import logging as nemo_logging
|
|
| 25 |
# ----------------------------
|
| 26 |
MODEL_NAME = os.environ.get("PARAKEET_MODEL", "nvidia/parakeet-tdt-0.6b-v3")
|
| 27 |
TARGET_SR = 16_000
|
| 28 |
-
BEAM_SIZE = int(os.environ.get("PARAKEET_BEAM_SIZE", "
|
| 29 |
OFFLINE_BATCH= int(os.environ.get("PARAKEET_BATCH", "8"))
|
| 30 |
-
CHUNK_S = float(os.environ.get("PARAKEET_CHUNK_S", "2.0"))
|
| 31 |
-
FLUSH_PAD_S = float(os.environ.get("PARAKEET_FLUSH_PAD_S", "2.0"))
|
| 32 |
|
| 33 |
# ----------------------------
|
| 34 |
# Logging (unified)
|
|
@@ -106,12 +106,6 @@ class ParakeetManager:
|
|
| 106 |
self._base_decoding = copy.deepcopy(self.model.cfg.decoding)
|
| 107 |
|
| 108 |
self._set_malsd_beam()
|
| 109 |
-
|
| 110 |
-
# Enable encoder caching for better streaming context (per NeMo docs/tutorials)
|
| 111 |
-
if hasattr(self.model.encoder, "set_default_att_context_size"):
|
| 112 |
-
self.model.encoder.set_default_att_context_size([512, 16]) # Large left for cumulative context, small right for buffering
|
| 113 |
-
logger.info("encoder_caching_enabled left=512 right=16")
|
| 114 |
-
|
| 115 |
logger.info(f"model_loaded strategy=malsd_batch beam_size={BEAM_SIZE}")
|
| 116 |
|
| 117 |
def _set_malsd_beam(self):
|
|
@@ -122,12 +116,12 @@ class ParakeetManager:
|
|
| 122 |
"return_best_hypothesis": True,
|
| 123 |
"score_norm": True,
|
| 124 |
"allow_cuda_graphs": False, # CPU-only
|
| 125 |
-
"max_symbols_per_step": 10,
|
| 126 |
})
|
| 127 |
OmegaConf.set_struct(cfg, False)
|
| 128 |
cfg["loop_labels"] = True
|
| 129 |
-
cfg["fused_batch_size"] = -1
|
| 130 |
-
cfg["compute_timestamps"] = False
|
| 131 |
if hasattr(cfg, "greedy"):
|
| 132 |
cfg.greedy.use_cuda_graph_decoder = False
|
| 133 |
self.model.change_decoding_strategy(cfg)
|
|
@@ -191,12 +185,7 @@ class StreamingSession:
|
|
| 191 |
self.pending = self.pending[C:]
|
| 192 |
try:
|
| 193 |
self.hyp = self.mgr.stream_step(chunk, self.hyp)
|
| 194 |
-
|
| 195 |
-
if new_text:
|
| 196 |
-
if self.text and new_text.startswith(self.text): # If cumulative (partial extends), replace with extended
|
| 197 |
-
self.text = new_text
|
| 198 |
-
else: # Else append (handles per-chunk case)
|
| 199 |
-
self.text += (' ' if self.text else '') + new_text
|
| 200 |
except Exception:
|
| 201 |
logger.exception("mic_step failed")
|
| 202 |
break
|
|
@@ -207,13 +196,7 @@ class StreamingSession:
|
|
| 207 |
final = np.concatenate([self.pending, pad])
|
| 208 |
try:
|
| 209 |
self.hyp = self.mgr.stream_step(final, self.hyp)
|
| 210 |
-
|
| 211 |
-
if new_text:
|
| 212 |
-
if self.text and new_text.startswith(self.text):
|
| 213 |
-
self.text = new_text
|
| 214 |
-
else:
|
| 215 |
-
self.text += (' ' if self.text else '') + new_text
|
| 216 |
-
self.text += '.' # Add period for sentence closure on flush
|
| 217 |
except Exception:
|
| 218 |
logger.exception("mic_flush failed")
|
| 219 |
self.pending = np.zeros(0, dtype=np.float32)
|
|
|
|
| 25 |
# ----------------------------
|
| 26 |
MODEL_NAME = os.environ.get("PARAKEET_MODEL", "nvidia/parakeet-tdt-0.6b-v3")
|
| 27 |
TARGET_SR = 16_000
|
| 28 |
+
BEAM_SIZE = int(os.environ.get("PARAKEET_BEAM_SIZE", "16")) # Increased for quality
|
| 29 |
OFFLINE_BATCH= int(os.environ.get("PARAKEET_BATCH", "8"))
|
| 30 |
+
CHUNK_S = float(os.environ.get("PARAKEET_CHUNK_S", "2.0")) # Increased for better context
|
| 31 |
+
FLUSH_PAD_S = float(os.environ.get("PARAKEET_FLUSH_PAD_S", "2.0")) # Increased for better finalization
|
| 32 |
|
| 33 |
# ----------------------------
|
| 34 |
# Logging (unified)
|
|
|
|
| 106 |
self._base_decoding = copy.deepcopy(self.model.cfg.decoding)
|
| 107 |
|
| 108 |
self._set_malsd_beam()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
logger.info(f"model_loaded strategy=malsd_batch beam_size={BEAM_SIZE}")
|
| 110 |
|
| 111 |
def _set_malsd_beam(self):
|
|
|
|
| 116 |
"return_best_hypothesis": True,
|
| 117 |
"score_norm": True,
|
| 118 |
"allow_cuda_graphs": False, # CPU-only
|
| 119 |
+
"max_symbols_per_step": 10, # Added for stability in MALSD
|
| 120 |
})
|
| 121 |
OmegaConf.set_struct(cfg, False)
|
| 122 |
cfg["loop_labels"] = True
|
| 123 |
+
cfg["fused_batch_size"] = -1 # Added for CPU compatibility
|
| 124 |
+
cfg["compute_timestamps"] = False # Added to match legacy, avoid overhead
|
| 125 |
if hasattr(cfg, "greedy"):
|
| 126 |
cfg.greedy.use_cuda_graph_decoder = False
|
| 127 |
self.model.change_decoding_strategy(cfg)
|
|
|
|
| 185 |
self.pending = self.pending[C:]
|
| 186 |
try:
|
| 187 |
self.hyp = self.mgr.stream_step(chunk, self.hyp)
|
| 188 |
+
self.text = getattr(self.hyp, "text", self.text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
except Exception:
|
| 190 |
logger.exception("mic_step failed")
|
| 191 |
break
|
|
|
|
| 196 |
final = np.concatenate([self.pending, pad])
|
| 197 |
try:
|
| 198 |
self.hyp = self.mgr.stream_step(final, self.hyp)
|
| 199 |
+
self.text = getattr(self.hyp, "text", self.text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
except Exception:
|
| 201 |
logger.exception("mic_flush failed")
|
| 202 |
self.pending = np.zeros(0, dtype=np.float32)
|