WJ88 committed
Commit d1310ca · verified · 1 parent: eef10e9

Update app.py

Files changed (1): app.py (+8 -25)

app.py CHANGED
@@ -25,10 +25,10 @@ from nemo.utils import logging as nemo_logging
 # ----------------------------
 MODEL_NAME = os.environ.get("PARAKEET_MODEL", "nvidia/parakeet-tdt-0.6b-v3")
 TARGET_SR = 16_000
-BEAM_SIZE = int(os.environ.get("PARAKEET_BEAM_SIZE", "32"))  # Increased for subtle quality gains
+BEAM_SIZE = int(os.environ.get("PARAKEET_BEAM_SIZE", "16"))  # Reduced from 32 for faster CPU decoding
 OFFLINE_BATCH = int(os.environ.get("PARAKEET_BATCH", "8"))
-CHUNK_S = float(os.environ.get("PARAKEET_CHUNK_S", "2.0"))
-FLUSH_PAD_S = float(os.environ.get("PARAKEET_FLUSH_PAD_S", "2.0"))
+CHUNK_S = float(os.environ.get("PARAKEET_CHUNK_S", "2.0"))  # Seconds of audio per streaming step
+FLUSH_PAD_S = float(os.environ.get("PARAKEET_FLUSH_PAD_S", "2.0"))  # Silence padding appended on flush
 
 # ----------------------------
 # Logging (unified)
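
Note: all of these knobs are read once at import time from environment variables. A minimal sketch of overriding them when launching the Space (values illustrative, not recommendations from this commit):

# Hypothetical launcher: set the decoding knobs before app.py is imported.
import os
import subprocess

env = dict(
    os.environ,
    PARAKEET_BEAM_SIZE="16",     # MALSD beam width
    PARAKEET_CHUNK_S="2.0",      # seconds of audio per streaming step
    PARAKEET_FLUSH_PAD_S="2.0",  # trailing silence appended on flush
)
subprocess.run(["python", "app.py"], env=env, check=True)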
@@ -106,12 +106,6 @@ class ParakeetManager:
         self._base_decoding = copy.deepcopy(self.model.cfg.decoding)
 
         self._set_malsd_beam()
-
-        # Enable encoder caching for better streaming context (per NeMo docs/tutorials)
-        if hasattr(self.model.encoder, "set_default_att_context_size"):
-            self.model.encoder.set_default_att_context_size([512, 16])  # Large left for cumulative context, small right for buffering
-            logger.info("encoder_caching_enabled left=512 right=16")
-
         logger.info(f"model_loaded strategy=malsd_batch beam_size={BEAM_SIZE}")
 
     def _set_malsd_beam(self):
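
Note: the deleted block called set_default_att_context_size, which caps self-attention to [left, right] frames on NeMo Conformer/FastConformer encoders; this commit drops it, leaving the encoder's default full context. If someone wants it back, a hedged sketch behind an opt-in flag (the flag is hypothetical, not part of this commit):

# Hedged sketch: re-enable limited attention context behind an opt-in flag.
# `model` is assumed to be the loaded NeMo ASR model from ParakeetManager.
import os

if os.environ.get("PARAKEET_ATT_CONTEXT"):  # hypothetical flag, not in this commit
    # hasattr guard keeps this a no-op on encoders without the method
    if hasattr(model.encoder, "set_default_att_context_size"):
        model.encoder.set_default_att_context_size([512, 16])  # long history, short lookahead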
@@ -122,12 +116,12 @@ class ParakeetManager:
             "return_best_hypothesis": True,
             "score_norm": True,
             "allow_cuda_graphs": False,  # CPU-only
-            "max_symbols_per_step": 10,
+            "max_symbols_per_step": 10,  # Added for stability in MALSD
         })
         OmegaConf.set_struct(cfg, False)
         cfg["loop_labels"] = True
-        cfg["fused_batch_size"] = -1
-        cfg["compute_timestamps"] = False
+        cfg["fused_batch_size"] = -1  # Added for CPU compatibility
+        cfg["compute_timestamps"] = False  # Added to match legacy, avoid overhead
         if hasattr(cfg, "greedy"):
             cfg.greedy.use_cuda_graph_decoder = False
         self.model.change_decoding_strategy(cfg)
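
Note: the surrounding method follows NeMo's usual pattern: deep-copy the decoding config saved at load time, point it at batched MALSD beam search, loosen struct mode so extra keys can be set, then apply it via change_decoding_strategy. A condensed sketch (the "malsd_batch" strategy and beam_size wiring are inferred from the log line above; treat those exact keys as assumptions):

# Condensed sketch of the _set_malsd_beam pattern, not the full method.
import copy
from omegaconf import OmegaConf

def set_malsd_beam(model, base_decoding, beam_size: int):
    cfg = copy.deepcopy(base_decoding)   # base config captured at load time
    cfg.strategy = "malsd_batch"         # batched ALSD beam search (from the log line)
    cfg.beam.beam_size = beam_size       # assumed key; the dict literal is partly elided in the diff
    OmegaConf.set_struct(cfg, False)     # allow keys outside the schema
    cfg["loop_labels"] = True
    cfg["fused_batch_size"] = -1         # no fused batching on CPU
    cfg["compute_timestamps"] = False    # skip timestamp bookkeeping
    model.change_decoding_strategy(cfg)  # standard NeMo ASR model API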
@@ -191,12 +185,7 @@ class StreamingSession:
             self.pending = self.pending[C:]
             try:
                 self.hyp = self.mgr.stream_step(chunk, self.hyp)
-                new_text = getattr(self.hyp, "text", "")
-                if new_text:
-                    if self.text and new_text.startswith(self.text):  # If cumulative (partial extends), replace with extended
-                        self.text = new_text
-                    else:  # Else append (handles per-chunk case)
-                        self.text += (' ' if self.text else '') + new_text
+                self.text = getattr(self.hyp, "text", self.text)
             except Exception:
                 logger.exception("mic_step failed")
                 break
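
Note: the old prefix-extend/append logic could double-append whenever a cumulative partial failed the startswith check. The one-liner assumes the hypothesis threaded through stream_step carries the full transcript so far, so the session just mirrors hyp.text. A self-contained toy of that contract:

# Toy illustration of the cumulative-hypothesis contract this change assumes.
class FakeHyp:
    def __init__(self, text: str):
        self.text = text

def stream_step(chunk: str, hyp):
    prev = hyp.text if hyp is not None else ""
    return FakeHyp(prev + chunk)  # .text grows monotonically across steps

hyp, text = None, ""
for chunk in ["hello", " world"]:
    hyp = stream_step(chunk, hyp)
    text = getattr(hyp, "text", text)  # mirrors the diff's one-liner
assert text == "hello world"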
@@ -207,13 +196,7 @@ class StreamingSession:
         final = np.concatenate([self.pending, pad])
         try:
             self.hyp = self.mgr.stream_step(final, self.hyp)
-            new_text = getattr(self.hyp, "text", "")
-            if new_text:
-                if self.text and new_text.startswith(self.text):
-                    self.text = new_text
-                else:
-                    self.text += (' ' if self.text else '') + new_text
-            self.text += '.'  # Add period for sentence closure on flush
+            self.text = getattr(self.hyp, "text", self.text)
         except Exception:
             logger.exception("mic_flush failed")
         self.pending = np.zeros(0, dtype=np.float32)
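
Note: on flush, the leftover samples are padded with FLUSH_PAD_S seconds of silence so the final stream_step sees a full window; the appended '.' is gone because hyp.text is now taken verbatim. The padding arithmetic, as a minimal sketch (lengths illustrative):

# Minimal sketch of the flush padding; lengths are illustrative.
import numpy as np

TARGET_SR = 16_000
FLUSH_PAD_S = 2.0
pending = np.zeros(12_345, dtype=np.float32)              # leftover audio
pad = np.zeros(int(FLUSH_PAD_S * TARGET_SR), np.float32)  # 2 s of silence
final = np.concatenate([pending, pad])                    # fed to the last stream_step
assert final.shape[0] == 12_345 + 32_000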
 