thecollabagepatch committed on
Commit
86dd29c
·
1 Parent(s): 82c8e97

loudness matching debug logs for sanity

Browse files
Files changed (1) hide show
  1. jam_worker.py +74 -10
jam_worker.py CHANGED
@@ -1,6 +1,8 @@
1
  # jam_worker.py - Bar-locked spool rewrite
2
  from __future__ import annotations
3
 
 
 
4
  import threading, time
5
  from dataclasses import dataclass
6
  from fractions import Fraction
@@ -435,7 +437,7 @@ class JamWorker(threading.Thread):
435
  This keeps external timing and bar alignment identical, but removes the audible
436
  fade-to-zero at chunk ends.
437
  """
438
- import numpy as np
439
 
440
  # ---- unpack model-rate samples ----
441
  s = wav.samples.astype(np.float32, copy=False)
@@ -550,20 +552,77 @@ class JamWorker(threading.Thread):
550
  return self.idx <= (horizon_anchor + self._max_buffer_ahead)
551
 
552
  def _emit_ready(self):
553
- """Emit next chunk(s) if the spool has enough samples."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
554
  while True:
555
  start, end = self._bar_clock.bounds_for_chunk(self.idx, self.params.bars_per_chunk)
556
  if end > self._spool_written:
557
- break # need more audio
558
- loop = self._spool[start:end]
 
 
 
 
 
 
 
 
 
 
 
 
 
559
 
560
  # Loudness match to reference loop (optional)
 
561
  if self.params.ref_loop is not None and self.params.loudness_mode != "none":
562
  ref = self.params.ref_loop.as_stereo().resample(self.params.target_sr)
563
  wav = au.Waveform(loop.copy(), int(self.params.target_sr))
564
- matched, _ = match_loudness_to_reference(ref, wav, method=self.params.loudness_mode, headroom_db=self.params.headroom_db)
565
- loop = matched.samples
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
566
 
 
567
  audio_b64, total_samples, channels = wav_bytes_base64(loop, int(self.params.target_sr))
568
  meta = {
569
  "bpm": float(self.params.bpm),
@@ -580,27 +639,31 @@ class JamWorker(threading.Thread):
580
  }
581
  chunk = JamChunk(index=self.idx, audio_base64=audio_b64, metadata=meta)
582
 
 
583
  with self._cv:
584
  self._outbox[self.idx] = chunk
585
  self._cv.notify_all()
 
 
 
 
586
  self.idx += 1
587
 
588
  # If a reseed is queued, install it *right after* we finish a chunk
589
  with self._lock:
590
- # Prefer seamless token splice when available
591
  if self._pending_token_splice is not None:
592
  spliced = self._coerce_tokens(self._pending_token_splice["tokens"])
593
  try:
594
- # inplace update (no reset)
595
- self.state.context_tokens = spliced
596
  self._pending_token_splice = None
 
597
  except Exception:
598
- # fallback: full reseed using spliced tokens
599
  new_state = self.mrt.init_state()
600
  new_state.context_tokens = spliced
601
  self.state = new_state
602
  self._model_stream = None
603
  self._pending_token_splice = None
 
604
  elif self._pending_reseed is not None:
605
  ctx = self._coerce_tokens(self._pending_reseed["ctx"])
606
  new_state = self.mrt.init_state()
@@ -608,6 +671,7 @@ class JamWorker(threading.Thread):
608
  self.state = new_state
609
  self._model_stream = None
610
  self._pending_reseed = None
 
611
 
612
  # ---------- main loop ----------
613
 
 
1
  # jam_worker.py - Bar-locked spool rewrite
2
  from __future__ import annotations
3
 
4
+ import os
5
+
6
  import threading, time
7
  from dataclasses import dataclass
8
  from fractions import Fraction
 
437
  This keeps external timing and bar alignment identical, but removes the audible
438
  fade-to-zero at chunk ends.
439
  """
440
+
441
 
442
  # ---- unpack model-rate samples ----
443
  s = wav.samples.astype(np.float32, copy=False)
 
552
  return self.idx <= (horizon_anchor + self._max_buffer_ahead)
553
 
554
  def _emit_ready(self):
555
+ """Emit next chunk(s) if the spool has enough samples. With verbose RMS debug."""
556
+
557
+
558
+ QDB_SILENCE = -55.0 # quarter-bar segment considered "near silence" if RMS dBFS below this
559
+ EPS = 1e-12
560
+
561
+ def rms_dbfs(x: np.ndarray) -> float:
562
+ # x: float32 [-1,1]; return single-channel RMS dBFS (mean over channels if stereo)
563
+ if x.ndim == 2:
564
+ x = x.mean(axis=1)
565
+ rms = float(np.sqrt(np.mean(np.square(x)) + EPS))
566
+ return 20.0 * np.log10(max(rms, EPS))
567
+
568
+ def qbar_rms_dbfs(x: np.ndarray, seg_len: int) -> list[float]:
569
+ vals = []
570
+ if x.ndim == 2:
571
+ mono = x.mean(axis=1)
572
+ else:
573
+ mono = x
574
+ N = mono.shape[0]
575
+ for i in range(0, N, seg_len):
576
+ seg = mono[i:min(i + seg_len, N)]
577
+ if seg.size == 0:
578
+ break
579
+ r = float(np.sqrt(np.mean(seg * seg) + EPS))
580
+ vals.append(20.0 * np.log10(max(r, EPS)))
581
+ return vals
582
+
583
  while True:
584
  start, end = self._bar_clock.bounds_for_chunk(self.idx, self.params.bars_per_chunk)
585
  if end > self._spool_written:
586
+ # Not enough audio buffered for the next full chunk
587
+ # Debug the readiness gap once per idx
588
+ # print(f"[emit idx={self.idx}] need end={end}, have={self._spool_written} (Δ={end - self._spool_written})")
589
+ break
590
+
591
+ # Slice the emitted window (target SR)
592
+ loop = self._spool[start:end] # shape: [samples, channels] @ target_sr
593
+
594
+ # ---- DEBUG: pre-loudness quarter-bar RMS ----
595
+ spb = self._bar_clock.bar_samps # samples per bar @ target_sr
596
+ qlen = max(1, spb // 4) # quarter-bar segment length
597
+ q_rms_pre = qbar_rms_dbfs(loop, qlen)
598
+ # Mark segments that look like near-silence
599
+ silent_marks_pre = ["🟢" if v > QDB_SILENCE else "🟥" for v in q_rms_pre[:8]]
600
+ print(f"[emit idx={self.idx}] pre-LM qRMS dBFS: {['%5.1f'%v for v in q_rms_pre[:8]]} {''.join(silent_marks_pre)}")
601
 
602
  # Loudness match to reference loop (optional)
603
+ gain_db_applied = None
604
  if self.params.ref_loop is not None and self.params.loudness_mode != "none":
605
  ref = self.params.ref_loop.as_stereo().resample(self.params.target_sr)
606
  wav = au.Waveform(loop.copy(), int(self.params.target_sr))
607
+ try:
608
+ matched, gain_db_applied = match_loudness_to_reference(
609
+ ref, wav,
610
+ method=self.params.loudness_mode,
611
+ headroom_db=self.params.headroom_db
612
+ )
613
+ loop = matched.samples
614
+ except Exception as e:
615
+ print(f"[emit idx={self.idx}] loudness-match ERROR: {e}; proceeding with un-matched audio")
616
+
617
+ # ---- DEBUG: post-loudness quarter-bar RMS ----
618
+ q_rms_post = qbar_rms_dbfs(loop, qlen)
619
+ silent_marks_post = ["🟢" if v > QDB_SILENCE else "🟥" for v in q_rms_post[:8]]
620
+ if gain_db_applied is None:
621
+ print(f"[emit idx={self.idx}] post-LM qRMS dBFS: {['%5.1f'%v for v in q_rms_post[:8]]} {''.join(silent_marks_post)} (LM: none)")
622
+ else:
623
+ print(f"[emit idx={self.idx}] post-LM qRMS dBFS: {['%5.1f'%v for v in q_rms_post[:8]]} {''.join(silent_marks_post)} (LM gain {gain_db_applied:+.2f} dB)")
624
 
625
+ # Encode & ship
626
  audio_b64, total_samples, channels = wav_bytes_base64(loop, int(self.params.target_sr))
627
  meta = {
628
  "bpm": float(self.params.bpm),
 
639
  }
640
  chunk = JamChunk(index=self.idx, audio_base64=audio_b64, metadata=meta)
641
 
642
+ # Emit to outbox
643
  with self._cv:
644
  self._outbox[self.idx] = chunk
645
  self._cv.notify_all()
646
+
647
+ # ---- DEBUG: boundary bookkeeping ----
648
+ print(f"[emit idx={self.idx}] slice [{start}:{end}] (len={end-start}), spool_written={self._spool_written}")
649
+
650
  self.idx += 1
651
 
652
  # If a reseed is queued, install it *right after* we finish a chunk
653
  with self._lock:
 
654
  if self._pending_token_splice is not None:
655
  spliced = self._coerce_tokens(self._pending_token_splice["tokens"])
656
  try:
657
+ self.state.context_tokens = spliced # in-place update
 
658
  self._pending_token_splice = None
659
+ print(f"[emit idx={self.idx}] installed token splice (in-place)")
660
  except Exception:
 
661
  new_state = self.mrt.init_state()
662
  new_state.context_tokens = spliced
663
  self.state = new_state
664
  self._model_stream = None
665
  self._pending_token_splice = None
666
+ print(f"[emit idx={self.idx}] installed token splice (reinit state)")
667
  elif self._pending_reseed is not None:
668
  ctx = self._coerce_tokens(self._pending_reseed["ctx"])
669
  new_state = self.mrt.init_state()
 
671
  self.state = new_state
672
  self._model_stream = None
673
  self._pending_reseed = None
674
+ print(f"[emit idx={self.idx}] performed full reseed")
675
 
676
  # ---------- main loop ----------
677