thecollabagepatch committed on
Commit
2081536
·
1 Parent(s): 0103ac5

still attempting to fix the occasional dip into silence

Browse files
Files changed (1) hide show
  1. jam_worker.py +99 -39
jam_worker.py CHANGED
@@ -117,6 +117,8 @@ class JamWorker(threading.Thread):
117
  self._spool = np.zeros((0, 2), dtype=np.float32) # (S,2) target SR
118
  self._spool_written = 0 # absolute frames written into spool
119
 
 
 
120
  # bar clock: start with offset 0; if you have a downbeat estimator, set base later
121
  self._bar_clock = BarClock(self.params.target_sr, self.params.bpm, self.params.beats_per_bar, base_offset_samples=0)
122
 
@@ -420,48 +422,106 @@ class JamWorker(threading.Thread):
420
 
421
  # ---------- core streaming helpers ----------
422
 
423
- def _append_model_chunk_and_spool(self, wav: au.Waveform):
424
- """Crossfade into the model-rate stream and write the *non-overlapped*
425
- tail to the target-SR spool."""
426
- s = wav.samples.astype(np.float32, copy=False)
427
- if s.ndim == 1:
428
- s = s[:, None]
429
- sr = self._model_sr
430
- xfade_s = float(self.mrt.config.crossfade_length)
431
- xfade_n = int(round(max(0.0, xfade_s) * sr))
432
-
433
- if self._model_stream is None:
434
- # first chunk: drop the preroll (xfade) then spool
435
- new_part = s[xfade_n:] if xfade_n < s.shape[0] else s[:0]
436
- self._model_stream = new_part.copy()
437
- if new_part.size:
438
- y = (new_part.astype(np.float32, copy=False)
439
- if self._rs is None else
440
- self._rs.process(new_part.astype(np.float32, copy=False), final=False))
441
- self._spool = np.concatenate([self._spool, y], axis=0)
442
- self._spool_written += y.shape[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
443
  return
444
 
445
- # crossfade into existing stream
446
- if xfade_n > 0 and self._model_stream.shape[0] >= xfade_n and s.shape[0] >= xfade_n:
447
- tail = self._model_stream[-xfade_n:]
448
- head = s[:xfade_n]
449
- t = np.linspace(0, np.pi/2, xfade_n, endpoint=False, dtype=np.float32)[:, None]
450
- mixed = tail * np.cos(t) + head * np.sin(t)
451
- self._model_stream = np.concatenate([self._model_stream[:-xfade_n], mixed, s[xfade_n:]], axis=0)
452
- new_part = s[xfade_n:]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
453
  else:
454
- self._model_stream = np.concatenate([self._model_stream, s], axis=0)
455
- new_part = s
456
-
457
- # spool only the *new* non-overlapped part
458
- if new_part.size:
459
- y = (new_part.astype(np.float32, copy=False)
460
- if self._rs is None else
461
- self._rs.process(new_part.astype(np.float32, copy=False), final=False))
462
- if y.size:
463
- self._spool = np.concatenate([self._spool, y], axis=0)
464
- self._spool_written += y.shape[0]
 
 
465
 
466
  def _should_generate_next_chunk(self) -> bool:
467
  # Allow running ahead relative to whichever is larger: last *consumed*
 
117
  self._spool = np.zeros((0, 2), dtype=np.float32) # (S,2) target SR
118
  self._spool_written = 0 # absolute frames written into spool
119
 
120
+ self._pending_overlap_model = None
121
+
122
  # bar clock: start with offset 0; if you have a downbeat estimator, set base later
123
  self._bar_clock = BarClock(self.params.target_sr, self.params.bpm, self.params.beats_per_bar, base_offset_samples=0)
124
 
 
422
 
423
  # ---------- core streaming helpers ----------
424
 
425
def _append_model_chunk_and_spool(self, s: np.ndarray) -> None:
    """
    Append a newly generated model-rate chunk to the target-rate output spool.

    Consecutive chunks overlap by the configured crossfade length: the head of
    each chunk is equal-power mixed with the tail held back from the previous
    chunk, so the overlap is actually present in the emitted audio.

    Per call:
      - mix ``tail(prev) * cos + head(curr) * sin`` over the overlap,
      - emit the mix followed by the non-overlapped body as ONE contiguous
        segment (a single resampler call per chunk, so no extra seams),
      - hold the last ``xfade_n`` frames in ``self._pending_overlap_model``
        for the next call.
    On the very first call (nothing pending) the head is discarded as
    pre-roll, only the body is emitted, and the tail is held.

    Args:
        s: Model-rate audio shaped ``[samples]`` or ``[samples, channels]``.
           An object exposing a ``.samples`` array (waveform-like) is also
           accepted, so callers that pass a waveform keep working.

    Side effects:
        Updates ``self._spool``, ``self._spool_written`` and
        ``self._pending_overlap_model``. Does not touch model state.
    """
    # Accept either a bare array or a waveform-like wrapper around one.
    samples = getattr(s, "samples", s)
    if samples is None:
        return
    chunk = np.asarray(samples, dtype=np.float32)
    if chunk.size == 0:
        return
    if chunk.ndim == 1:
        chunk = chunk[:, None]

    def _to_target_sr(x: np.ndarray) -> np.ndarray:
        # Prefer the worker's streaming resampler: it carries filter state
        # across calls, so consecutive segments join without clicks.
        # NOTE(review): assumes `self._rs.process(x, final=False)` returns
        # target-rate frames and may return an empty array while it buffers
        # -- confirm against the resampler class.
        rs = getattr(self, "_rs", None)
        if rs is not None:
            return rs.process(x, final=False)
        src_sr = self.mrt.sr
        dst_sr = self.params.target_sr
        if int(src_sr) == int(dst_sr):
            return x  # rates already match; nothing to convert
        # Stateless fallback, kept for setups without a streaming resampler.
        from utils import resample_audio  # adjust if your resampler lives elsewhere
        return resample_audio(x, src_sr, dst_sr)

    def _emit(segment: np.ndarray) -> None:
        # Resample one model-rate segment and append it to the spool.
        if segment is None or segment.shape[0] == 0:
            return
        y = _to_target_sr(segment)
        if y is None or y.size == 0:
            return  # resampler is still buffering; nothing to append yet
        y = np.asarray(y, dtype=np.float32)
        if self._spool.size:
            self._spool = np.concatenate([self._spool, y], axis=0)
        else:
            # Copy so the spool never aliases the caller's buffer.
            self._spool = y.copy()
        self._spool_written += y.shape[0]

    # Crossfade length in *model samples*: prefer an explicit sample count,
    # otherwise derive it from the configured duration in seconds.
    cfg = self.mrt.config
    xfade_n = None
    explicit_n = getattr(cfg, "crossfade_samples", None)
    if explicit_n is not None:
        try:
            xfade_n = int(explicit_n)
        except (TypeError, ValueError):
            xfade_n = None
    if xfade_n is None:
        xfade_n = int(round(float(cfg.crossfade_length) * float(self.mrt.sr)))

    if xfade_n <= 0:
        # No crossfade configured -> emit the whole chunk as-is.
        _emit(chunk)
        return

    pending = self._pending_overlap_model
    n_samps = chunk.shape[0]

    if n_samps <= xfade_n:
        # Too short to split: fold it into the held tail (capped at xfade_n
        # frames) and wait for a longer chunk.
        if pending is None:
            self._pending_overlap_model = chunk.copy()
        else:
            self._pending_overlap_model = np.concatenate(
                [pending, chunk], axis=0
            )[-xfade_n:]
        return

    # Split into head / body / tail. The tail is taken from what remains
    # AFTER the head so the two never overlap, even when the chunk is shorter
    # than 2 * xfade_n (otherwise those frames would be emitted twice).
    head = chunk[:xfade_n]
    rest = chunk[xfade_n:]
    tail_n = min(xfade_n, rest.shape[0])
    split = rest.shape[0] - tail_n
    body = rest[:split]
    tail = rest[split:]

    if pending is not None and pending.shape[0] > 0:
        # The held tail can be shorter than xfade_n (after a tiny chunk).
        # Crossfade over whatever overlap exists instead of dropping audio,
        # and pass the rest of the head through unmodified.
        prev_tail = pending[-xfade_n:]
        overlap_n = prev_tail.shape[0]
        t = np.linspace(
            0.0, np.pi / 2.0, overlap_n, endpoint=False, dtype=np.float32
        )[:, None]
        mixed = prev_tail * np.cos(t) + head[:overlap_n] * np.sin(t)
        segment = np.concatenate([mixed, head[overlap_n:], body], axis=0)
        _emit(segment.astype(np.float32, copy=False))
    else:
        # First chunk: the head is pre-roll with nothing to mix against.
        # Emit only the body; the tail is held for the next call.
        _emit(body)

    # Hold the new tail (copied, so later caller-side writes cannot alter it).
    self._pending_overlap_model = tail.copy()
525
 
526
  def _should_generate_next_chunk(self) -> bool:
527
  # Allow running ahead relative to whichever is larger: last *consumed*