Spaces:

thecollabagepatch
/

magenta-retry

Running

App Files Files Community

thecollabagepatch committed on Aug 13

Commit

d1afbc8

1 Parent(s): 2f6eca9

initial commit

Browse files

Files changed (4) hide show

Dockerfile +138 -0
app.py +436 -0
jam_worker.py +231 -0
utils.py +168 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,138 @@

+# thecollabagepatch/magenta:latest
+FROM nvidia/cuda:12.6.2-cudnn-runtime-ubuntu22.04
+# CUDA libs present + on loader path
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    cuda-libraries-12-4 && rm -rf /var/lib/apt/lists/*
+ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda-12.4/lib64:/usr/local/cuda-12.4/compat:/usr/local/cuda/targets/x86_64-linux/lib:${LD_LIBRARY_PATH}
+RUN ln -sf /usr/local/cuda/targets/x86_64-linux/lib /usr/local/cuda/lib64 || true
+# Ensure the NVIDIA repo key is present (non-interactive) and install cuDNN 9.8
+RUN set -eux; \
+  apt-get update && apt-get install -y --no-install-recommends gnupg ca-certificates curl; \
+  install -d -m 0755 /usr/share/keyrings; \
+  # Refresh the *same* keyring the base source uses (no second source file)
+  curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/3bf863cc.pub \
+    | gpg --batch --yes --dearmor -o /usr/share/keyrings/cuda-archive-keyring.gpg; \
+  apt-get update; \
+  # If libcudnn is "held", unhold it so we can move to 9.8
+  apt-mark unhold libcudnn9-cuda-12 || true; \
+  # Install cuDNN 9.8 for CUDA 12 (correct dev package name!)
+  apt-get install -y --no-install-recommends \
+      'libcudnn9-cuda-12=9.8.*' \
+      'libcudnn9-dev-cuda-12=9.8.*' \
+      --allow-downgrades --allow-change-held-packages; \
+  apt-mark hold libcudnn9-cuda-12 || true; \
+  ldconfig; \
+  rm -rf /var/lib/apt/lists/*
+# (optional) preload workaround if still needed
+ENV LD_PRELOAD=/usr/local/cuda/lib64/libcusparse.so.12:/usr/local/cuda/lib64/libcublas.so.12:/usr/local/cuda/lib64/libcublasLt.so.12:/usr/local/cuda/lib64/libcufft.so.11:/usr/local/cuda/lib64/libcusolver.so.11
+ENV DEBIAN_FRONTEND=noninteractive \
+    PYTHONUNBUFFERED=1 \
+    PIP_NO_CACHE_DIR=1 \
+    TF_FORCE_GPU_ALLOW_GROWTH=true \
+    XLA_PYTHON_CLIENT_PREALLOCATE=false
+ENV JAX_PLATFORMS=""
+# --- OS deps ---
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    software-properties-common curl ca-certificates git \
+    libsndfile1 ffmpeg \
+    build-essential pkg-config \
+    && add-apt-repository ppa:deadsnakes/ppa -y \
+    && apt-get update && apt-get install -y --no-install-recommends \
+    python3.11 python3.11-venv python3.11-distutils python3-pip \
+    && rm -rf /var/lib/apt/lists/*
+# Make `python` => 3.11 for convenience (symlinks /usr/bin/python to python3.11)
+RUN ln -sf /usr/bin/python3.11 /usr/bin/python && python -m pip install --upgrade pip
+# --- Python deps (pin order matters!) ---
+# 1) JAX CUDA pins
+RUN python -m pip install "jax[cuda12]==0.6.2" "jaxlib==0.6.2"
+# 2) Lock seqio early to avoid backtracking madness
+RUN python -m pip install "seqio==0.0.11"
+# 3) Install Magenta RT *without* deps so we control pins
+RUN python -m pip install --no-deps 'git+https://github.com/magenta/magenta-realtime#egg=magenta_rt[gpu]'
+# 4) TF nightlies (MATCH DATES!)
+RUN python -m pip install \
+    "tf_nightly==2.20.0.dev20250619" \
+    "tensorflow-text-nightly==2.20.0.dev20250316" \
+    "tf-hub-nightly"
+# 5) tf2jax pinned alongside tf_nightly so pip doesn’t drag stable TF
+RUN python -m pip install tf2jax "tf_nightly==2.20.0.dev20250619"
+# 6) The rest of MRT deps + API runtime deps
+RUN python -m pip install \
+    gin-config librosa resampy soundfile \
+    google-auth google-auth-oauthlib google-auth-httplib2 \
+    google-api-core googleapis-common-protos google-resumable-media \
+    google-cloud-storage requests tqdm typing-extensions numpy==2.1.3 \
+    fastapi uvicorn[standard] python-multipart pyloudnorm
+# 7) Exact commits for T5X/Flaxformer as in pyproject
+RUN python -m pip install \
+    "t5x @ git+https://github.com/google-research/t5x.git@92c5b46" \
+    "flaxformer @ git+https://github.com/google/flaxformer@399ea3a"
+# ---- FINAL: enforce TF nightlies and clean any stable TF ----
+RUN python - <<'PY'
+import sys, sysconfig, glob, os, shutil
+# Find a writable site dir (site-packages OR dist-packages)
+cands = [sysconfig.get_paths().get('purelib'), sysconfig.get_paths().get('platlib')]
+cands += [p for p in sys.path if p and p.endswith(('site-packages','dist-packages'))]
+site = next(p for p in cands if p and os.path.isdir(p))
+patterns = [
+  "tensorflow", "tensorflow-*.dist-info", "tensorflow-*.egg-info",
+  "tf-nightly-*.dist-info", "tf_nightly-*.dist-info",
+  "tensorflow_text", "tensorflow_text-*.dist-info",
+  "tf-hub-nightly-*.dist-info", "tf_hub_nightly-*.dist-info",
+  "tf_keras-nightly-*.dist-info", "tf_keras_nightly-*.dist-info",
+  "tensorboard*", "tb-nightly-*.dist-info",
+  "keras*",  # remove stray keras
+  "tensorflow_hub*", "tensorflow_io*",
+]
+for pat in patterns:
+  for path in glob.glob(os.path.join(site, pat)):
+    if os.path.isdir(path): shutil.rmtree(path, ignore_errors=True)
+    else:
+      try: os.remove(path)
+      except FileNotFoundError: pass
+print("TF/Hub/Text cleared in:", site)
+PY
+# Reinstall pinned nightlies in ONE transaction
+RUN python -m pip install --no-cache-dir --force-reinstall \
+    "tf-nightly==2.20.0.dev20250619" \
+    "tensorflow-text-nightly==2.20.0.dev20250316" \
+    "tf-hub-nightly"
+RUN python -m pip install huggingface_hub
+RUN python -m pip install --no-cache-dir --force-reinstall "protobuf==4.25.3"
+# Switch to Spaces’ preferred user
+RUN useradd -m -u 1000 appuser
+WORKDIR /home/appuser/app
+# Copy from *build context* into image, owned by appuser
+COPY --chown=appuser:appuser app.py /home/appuser/app/app.py
+# NEW: shared utils + worker
+COPY --chown=appuser:appuser utils.py /home/appuser/app/utils.py
+COPY --chown=appuser:appuser jam_worker.py /home/appuser/app/jam_worker.py
+USER appuser
+EXPOSE 7860
+CMD ["bash", "-lc", "python -m uvicorn app:app --host 0.0.0.0 --port ${PORT:-7860}"]

app.py ADDED Viewed

	@@ -0,0 +1,436 @@

+from magenta_rt import system, audio as au
+import numpy as np
+from fastapi import FastAPI, UploadFile, File, Form, Body, HTTPException, Response
+import tempfile, io, base64, math, threading
+from fastapi.middleware.cors import CORSMiddleware
+from contextlib import contextmanager
+import soundfile as sf
+import numpy as np
+from math import gcd
+from scipy.signal import resample_poly
+from utils import (
+    match_loudness_to_reference, stitch_generated, hard_trim_seconds,
+    apply_micro_fades, make_bar_aligned_context, take_bar_aligned_tail,
+    resample_and_snap, wav_bytes_base64
+)
+from jam_worker import JamWorker, JamParams, JamChunk
+import uuid, threading
+# Jam workers keyed by session id (a uuid4 string assigned in /jam/start).
+# Every read or write of the registry in this file happens under jam_lock.
+jam_registry: dict[str, JamWorker] = {}
+jam_lock = threading.Lock()
+@contextmanager
+def mrt_overrides(mrt, **kwargs):
+    """Temporarily set attributes on MRT if they exist; restore after."""
+    old = {}
+    try:
+        for k, v in kwargs.items():
+            if hasattr(mrt, k):
+                old[k] = getattr(mrt, k)
+                setattr(mrt, k, v)
+        yield
+    finally:
+        for k, v in old.items():
+            setattr(mrt, k, v)
+# loudness utils
+try:
+    import pyloudnorm as pyln
+    _HAS_LOUDNORM = True
+except Exception:
+    _HAS_LOUDNORM = False
+# ----------------------------
+# Main generation (single combined style vector)
+# ----------------------------
+def generate_loop_continuation_with_mrt(
+    mrt,
+    input_wav_path: str,
+    bpm: float,
+    extra_styles=None,
+    style_weights=None,
+    bars: int = 8,
+    beats_per_bar: int = 4,
+    loop_weight: float = 1.0,
+    loudness_mode: str = "auto",
+    loudness_headroom_db: float = 1.0,
+    intro_bars_to_drop: int = 0,             # bars generated, then cut from the start
+):
+    """Generate ``bars`` bars of audio that continue the loop at ``input_wav_path``.
+
+    Steps, in order: load the loop and resample it to ``mrt.sample_rate`` in
+    stereo; take a bar-aligned tail sized to the model context; encode it into
+    context tokens; build one style vector as the weight-normalised sum of the
+    loop embedding and any ``extra_styles`` embeddings; generate enough model
+    chunks to cover ``bars`` plus the dropped intro bars; stitch, trim, drop
+    the intro, peak-normalise, fade the edges and loudness-match to the loop.
+
+    Args:
+        mrt: Model object; this function uses its ``codec``, ``config``,
+            ``sample_rate``, ``init_state``, ``embed_style`` and
+            ``generate_chunk`` members.
+        input_wav_path: Path of the audio file to continue.
+        bpm: Tempo used for all bar/second conversions (division by zero if 0).
+        extra_styles: Optional iterable of style strings; blank entries are skipped.
+        style_weights: Optional weights matched to ``extra_styles`` by position;
+            missing entries default to 1.0.
+        bars: Number of bars in the returned audio.
+        beats_per_bar: Beats per bar.
+        loop_weight: Weight of the loop's own style embedding in the mix.
+        loudness_mode: Passed as ``method`` to ``match_loudness_to_reference``.
+        loudness_headroom_db: Passed as ``headroom_db`` to the same helper.
+        intro_bars_to_drop: Extra leading bars to generate and discard;
+            negative values count as 0 and the value is clamped to ``bars``.
+
+    Returns:
+        Tuple ``(out, loud_stats)`` as returned by ``match_loudness_to_reference``.
+    """
+    # Load the loop at the model's sample rate, as stereo.
+    loop = au.Waveform.from_file(input_wav_path).resample(mrt.sample_rate).as_stereo()
+    # Use the bar-aligned tail of the loop as model context.
+    codec_fps   = float(mrt.codec.frame_rate)
+    ctx_seconds = float(mrt.config.context_length_frames) / codec_fps
+    loop_for_context = take_bar_aligned_tail(loop, bpm, beats_per_bar, ctx_seconds)
+    tokens_full = mrt.codec.encode(loop_for_context).astype(np.int32)
+    # Keep only the leading decoder_codec_rvq_depth columns of the token array.
+    tokens = tokens_full[:, :mrt.config.decoder_codec_rvq_depth]
+    # Bar-aligned token window
+    context_tokens = make_bar_aligned_context(
+        tokens, bpm=bpm, fps=int(mrt.codec.frame_rate),
+        ctx_frames=mrt.config.context_length_frames, beats_per_bar=beats_per_bar
+    )
+    state = mrt.init_state()
+    state.context_tokens = context_tokens
+    # The style embedding is computed from the bar-aligned tail (loop_for_context),
+    # not from the full loop.
+    loop_embed = mrt.embed_style(loop_for_context)
+    embeds, weights = [loop_embed], [float(loop_weight)]
+    if extra_styles:
+        for i, s in enumerate(extra_styles):
+            if s.strip():
+                embeds.append(mrt.embed_style(s.strip()))
+                w = style_weights[i] if (style_weights and i < len(style_weights)) else 1.0
+                weights.append(float(w))
+    # Normalise weights to sum to 1; a zero sum is replaced by 1.0 to avoid dividing by zero.
+    wsum = float(sum(weights)) or 1.0
+    weights = [w / wsum for w in weights]
+    combined_style = np.sum([w * e for w, e in zip(weights, embeds)], axis=0).astype(loop_embed.dtype)
+    # --- Length math ---
+    seconds_per_bar = beats_per_bar * (60.0 / bpm)
+    total_secs      = bars * seconds_per_bar
+    drop_bars       = max(0, int(intro_bars_to_drop))
+    drop_secs       = min(drop_bars, bars) * seconds_per_bar       # clamp to <= bars
+    gen_total_secs  = total_secs + drop_secs                       # generate extra
+    # Chunk scheduling to cover gen_total_secs
+    chunk_secs = mrt.config.chunk_length_frames * mrt.config.frame_length_samples / mrt.sample_rate  # ~2.0
+    steps = int(math.ceil(gen_total_secs / chunk_secs)) + 1  # pad then trim
+    # Generate; the state returned by each call feeds the next one.
+    chunks = []
+    for _ in range(steps):
+        wav, state = mrt.generate_chunk(state=state, style=combined_style)
+        chunks.append(wav)
+    # Stitch continuous audio
+    stitched = stitch_generated(chunks, mrt.sample_rate, mrt.config.crossfade_length).as_stereo()
+    # Trim to generated length (bars + dropped bars)
+    stitched = hard_trim_seconds(stitched, gen_total_secs)
+    # Drop the intro bars
+    if drop_secs > 0:
+        n_drop = int(round(drop_secs * stitched.sample_rate))
+        stitched = au.Waveform(stitched.samples[n_drop:], stitched.sample_rate)
+    # Final exact-length trim to requested bars
+    out = hard_trim_seconds(stitched, total_secs)
+    # Final polish AFTER drop
+    out = out.peak_normalize(0.95)
+    # Called for its effect on `out`; the return value is not used.
+    apply_micro_fades(out, 5)
+    # Loudness match to input (after drop) so bar 1 sits right
+    out, loud_stats = match_loudness_to_reference(
+        ref=loop, target=out,
+        method=loudness_mode, headroom_db=loudness_headroom_db
+    )
+    return out, loud_stats
+# ----------------------------
+# FastAPI app with lazy, thread-safe model init
+# ----------------------------
+app = FastAPI()
+# CORS: every origin, method and header is allowed, with credentials enabled.
+# NOTE(review): FastAPI's CORS documentation advises against combining wildcard
+# origins with allow_credentials=True - confirm whether credentials are needed.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],   # or lock to your domain(s)
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+_MRT = None
+_MRT_LOCK = threading.Lock()
+def get_mrt():
+    global _MRT
+    if _MRT is None:
+        with _MRT_LOCK:
+            if _MRT is None:
+                _MRT = system.MagentaRT(tag="base", guidance_weight=1.0, device="gpu", lazy=False)
+    return _MRT
+@app.post("/generate")
+def generate(
+    loop_audio: UploadFile = File(...),
+    bpm: float = Form(...),
+    bars: int = Form(8),
+    beats_per_bar: int = Form(4),
+    styles: str = Form("acid house"),
+    style_weights: str = Form(""),
+    loop_weight: float = Form(1.0),
+    loudness_mode: str = Form("auto"),
+    loudness_headroom_db: float = Form(1.0),
+    guidance_weight: float = Form(5.0),
+    temperature: float = Form(1.1),
+    topk: int = Form(40),
+    target_sample_rate: int | None = Form(None),
+    intro_bars_to_drop: int = Form(0),          # <— NEW
+):
+    # Read file
+    data = loop_audio.file.read()
+    if not data:
+        return {"error": "Empty file"}
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
+        tmp.write(data)
+        tmp_path = tmp.name
+    # Parse styles + weights
+    extra_styles = [s for s in (styles.split(",") if styles else []) if s.strip()]
+    weights = [float(x) for x in style_weights.split(",")] if style_weights else None
+    mrt = get_mrt()  # warm once, in this worker thread
+    # Temporarily override MRT inference knobs for this request
+    with mrt_overrides(mrt,
+                       guidance_weight=guidance_weight,
+                       temperature=temperature,
+                       topk=topk):
+        wav, loud_stats = generate_loop_continuation_with_mrt(
+            mrt,
+            input_wav_path=tmp_path,
+            bpm=bpm,
+            extra_styles=extra_styles,
+            style_weights=weights,
+            bars=bars,
+            beats_per_bar=beats_per_bar,
+            loop_weight=loop_weight,
+            loudness_mode=loudness_mode,
+            loudness_headroom_db=loudness_headroom_db,
+            intro_bars_to_drop=intro_bars_to_drop,   # <— pass through
+        )
+    # 1) Figure out the desired SR
+    inp_info = sf.info(tmp_path)
+    input_sr = int(inp_info.samplerate)
+    target_sr = int(target_sample_rate or input_sr)
+    # 2) Convert to target SR + snap to exact bars
+    cur_sr = int(mrt.sample_rate)
+    x = wav.samples if wav.samples.ndim == 2 else wav.samples[:, None]
+    seconds_per_bar = (60.0 / float(bpm)) * int(beats_per_bar)
+    expected_secs = float(bars) * seconds_per_bar
+    x = resample_and_snap(x, cur_sr=cur_sr, target_sr=target_sr, seconds=expected_secs)
+    # 3) Encode WAV once (no extra write)
+    audio_b64, total_samples, channels = wav_bytes_base64(x, target_sr)
+    loop_duration_seconds = total_samples / float(target_sr)
+    # 4) Metadata
+    metadata = {
+        "bpm": int(round(bpm)),
+        "bars": int(bars),
+        "beats_per_bar": int(beats_per_bar),
+        "styles": extra_styles,
+        "style_weights": weights,
+        "loop_weight": loop_weight,
+        "loudness": loud_stats,
+        "sample_rate": int(target_sr),
+        "channels": int(channels),
+        "crossfade_seconds": mrt.config.crossfade_length,
+        "total_samples": int(total_samples),
+        "seconds_per_bar": seconds_per_bar,
+        "loop_duration_seconds": loop_duration_seconds,
+        "guidance_weight": guidance_weight,
+        "temperature": temperature,
+        "topk": topk,
+    }
+    return {"audio_base64": audio_b64, "metadata": metadata}
+# ----------------------------
+# the 'keep jamming' button
+# ----------------------------
+@app.post("/jam/start")
+def jam_start(
+    loop_audio: UploadFile = File(...),
+    bpm: float = Form(...),
+    bars_per_chunk: int = Form(4),
+    beats_per_bar: int = Form(4),
+    styles: str = Form(""),
+    style_weights: str = Form(""),
+    loop_weight: float = Form(1.0),
+    loudness_mode: str = Form("auto"),
+    loudness_headroom_db: float = Form(1.0),
+    guidance_weight: float = Form(1.1),
+    temperature: float = Form(1.1),
+    topk: int = Form(40),
+    target_sample_rate: int | None = Form(None),
+):
+    # enforce single active jam per GPU
+    with jam_lock:
+        for sid, w in list(jam_registry.items()):
+            if w.is_alive():
+                raise HTTPException(status_code=429, detail="A jam is already running. Try again later.")
+    # read input + prep context/style (reuse your existing code)
+    data = loop_audio.file.read()
+    if not data: raise HTTPException(status_code=400, detail="Empty file")
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
+        tmp.write(data); tmp_path = tmp.name
+    mrt = get_mrt()
+    loop = au.Waveform.from_file(tmp_path).resample(mrt.sample_rate).as_stereo()
+    # build tail context + style vec (tail-biased)
+    codec_fps = float(mrt.codec.frame_rate)
+    ctx_seconds = float(mrt.config.context_length_frames) / codec_fps
+    loop_tail = take_bar_aligned_tail(loop, bpm, beats_per_bar, ctx_seconds)
+    # style vec = normalized mix of loop_tail + extra styles
+    embeds, weights = [mrt.embed_style(loop_tail)], [float(loop_weight)]
+    extra = [s for s in (styles.split(",") if styles else []) if s.strip()]
+    sw = [float(x) for x in style_weights.split(",")] if style_weights else []
+    for i, s in enumerate(extra):
+        embeds.append(mrt.embed_style(s.strip()))
+        weights.append(sw[i] if i < len(sw) else 1.0)
+    wsum = sum(weights) or 1.0
+    weights = [w / wsum for w in weights]
+    style_vec = np.sum([w * e for w, e in zip(weights, embeds)], axis=0).astype(embeds[0].dtype)
+    # target SR (default input SR)
+    inp_info = sf.info(tmp_path)
+    input_sr = int(inp_info.samplerate)
+    target_sr = int(target_sample_rate or input_sr)
+    params = JamParams(
+        bpm=bpm,
+        beats_per_bar=beats_per_bar,
+        bars_per_chunk=bars_per_chunk,
+        target_sr=target_sr,
+        loudness_mode=loudness_mode,
+        headroom_db=loudness_headroom_db,
+        style_vec=style_vec,
+        ref_loop=loop_tail,                    # For loudness matching
+        combined_loop=loop,                    # NEW: Full loop for context setup
+        guidance_weight=guidance_weight,
+        temperature=temperature,
+        topk=topk
+    )
+    worker = JamWorker(mrt, params)
+    sid = str(uuid.uuid4())
+    with jam_lock:
+        jam_registry[sid] = worker
+    worker.start()
+    return {"session_id": sid}
+@app.get("/jam/next")
+def jam_next(session_id: str):
+    """
+    Get the next sequential chunk in the jam session.
+    This ensures chunks are delivered in order without gaps.
+    """
+    with jam_lock:
+        worker = jam_registry.get(session_id)
+    if worker is None or not worker.is_alive():
+        raise HTTPException(status_code=404, detail="Session not found")
+    # Get the next sequential chunk (this blocks until ready)
+    chunk = worker.get_next_chunk()
+    if chunk is None:
+        raise HTTPException(status_code=408, detail="Chunk not ready within timeout")
+    return {
+        "chunk": {
+            "index": chunk.index,
+            "audio_base64": chunk.audio_base64,
+            "metadata": chunk.metadata
+        }
+    }
+@app.post("/jam/consume")
+def jam_consume(session_id: str = Form(...), chunk_index: int = Form(...)):
+    """
+    Mark a chunk as consumed by the frontend.
+    This helps the worker manage its buffer and generation flow.
+    """
+    with jam_lock:
+        worker = jam_registry.get(session_id)
+    if worker is None or not worker.is_alive():
+        raise HTTPException(status_code=404, detail="Session not found")
+    worker.mark_chunk_consumed(chunk_index)
+    return {"consumed": chunk_index}
+@app.post("/jam/stop")
+def jam_stop(session_id: str = Body(..., embed=True)):
+    with jam_lock:
+        worker = jam_registry.get(session_id)
+    if worker is None:
+        raise HTTPException(status_code=404, detail="Session not found")
+    worker.stop()
+    worker.join(timeout=5.0)
+    if worker.is_alive():
+        # It’s daemon=True, so it won’t block process exit, but report it
+        print(f"⚠️ JamWorker {session_id} did not stop within timeout")
+    with jam_lock:
+        jam_registry.pop(session_id, None)
+    return {"stopped": True}
+@app.post("/jam/update")
+def jam_update(session_id: str = Form(...),
+               guidance_weight: float | None = Form(None),
+               temperature: float | None = Form(None),
+               topk: int | None = Form(None)):
+    with jam_lock:
+        worker = jam_registry.get(session_id)
+    if worker is None or not worker.is_alive():
+        raise HTTPException(status_code=404, detail="Session not found")
+    worker.update_knobs(guidance_weight=guidance_weight, temperature=temperature, topk=topk)
+    return {"ok": True}
+@app.get("/jam/status")
+def jam_status(session_id: str):
+    with jam_lock:
+        worker = jam_registry.get(session_id)
+    if worker is None:
+        raise HTTPException(status_code=404, detail="Session not found")
+    running = worker.is_alive()
+    # Snapshot safely
+    with worker._lock:
+        last_generated = int(worker.idx)
+        last_delivered = int(worker._last_delivered_index)
+        queued = len(worker.outbox)
+        buffer_ahead = last_generated - last_delivered
+        p = worker.params
+        spb = p.beats_per_bar * (60.0 / p.bpm)
+        chunk_secs = p.bars_per_chunk * spb
+    return {
+        "running": running,
+        "last_generated_index": last_generated,       # Last chunk that finished generating
+        "last_delivered_index": last_delivered,       # Last chunk sent to frontend
+        "buffer_ahead": buffer_ahead,                  # How many chunks ahead we are
+        "queued_chunks": queued,                       # Total chunks in outbox
+        "bpm": p.bpm,
+        "beats_per_bar": p.beats_per_bar,
+        "bars_per_chunk": p.bars_per_chunk,
+        "seconds_per_bar": spb,
+        "chunk_duration_seconds": chunk_secs,
+        "target_sample_rate": p.target_sr,
+        "last_chunk_started_at": worker.last_chunk_started_at,
+        "last_chunk_completed_at": worker.last_chunk_completed_at,
+    }
+@app.get("/health")
+def health():
+    return {"ok": True}

jam_worker.py ADDED Viewed

	@@ -0,0 +1,231 @@

+# jam_worker.py - SIMPLE FIX VERSION
+import threading, time, base64, io, uuid
+from dataclasses import dataclass, field
+import numpy as np
+import soundfile as sf
+from utils import (
+    match_loudness_to_reference, stitch_generated, hard_trim_seconds,
+    apply_micro_fades, make_bar_aligned_context, take_bar_aligned_tail,
+    resample_and_snap, wav_bytes_base64
+)
+@dataclass
+class JamParams:
+    bpm: float
+    beats_per_bar: int
+    bars_per_chunk: int
+    target_sr: int
+    loudness_mode: str = "auto"
+    headroom_db: float = 1.0
+    style_vec: np.ndarray | None = None
+    ref_loop: any = None
+    combined_loop: any = None
+    guidance_weight: float = 1.1
+    temperature: float = 1.1
+    topk: int = 40
+@dataclass
+class JamChunk:
+    """One finished chunk of a jam session, ready to be sent to the client."""
+    # Sequence number assigned by the worker; the first chunk is 1.
+    index: int
+    # Encoded audio string produced by wav_bytes_base64.
+    audio_base64: str
+    # Per-chunk details (bpm, bars, sample rate, sampling knobs, ...).
+    metadata: dict
+class JamWorker(threading.Thread):
+    def __init__(self, mrt, params: JamParams):
+        super().__init__(daemon=True)
+        self.mrt = mrt
+        self.params = params
+        self.state = mrt.init_state()
+        if params.combined_loop is not None:
+            self._setup_context_from_combined_loop()
+        self.idx = 0
+        self.outbox: list[JamChunk] = []
+        self._stop_event = threading.Event()
+        # NEW: Track delivery state
+        self._last_delivered_index = 0
+        self._max_buffer_ahead = 5  # Don't generate more than 3 chunks ahead
+        # Timing info
+        self.last_chunk_started_at = None
+        self.last_chunk_completed_at = None
+        self._lock = threading.Lock()
+    def _setup_context_from_combined_loop(self):
+        """Set up MRT context tokens from the combined loop audio"""
+        try:
+            from utils import make_bar_aligned_context, take_bar_aligned_tail
+            codec_fps = float(self.mrt.codec.frame_rate)
+            ctx_seconds = float(self.mrt.config.context_length_frames) / codec_fps
+            loop_for_context = take_bar_aligned_tail(
+                self.params.combined_loop,
+                self.params.bpm,
+                self.params.beats_per_bar,
+                ctx_seconds
+            )
+            tokens_full = self.mrt.codec.encode(loop_for_context).astype(np.int32)
+            tokens = tokens_full[:, :self.mrt.config.decoder_codec_rvq_depth]
+            context_tokens = make_bar_aligned_context(
+                tokens,
+                bpm=self.params.bpm,
+                fps=int(self.mrt.codec.frame_rate),
+                ctx_frames=self.mrt.config.context_length_frames,
+                beats_per_bar=self.params.beats_per_bar
+            )
+            self.state.context_tokens = context_tokens
+            print(f"✅ JamWorker: Set up fresh context from combined loop")
+        except Exception as e:
+            print(f"❌ Failed to setup context from combined loop: {e}")
+    def stop(self):
+        self._stop_event.set()
+    def update_knobs(self, *, guidance_weight=None, temperature=None, topk=None):
+        with self._lock:
+            if guidance_weight is not None: self.params.guidance_weight = float(guidance_weight)
+            if temperature is not None:     self.params.temperature     = float(temperature)
+            if topk is not None:            self.params.topk            = int(topk)
+    def get_next_chunk(self) -> JamChunk | None:
+        """Get the next sequential chunk (blocks/waits if not ready)"""
+        target_index = self._last_delivered_index + 1
+        # Wait for the target chunk to be ready (with timeout)
+        max_wait = 30.0  # seconds
+        start_time = time.time()
+        while time.time() - start_time < max_wait and not self._stop_event.is_set():
+            with self._lock:
+                # Look for the exact chunk we need
+                for chunk in self.outbox:
+                    if chunk.index == target_index:
+                        self._last_delivered_index = target_index
+                        print(f"📦 Delivered chunk {target_index}")
+                        return chunk
+            # Not ready yet, wait a bit
+            time.sleep(0.1)
+        # Timeout or stopped
+        return None
+    def mark_chunk_consumed(self, chunk_index: int):
+        """Mark a chunk as consumed by the frontend"""
+        with self._lock:
+            self._last_delivered_index = max(self._last_delivered_index, chunk_index)
+            print(f"✅ Chunk {chunk_index} consumed")
+    def _should_generate_next_chunk(self) -> bool:
+        """Check if we should generate the next chunk (don't get too far ahead)"""
+        with self._lock:
+            # Don't generate if we're already too far ahead
+            if self.idx > self._last_delivered_index + self._max_buffer_ahead:
+                return False
+            return True
+    def _seconds_per_bar(self) -> float:
+        return self.params.beats_per_bar * (60.0 / self.params.bpm)
+    def _snap_and_encode(self, y, seconds, target_sr, bars):
+        cur_sr = int(self.mrt.sample_rate)
+        x = y.samples if y.samples.ndim == 2 else y.samples[:, None]
+        x = resample_and_snap(x, cur_sr=cur_sr, target_sr=target_sr, seconds=seconds)
+        b64, total_samples, channels = wav_bytes_base64(x, target_sr)
+        meta = {
+            "bpm": int(round(self.params.bpm)),
+            "bars": int(bars),
+            "beats_per_bar": int(self.params.beats_per_bar),
+            "sample_rate": int(target_sr),
+            "channels": channels,
+            "total_samples": total_samples,
+            "seconds_per_bar": self._seconds_per_bar(),
+            "loop_duration_seconds": bars * self._seconds_per_bar(),
+            "guidance_weight": self.params.guidance_weight,
+            "temperature": self.params.temperature,
+            "topk": self.params.topk,
+        }
+        return b64, meta
+    def run(self):
+        """Main worker loop - generate chunks continuously but don't get too far ahead"""
+        spb = self._seconds_per_bar()
+        chunk_secs = self.params.bars_per_chunk * spb
+        xfade = self.mrt.config.crossfade_length
+        print("🚀 JamWorker started with flow control...")
+        while not self._stop_event.is_set():
+            # Check if we should generate the next chunk
+            if not self._should_generate_next_chunk():
+                # We're ahead enough, wait a bit for frontend to catch up
+                print(f"⏸️  Buffer full, waiting for consumption...")
+                time.sleep(0.5)
+                continue
+            # Generate the next chunk
+            with self._lock:
+                style_vec = self.params.style_vec
+                self.mrt.guidance_weight = self.params.guidance_weight
+                self.mrt.temperature = self.params.temperature
+                self.mrt.topk = self.params.topk
+                next_idx = self.idx + 1
+            print(f"🎹 Generating chunk {next_idx}...")
+            # Generate enough model chunks to cover chunk_secs
+            need = chunk_secs
+            chunks = []
+            self.last_chunk_started_at = time.time()
+            while need > 0 and not self._stop_event.is_set():
+                wav, self.state = self.mrt.generate_chunk(state=self.state, style=style_vec)
+                chunks.append(wav)
+                need -= (wav.samples.shape[0] / float(self.mrt.sample_rate))
+            if self._stop_event.is_set():
+                break
+            # Stitch and trim to exact seconds at model SR
+            y = stitch_generated(chunks, self.mrt.sample_rate, xfade).as_stereo()
+            y = hard_trim_seconds(y, chunk_secs)
+            # Post-process
+            if next_idx == 1 and self.params.ref_loop is not None:
+                y, _ = match_loudness_to_reference(
+                    self.params.ref_loop, y,
+                    method=self.params.loudness_mode,
+                    headroom_db=self.params.headroom_db
+                )
+            else:
+                apply_micro_fades(y, 3)
+            # Resample + snap + b64
+            b64, meta = self._snap_and_encode(
+                y, seconds=chunk_secs,
+                target_sr=self.params.target_sr,
+                bars=self.params.bars_per_chunk
+            )
+            # Store the completed chunk
+            with self._lock:
+                self.idx = next_idx
+                self.outbox.append(JamChunk(index=next_idx, audio_base64=b64, metadata=meta))
+                # Keep outbox bounded (remove old chunks)
+                if len(self.outbox) > 10:
+                    # Remove chunks that are way behind the delivery point
+                    self.outbox = [ch for ch in self.outbox if ch.index > self._last_delivered_index - 5]
+            self.last_chunk_completed_at = time.time()
+            print(f"✅ Completed chunk {next_idx}")
+        print("🛑 JamWorker stopped")

utils.py ADDED Viewed

	@@ -0,0 +1,168 @@

+# utils.py
+from __future__ import annotations
+import io, base64, math
+from math import gcd
+import numpy as np
+import soundfile as sf
+from scipy.signal import resample_poly
+# Magenta RT audio types
+from magenta_rt import audio as au
+# Optional loudness
+try:
+    import pyloudnorm as pyln
+    _HAS_LOUDNORM = True
+except Exception:
+    _HAS_LOUDNORM = False
+# ---------- Loudness ----------
+def _measure_lufs(wav: au.Waveform) -> float:
+    meter = pyln.Meter(wav.sample_rate)  # BS.1770-4
+    return float(meter.integrated_loudness(wav.samples))
+def _rms(x: np.ndarray) -> float:
+    if x.size == 0: return 0.0
+    return float(np.sqrt(np.mean(x**2)))
+def match_loudness_to_reference(
+    ref: au.Waveform,
+    target: au.Waveform,
+    method: str = "auto",   # "auto"|"lufs"|"rms"|"none"
+    headroom_db: float = 1.0
+) -> tuple[au.Waveform, dict]:
+    stats = {"method": method, "applied_gain_db": 0.0}
+    if method == "none":
+        return target, stats
+    if method == "auto":
+        method = "lufs" if _HAS_LOUDNORM else "rms"
+    if method == "lufs" and _HAS_LOUDNORM:
+        L_ref = _measure_lufs(ref)
+        L_tgt = _measure_lufs(target)
+        delta_db = L_ref - L_tgt
+        gain = 10.0 ** (delta_db / 20.0)
+        y = target.samples.astype(np.float32) * gain
+        stats.update({"ref_lufs": L_ref, "tgt_lufs_before": L_tgt, "applied_gain_db": delta_db})
+    else:
+        ra = _rms(ref.samples)
+        rb = _rms(target.samples)
+        if rb <= 1e-12:
+            return target, stats
+        gain = ra / rb
+        y = target.samples.astype(np.float32) * gain
+        stats.update({"ref_rms": ra, "tgt_rms_before": rb, "applied_gain_db": 20*np.log10(max(gain,1e-12))})
+    # simple peak “limiter” to keep headroom
+    limit = 10 ** (-headroom_db / 20.0)   # e.g., -1 dBFS
+    peak = float(np.max(np.abs(y))) if y.size else 0.0
+    if peak > limit:
+        y *= (limit / peak)
+        stats["post_peak_limited"] = True
+    else:
+        stats["post_peak_limited"] = False
+    target.samples = y.astype(np.float32)
+    return target, stats
+# ---------- Stitch / fades / trims ----------
+def stitch_generated(chunks, sr: int, xfade_s: float) -> au.Waveform:
+    if not chunks:
+        raise ValueError("no chunks")
+    xfade_n = int(round(xfade_s * sr))
+    if xfade_n <= 0:
+        return au.Waveform(np.concatenate([c.samples for c in chunks], axis=0), sr)
+    t = np.linspace(0, np.pi/2, xfade_n, endpoint=False, dtype=np.float32)
+    eq_in, eq_out = np.sin(t)[:, None], np.cos(t)[:, None]
+    first = chunks[0].samples
+    if first.shape[0] < xfade_n:
+        raise ValueError("chunk shorter than crossfade prefix")
+    out = first[xfade_n:].copy()  # drop model pre-roll
+    for i in range(1, len(chunks)):
+        cur = chunks[i].samples
+        if cur.shape[0] < xfade_n:
+            continue
+        head, tail = cur[:xfade_n], cur[xfade_n:]
+        mixed = out[-xfade_n:] * eq_out + head * eq_in
+        out = np.concatenate([out[:-xfade_n], mixed, tail], axis=0)
+    return au.Waveform(out, sr)
+def hard_trim_seconds(wav: au.Waveform, seconds: float) -> au.Waveform:
+    n = int(round(seconds * wav.sample_rate))
+    return au.Waveform(wav.samples[:n], wav.sample_rate)
+def apply_micro_fades(wav: au.Waveform, ms: int = 5) -> None:
+    n = int(wav.sample_rate * ms / 1000.0)
+    if n > 0 and wav.samples.shape[0] > 2*n:
+        env = np.linspace(0.0, 1.0, n, dtype=np.float32)[:, None]
+        wav.samples[:n]  *= env
+        wav.samples[-n:] *= env[::-1]
+# ---------- Token context helpers ----------
+def make_bar_aligned_context(tokens, bpm, fps=25, ctx_frames=250, beats_per_bar=4):
+    frames_per_bar_f = (beats_per_bar * 60.0 / bpm) * fps
+    frames_per_bar = int(round(frames_per_bar_f))
+    if abs(frames_per_bar - frames_per_bar_f) > 1e-3:
+        reps = int(np.ceil(ctx_frames / len(tokens)))
+        return np.tile(tokens, (reps, 1))[-ctx_frames:]
+    reps = int(np.ceil(ctx_frames / len(tokens)))
+    tiled = np.tile(tokens, (reps, 1))
+    end = (len(tiled) // frames_per_bar) * frames_per_bar
+    if end < ctx_frames:
+        return tiled[-ctx_frames:]
+    start = end - ctx_frames
+    return tiled[start:end]
+def take_bar_aligned_tail(wav: au.Waveform, bpm: float, beats_per_bar: int, ctx_seconds: float, max_bars=None) -> au.Waveform:
+    spb = (60.0 / bpm) * beats_per_bar
+    bars_needed = max(1, int(round(ctx_seconds / spb)))
+    if max_bars is not None:
+        bars_needed = min(bars_needed, max_bars)
+    tail_seconds = bars_needed * spb
+    n = int(round(tail_seconds * wav.sample_rate))
+    if n >= wav.samples.shape[0]:
+        return wav
+    return au.Waveform(wav.samples[-n:], wav.sample_rate)
+# ---------- SR normalize + snap ----------
+def resample_and_snap(x: np.ndarray, cur_sr: int, target_sr: int, seconds: float) -> np.ndarray:
+    """
+    x: np.ndarray shape (S, C), float32
+    Returns: exact-length array (round(seconds*target_sr), C)
+    """
+    if x.ndim == 1:
+        x = x[:, None]
+    if cur_sr != target_sr:
+        g = gcd(cur_sr, target_sr)
+        up, down = target_sr // g, cur_sr // g
+        x = resample_poly(x, up, down, axis=0)
+    expected_len = int(round(seconds * target_sr))
+    if x.shape[0] < expected_len:
+        pad = np.zeros((expected_len - x.shape[0], x.shape[1]), dtype=x.dtype)
+        x = np.vstack([x, pad])
+    elif x.shape[0] > expected_len:
+        x = x[:expected_len, :]
+    return x.astype(np.float32, copy=False)
+# ---------- WAV encode ----------
+def wav_bytes_base64(x: np.ndarray, sr: int) -> tuple[str, int, int]:
+    """
+    x: np.ndarray shape (S, C)
+    returns: (base64_wav, total_samples, channels)
+    """
+    buf = io.BytesIO()
+    sf.write(buf, x, sr, subtype="FLOAT", format="WAV")
+    buf.seek(0)
+    b64 = base64.b64encode(buf.read()).decode("utf-8")
+    return b64, int(x.shape[0]), int(x.shape[1])