Spaces:

thecollabagepatch
/

magenta-retry

Running

App Files Files Community

thecollabagepatch committed on Sep 17

Commit

30fdbbc

1 Parent(s): 384e4ac

updating docs a bit

Browse files

Files changed (2) hide show

app.py +30 -179
documentation.html +302 -43

app.py CHANGED Viewed

@@ -77,45 +77,22 @@ from pydantic import BaseModel
 from model_management import CheckpointManager, AssetManager, ModelSelector, ModelSelect
 # ---- Finetune assets (mean & centroids) --------------------------------------
-_FINETUNE_REPO_DEFAULT = os.getenv("MRT_ASSETS_REPO", "thepatch/magenta-ft")
 _ASSETS_REPO_ID: str | None = None
 _MEAN_EMBED: np.ndarray | None = None           # shape (D,) dtype float32
 _CENTROIDS: np.ndarray | None = None            # shape (K, D) dtype float32
-_STEP_RE = re.compile(r"(?:^|/)checkpoint_(\d+)(?:/|\.tar\.gz|\.tgz)?$")
 # Create instances (these don't modify globals)
 asset_manager = AssetManager()
 model_selector = ModelSelector(CheckpointManager(), asset_manager)
 # Sync asset manager with existing globals
-def _sync_asset_manager():
-    asset_manager.mean_embed = _MEAN_EMBED
-    asset_manager.centroids = _CENTROIDS
-    asset_manager.assets_repo_id = _ASSETS_REPO_ID
-# def _list_ckpt_steps(repo_id: str, revision: str = "main") -> list[int]:
-#     """
-#     List available checkpoint steps in a HF model repo without downloading all weights.
-#     Looks for:
-#       checkpoint_<step>/
-#       checkpoint_<step>.tgz | .tar.gz
-#       archives/checkpoint_<step>.tgz | .tar.gz
-#     """
-#     api = HfApi()
-#     files = api.list_repo_files(repo_id=repo_id, repo_type="model", revision=revision)
-#     steps = set()
-#     for f in files:
-#         m = _STEP_RE.search(f)
-#         if m:
-#             try:
-#                 steps.add(int(m.group(1)))
-#             except:
-#                 pass
-#     return sorted(steps)
-# def _step_exists(repo_id: str, revision: str, step: int) -> bool:
-#     return step in _list_ckpt_steps(repo_id, revision)
 def _any_jam_running() -> bool:
     with jam_lock:
@@ -129,132 +106,6 @@ def _stop_all_jams(timeout: float = 5.0):
                 w.join(timeout=timeout)
                 jam_registry.pop(sid, None)
-# def _load_finetune_assets_from_hf(repo_id: str | None) -> tuple[bool, str]:
-#     """
-#     Download & load mean_style_embed.npy and cluster_centroids.npy from a HF model repo.
-#     Safe to call multiple times; will overwrite globals if successful.
-#     """
-#     global _ASSETS_REPO_ID, _MEAN_EMBED, _CENTROIDS
-#     repo_id = repo_id or _FINETUNE_REPO_DEFAULT
-#     try:
-#         from huggingface_hub import hf_hub_download
-#         mean_path = None
-#         cent_path = None
-#         try:
-#             mean_path = hf_hub_download(repo_id, filename="mean_style_embed.npy", repo_type="model")
-#         except Exception:
-#             pass
-#         try:
-#             cent_path = hf_hub_download(repo_id, filename="cluster_centroids.npy", repo_type="model")
-#         except Exception:
-#             pass
-#         if mean_path is None and cent_path is None:
-#             return False, f"No finetune asset files found in repo {repo_id}"
-#         if mean_path is not None:
-#             m = np.load(mean_path)
-#             if m.ndim != 1:
-#                 return False, f"mean_style_embed.npy must be 1-D (got {m.shape})"
-#         else:
-#             m = None
-#         if cent_path is not None:
-#             c = np.load(cent_path)
-#             if c.ndim != 2:
-#                 return False, f"cluster_centroids.npy must be 2-D (got {c.shape})"
-#         else:
-#             c = None
-#         # Optional: shape check vs model embedding dim once model is alive
-#         try:
-#             d = int(get_mrt().style_model.config.embedding_dim)
-#             if m is not None and m.shape[0] != d:
-#                 return False, f"mean_style_embed dim {m.shape[0]} != model dim {d}"
-#             if c is not None and c.shape[1] != d:
-#                 return False, f"cluster_centroids dim {c.shape[1]} != model dim {d}"
-#         except Exception:
-#             # Model not built yet; we’ll trust the files and rely on runtime checks later
-#             pass
-#         _MEAN_EMBED = m.astype(np.float32, copy=False) if m is not None else None
-#         _CENTROIDS = c.astype(np.float32, copy=False) if c is not None else None
-#         _ASSETS_REPO_ID = repo_id
-#         logging.info("Loaded finetune assets from %s (mean=%s, centroids=%s)",
-#                      repo_id,
-#                      "yes" if _MEAN_EMBED is not None else "no",
-#                      f"{_CENTROIDS.shape[0]}x{_CENTROIDS.shape[1]}" if _CENTROIDS is not None else "no")
-#         return True, "ok"
-#     except Exception as e:
-#         logging.exception("Failed to load finetune assets: %s", e)
-#         return False, str(e)
-# def _ensure_assets_loaded():
-#     # Best-effort lazy load if nothing is loaded yet
-#     if _MEAN_EMBED is None and _CENTROIDS is None:
-#         _load_finetune_assets_from_hf(_ASSETS_REPO_ID or _FINETUNE_REPO_DEFAULT)
-# ------------------------------------------------------------------------------
-# def _resolve_checkpoint_dir() -> str | None:
-#     repo_id = os.getenv("MRT_CKPT_REPO")
-#     if not repo_id:
-#         return None
-#     step = os.getenv("MRT_CKPT_STEP")  # e.g. "1863001"
-#     root = Path(snapshot_download(
-#         repo_id=repo_id,
-#         repo_type="model",
-#         revision=os.getenv("MRT_CKPT_REV", "main"),
-#         local_dir="/home/appuser/.cache/mrt_ckpt/repo",
-#         local_dir_use_symlinks=False,
-#     ))
-#     # Prefer an archive if present (more reliable for Zarr/T5X)
-#     arch_names = [
-#         f"checkpoint_{step}.tgz",
-#         f"checkpoint_{step}.tar.gz",
-#         f"archives/checkpoint_{step}.tgz",
-#         f"archives/checkpoint_{step}.tar.gz",
-#     ] if step else []
-#     cache_root = Path("/home/appuser/.cache/mrt_ckpt/extracted")
-#     cache_root.mkdir(parents=True, exist_ok=True)
-#     for name in arch_names:
-#         arch = root / name
-#         if arch.is_file():
-#             out_dir = cache_root / f"checkpoint_{step}"
-#             marker = out_dir.with_suffix(".ok")
-#             if not marker.exists():
-#                 out_dir.mkdir(parents=True, exist_ok=True)
-#                 with tarfile.open(arch, "r:*") as tf:
-#                     tf.extractall(out_dir)
-#                 marker.write_text("ok")
-#             # sanity: require .zarray to exist inside the extracted tree
-#             if not any(out_dir.rglob(".zarray")):
-#                 raise RuntimeError(f"Extracted archive missing .zarray files: {out_dir}")
-#             return str(out_dir / f"checkpoint_{step}") if (out_dir / f"checkpoint_{step}").exists() else str(out_dir)
-#     # No archive; try raw folder from repo and sanity check.
-#     if step:
-#         raw = root / f"checkpoint_{step}"
-#         if raw.is_dir():
-#             if not any(raw.rglob(".zarray")):
-#                 raise RuntimeError(
-#                     f"Downloaded checkpoint_{step} appears incomplete (no .zarray). "
-#                     "Upload as a .tgz or push via git from a Unix shell."
-#                 )
-#             return str(raw)
-#     # Pick latest if no step
-#     step_dirs = [d for d in root.iterdir() if d.is_dir() and re.match(r"checkpoint_\\d+$", d.name)]
-#     if step_dirs:
-#         pick = max(step_dirs, key=lambda d: int(d.name.split('_')[-1]))
-#         if not any(pick.rglob(".zarray")):
-#             raise RuntimeError(f"Downloaded {pick} appears incomplete (no .zarray).")
-#         return str(pick)
-#     return None
 async def send_json_safe(ws: WebSocket, obj) -> bool:
     """Try to send. Returns False if the socket is (or becomes) closed."""
@@ -328,19 +179,19 @@ try:
 except Exception:
     _HAS_LOUDNORM = False
-def _combine_styles(mrt, styles_str: str = "", weights_str: str = ""):
-    extra = [s.strip() for s in (styles_str or "").split(",") if s.strip()]
-    if not extra:
-        return mrt.embed_style("warmup")
-    sw = [float(x) for x in (weights_str or "").split(",") if x.strip()]
-    embeds, weights = [], []
-    for i, s in enumerate(extra):
-        embeds.append(mrt.embed_style(s))
-        weights.append(sw[i] if i < len(sw) else 1.0)
-    wsum = sum(weights) or 1.0
-    weights = [w/wsum for w in weights]
-    import numpy as np
-    return np.sum([w*e for w, e in zip(weights, embeds)], axis=0).astype(np.float32)
 def build_style_vector(
     mrt,
@@ -518,6 +369,11 @@ def _mrt_warmup():
             # Never crash on warmup errors; log and continue serving
             logging.exception("MagentaRT warmup failed (continuing without warmup): %s", e)
 # Kick it off in the background on server start
 @app.on_event("startup")
 def _kickoff_warmup():
@@ -640,17 +496,6 @@ def model_checkpoints(repo_id: str, revision: str = "main"):
     steps = CheckpointManager.list_ckpt_steps(repo_id, revision)
     return {"repo": repo_id, "revision": revision, "steps": steps, "latest": (steps[-1] if steps else None)}
-# class ModelSelect(BaseModel):
-#     size: Optional[Literal["base","large"]] = None
-#     repo_id: Optional[str] = None
-#     revision: Optional[str] = "main"
-#     step: Optional[Union[int, str]] = None   # allow "latest"
-#     assets_repo_id: Optional[str] = None     # default: follow repo_id
-#     sync_assets: bool = True                 # load mean/centroids from repo
-#     prewarm: bool = False                    # call get_mrt() to build right away
-#     stop_active: bool = True                 # auto-stop jams; else 409
-#     dry_run: bool = False                    # validate only, don't swap
 @app.post("/model/select")
 def model_select(req: ModelSelect):
     global _MRT, _MEAN_EMBED, _CENTROIDS, _ASSETS_REPO_ID
@@ -733,6 +578,12 @@ def model_select(req: ModelSelect):
         except Exception:
             pass
         raise HTTPException(status_code=500, detail=f"Swap failed: {e}")

 from model_management import CheckpointManager, AssetManager, ModelSelector, ModelSelect
 # ---- Finetune assets (mean & centroids) --------------------------------------
+# _FINETUNE_REPO_DEFAULT = os.getenv("MRT_ASSETS_REPO", "thepatch/magenta-ft")
 _ASSETS_REPO_ID: str | None = None
 _MEAN_EMBED: np.ndarray | None = None           # shape (D,) dtype float32
 _CENTROIDS: np.ndarray | None = None            # shape (K, D) dtype float32
+# _STEP_RE = re.compile(r"(?:^|/)checkpoint_(\d+)(?:/|\.tar\.gz|\.tgz)?$")
 # Create instances (these don't modify globals)
 asset_manager = AssetManager()
 model_selector = ModelSelector(CheckpointManager(), asset_manager)
 # Sync asset manager with existing globals
+# def _sync_asset_manager():
+#     asset_manager.mean_embed = _MEAN_EMBED
+#     asset_manager.centroids = _CENTROIDS
+#     asset_manager.assets_repo_id = _ASSETS_REPO_ID
 def _any_jam_running() -> bool:
     with jam_lock:
                 w.join(timeout=timeout)
                 jam_registry.pop(sid, None)
 async def send_json_safe(ws: WebSocket, obj) -> bool:
     """Try to send. Returns False if the socket is (or becomes) closed."""
 except Exception:
     _HAS_LOUDNORM = False
+# def _combine_styles(mrt, styles_str: str = "", weights_str: str = ""):
+#     extra = [s.strip() for s in (styles_str or "").split(",") if s.strip()]
+#     if not extra:
+#         return mrt.embed_style("warmup")
+#     sw = [float(x) for x in (weights_str or "").split(",") if x.strip()]
+#     embeds, weights = [], []
+#     for i, s in enumerate(extra):
+#         embeds.append(mrt.embed_style(s))
+#         weights.append(sw[i] if i < len(sw) else 1.0)
+#     wsum = sum(weights) or 1.0
+#     weights = [w/wsum for w in weights]
+#     import numpy as np
+#     return np.sum([w*e for w, e in zip(weights, embeds)], axis=0).astype(np.float32)
 def build_style_vector(
     mrt,
             # Never crash on warmup errors; log and continue serving
             logging.exception("MagentaRT warmup failed (continuing without warmup): %s", e)
+# ----------------------------
+# startup and model selection
+# ----------------------------
 # Kick it off in the background on server start
 @app.on_event("startup")
 def _kickoff_warmup():
     steps = CheckpointManager.list_ckpt_steps(repo_id, revision)
     return {"repo": repo_id, "revision": revision, "steps": steps, "latest": (steps[-1] if steps else None)}
 @app.post("/model/select")
 def model_select(req: ModelSelect):
     global _MRT, _MEAN_EMBED, _CENTROIDS, _ASSETS_REPO_ID
         except Exception:
             pass
         raise HTTPException(status_code=500, detail=f"Swap failed: {e}")
+# ----------------------------
+# one-shot generation
+# ----------------------------

documentation.html CHANGED Viewed

@@ -4,67 +4,326 @@
   <meta charset="utf-8">
   <title>MagentaRT Research API</title>
   <style>
-    body { font-family: Arial, sans-serif; max-width: 860px; margin: 48px auto; padding: 0 20px; color:#111; }
-    code, pre { background:#f6f8fa; border:1px solid #eaecef; border-radius:6px; padding:2px 6px; }
-    pre { padding:12px; overflow:auto; }
-    .muted { color:#555; }
     ul { line-height: 1.8; }
   </style>
 </head>
 <body>
-  <h1>🎵 MagentaRT Research API</h1>
-  <p class="muted"><strong>Purpose:</strong> AI music generation for iOS/web app research using Google's MagentaRT.</p>
-  <h2>Available Endpoints</h2>
-  <ul>
-    <li><code>POST /generate</code> – Generate 4–8 bars of music (HTTP, bar-aligned)</li>
-    <li><code>POST /jam/start</code> – Start continuous jamming (HTTP)</li>
-    <li><code>GET /jam/next</code> – Get next chunk (HTTP)</li>
-    <li><code>POST /jam/consume</code> – Confirm a chunk as consumed (HTTP)</li>
-    <li><code>POST /jam/stop</code> – End session (HTTP)</li>
-    <li><code>WEBSOCKET /ws/jam</code> – Realtime streaming (<code>mode="rt"</code>)</li>
-    <li><code>GET /docs</code> – API documentation (Gradio)</li>
-  </ul>
-  <h2>WebSocket Quick Start (rt mode)</h2>
-  <p>Connect to <code>wss://&lt;your-space&gt;/ws/jam</code> and send:</p>
-  <pre>{
   "type": "start",
   "mode": "rt",
   "binary_audio": false,
   "params": {
-    "styles": "warmup",
     "temperature": 1.1,
     "topk": 40,
     "guidance_weight": 1.1,
-    "pace": "realtime",          // or "asap" to bootstrap quickly
-    "max_decode_frames": 50      // default ~2.0s; try 36–45 on smaller GPUs
   }
 }</pre>
-  <p>Update parameters live:</p>
-  <pre>{
   "type": "update",
   "styles": "jazz, hiphop",
-  "style_weights": "1.0,0.8",
   "temperature": 1.2,
   "topk": 64,
   "guidance_weight": 1.0,
-  "pace": "realtime",
-  "max_decode_frames": 40
 }</pre>
-  <p>Stop:</p>
-  <pre>{"type":"stop"}</pre>
-  <h2>Notes</h2>
-  <ul>
-    <li>Audio: 48 kHz stereo, ~2.0 s chunks by default with ~40 ms crossfade.</li>
-    <li>L40S 48GB: faster than realtime → prefer <code>pace: "realtime"</code>.</li>
-    <li>L4 24GB: slightly under realtime even with pre-roll and tuning.</li>
-    <li>For sustained realtime, target ~40 GB VRAM per active stream (e.g., A100 40GB or ≈35–40 GB MIG slice).</li>
-  </ul>
-  <p class="muted"><strong>Licensing:</strong> Uses MagentaRT (Apache 2.0 + CC-BY 4.0). Users are responsible for outputs.</p>
-  <p>See <a href="../blob/main/docs" target="_blank">documentation files</a> for detailed guides.</p>
-  <p>Or <a href="/docs">/docs</a> for auto-generated FastAPI reference.</p>
 </body>
 </html>

   <meta charset="utf-8">
   <title>MagentaRT Research API</title>
   <style>
+    body {
+      font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
+      max-width: 900px;
+      margin: 48px auto;
+      padding: 0 24px;
+      color: #111;
+      line-height: 1.6;
+    }
+    .header { text-align: center; margin-bottom: 48px; }
+    .badge {
+      display: inline-block;
+      background: #ff6b35;
+      color: white;
+      padding: 4px 12px;
+      border-radius: 16px;
+      font-size: 0.85em;
+      font-weight: 500;
+      margin-left: 8px;
+    }
+    code, pre {
+      background: #f6f8fa;
+      border: 1px solid #eaecef;
+      border-radius: 6px;
+      font-family: 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Consolas, monospace;
+    }
+    code { padding: 2px 6px; }
+    pre {
+      padding: 16px;
+      overflow-x: auto;
+      margin: 16px 0;
+      position: relative;
+    }
+    .copy-btn {
+      position: absolute;
+      top: 8px;
+      right: 8px;
+      background: #0969da;
+      color: white;
+      border: none;
+      border-radius: 4px;
+      padding: 4px 8px;
+      font-size: 12px;
+      cursor: pointer;
+    }
+    .copy-btn:hover { background: #0550ae; }
+    .muted { color: #656d76; }
+    .warning {
+      background: #fff8c5;
+      border: 1px solid #e3b341;
+      border-radius: 8px;
+      padding: 16px;
+      margin: 16px 0;
+    }
+    .info {
+      background: #dbeafe;
+      border: 1px solid #3b82f6;
+      border-radius: 8px;
+      padding: 16px;
+      margin: 16px 0;
+    }
     ul { line-height: 1.8; }
+    .endpoint {
+      background: #f8f9fa;
+      border-left: 4px solid #0969da;
+      padding: 12px 16px;
+      margin: 12px 0;
+    }
+    .demo-placeholder {
+      background: #f6f8fa;
+      border: 2px dashed #d1d9e0;
+      border-radius: 8px;
+      padding: 48px;
+      text-align: center;
+      margin: 24px 0;
+      color: #656d76;
+    }
+    .grid {
+      display: grid;
+      grid-template-columns: 1fr 1fr;
+      gap: 24px;
+      margin: 24px 0;
+    }
+    .card {
+      background: #f8f9fa;
+      border: 1px solid #e1e8ed;
+      border-radius: 8px;
+      padding: 20px;
+    }
+    a { color: #0969da; text-decoration: none; }
+    a:hover { text-decoration: underline; }
+    .section { margin: 48px 0; }
   </style>
 </head>
 <body>
+  <div class="header">
+    <h1>🎵 MagentaRT Research API</h1>
+    <p class="muted"><strong>AI Music Generation API</strong> • Real-time streaming • Custom fine-tuning support</p>
+    <span class="badge">Research Project</span>
+  </div>
+  <div class="demo-placeholder">
+    <h3>📱 App Demo Video</h3>
+    <p>Demo video will be embedded here<br>
+    <small>Showing the iPhone app generating music in real-time</small></p>
+  </div>
+  <div class="section">
+    <h2>Overview</h2>
+    <p>This API powers AI music generation using Google's MagentaRT, designed for real-time audio streaming and custom model fine-tuning. Built for iOS app integration with WebSocket streaming support.</p>
+    <div class="info">
+      <strong>Hardware Requirements:</strong> Optimal performance requires an L40S GPU (48GB VRAM) for real-time streaming. L4 24GB works but may not maintain real-time performance.
+    </div>
+  </div>
+  <div class="section">
+    <h2>Quick Start - WebSocket Streaming</h2>
+    <p>Connect to <code>wss://&lt;your-space&gt;/ws/jam</code> for real-time audio generation:</p>
+    <h3>Start Real-time Generation</h3>
+    <pre><button class="copy-btn" onclick="copyCode(this)">Copy</button>{
   "type": "start",
   "mode": "rt",
   "binary_audio": false,
   "params": {
+    "styles": "electronic, ambient",
+    "style_weights": "1.0, 0.8",
     "temperature": 1.1,
     "topk": 40,
     "guidance_weight": 1.1,
+    "pace": "realtime",
+    "style_ramp_seconds": 8.0,
+    "mean": 0.0,
+    "centroid_weights": "0.0, 0.0, 0.0"
   }
 }</pre>
+    <h3>Update Parameters Live</h3>
+    <pre><button class="copy-btn" onclick="copyCode(this)">Copy</button>{
   "type": "update",
   "styles": "jazz, hiphop",
+  "style_weights": "1.0, 0.8",
   "temperature": 1.2,
   "topk": 64,
   "guidance_weight": 1.0,
+  "mean": 0.2,
+  "centroid_weights": "0.1, 0.3, 0.0"
 }</pre>
+    <h3>Stop Generation</h3>
+    <pre><button class="copy-btn" onclick="copyCode(this)">Copy</button>{"type": "stop"}</pre>
+  </div>
+  <div class="section">
+    <h2>API Endpoints</h2>
+    <div class="endpoint">
+      <strong>POST /generate</strong> - Generate 4–8 bars of music with input audio
+    </div>
+    <div class="endpoint">
+      <strong>POST /generate_style</strong> - Generate music from style prompts only (experimental)
+    </div>
+    <div class="endpoint">
+      <strong>POST /jam/start</strong> - Start continuous jamming session
+    </div>
+    <div class="endpoint">
+      <strong>GET /jam/next</strong> - Get next audio chunk from session
+    </div>
+    <div class="endpoint">
+      <strong>POST /jam/consume</strong> - Mark chunk as consumed
+    </div>
+    <div class="endpoint">
+      <strong>POST /jam/stop</strong> - End jamming session
+    </div>
+    <div class="endpoint">
+      <strong>WEBSOCKET /ws/jam</strong> - Real-time streaming interface
+    </div>
+    <div class="endpoint">
+      <strong>POST /model/select</strong> - Switch between base and fine-tuned models
+    </div>
+  </div>
+  <div class="section">
+    <h2>Custom Fine-Tuning</h2>
+    <p>Train your own MagentaRT models and use them with this API and the iOS app.</p>
+    <div class="grid">
+      <div class="card">
+        <h3>1. Train Your Model</h3>
+        <p>Use the official MagentaRT fine-tuning notebook:</p>
+        <p><a href="https://github.com/magenta-realtime/notebooks/blob/main/Magenta_RT_Finetune.ipynb" target="_blank">🔗 MagentaRT Fine-tuning Colab</a></p>
+        <p>This will create checkpoint folders like:</p>
+        <ul>
+          <li><code>checkpoint_1861001/</code></li>
+          <li><code>checkpoint_1862001/</code></li>
+          <li>And steering assets: <code>cluster_centroids.npy</code>, <code>mean_style_embed.npy</code></li>
+        </ul>
+      </div>
+      <div class="card">
+        <h3>2. Package Checkpoints</h3>
+        <p>Checkpoints must be compressed as .tgz files to preserve .zarray files correctly.</p>
+        <div class="warning">
+          <strong>Important:</strong> Do not download checkpoint folders directly from Google Drive - the .zarray files won't transfer properly.
+        </div>
+      </div>
+    </div>
+    <h3>Checkpoint Packaging Script</h3>
+    <p>Use this in a Colab cell to properly package your checkpoints:</p>
+    <pre><button class="copy-btn" onclick="copyCode(this)">Copy</button># Mount Drive to access your trained checkpoints
+from google.colab import drive
+drive.mount('/content/drive')
+# Set the path to your checkpoint folder
+CKPT_SRC = '/content/drive/MyDrive/thepatch/checkpoint_1862001'  # Adjust path
+# Copy folder to local storage (preserves dotfiles)
+!rm -rf /content/checkpoint_1862001
+!cp -a "$CKPT_SRC" /content/
+# Verify .zarray files are present
+!find /content/checkpoint_1862001 -name .zarray | wc -l
+# Create properly formatted .tgz archive
+!tar -C /content -czf /content/checkpoint_1862001.tgz checkpoint_1862001
+# Verify critical files are in the archive
+!tar -tzf /content/checkpoint_1862001.tgz | grep -c '.zarray'
+# Download the .tgz file
+from google.colab import files
+files.download('/content/checkpoint_1862001.tgz')</pre>
+    <h3>3. Upload to Hugging Face</h3>
+    <p>Create a model repository and upload:</p>
+    <ul>
+      <li>Your <code>.tgz</code> checkpoint files</li>
+      <li><code>cluster_centroids.npy</code> (for steering)</li>
+      <li><code>mean_style_embed.npy</code> (for steering)</li>
+    </ul>
+    <div class="info">
+      <strong>Example Repository:</strong> <a href="https://huggingface.co/thepatch/magenta-ft" target="_blank">thepatch/magenta-ft</a><br>
+      Shows the correct file structure with .tgz files and .npy steering assets in the root directory.
+    </div>
+    <h3>4. Use in the App</h3>
+    <p>In the iOS app's model selector, point to your Hugging Face repository URL. The app will automatically discover available checkpoints and allow switching between them.</p>
+  </div>
+  <div class="section">
+    <h2>Technical Specifications</h2>
+    <ul>
+      <li><strong>Audio Format:</strong> 48 kHz stereo, ~2.0s chunks with ~40ms crossfade</li>
+      <li><strong>Model Sizes:</strong> Base and Large variants available</li>
+      <li><strong>Steering:</strong> Support for text prompts, audio embeddings, and centroid-based fine-tune steering</li>
+      <li><strong>Real-time Performance:</strong> L40S recommended; L4 may experience slight delays</li>
+      <li><strong>Memory Requirements:</strong> ~40GB VRAM for sustained real-time streaming</li>
+    </ul>
+    <div class="warning">
+      <strong>Note:</strong> The <code>/generate_style</code> endpoint is experimental and may not properly adhere to BPM without additional context (we are considering supplying a metronome-based context instead of silence).
+    </div>
+  </div>
+  <div class="section">
+    <h2>Integration with iOS App</h2>
+    <p>This API is designed to work seamlessly with our iOS music generation app:</p>
+    <ul>
+      <li>Real-time audio streaming via WebSockets</li>
+      <li>Dynamic model switching between base and fine-tuned models</li>
+      <li>Integration with stable-audio-open-small for combined input audio generation</li>
+      <li>Live parameter adjustment during generation</li>
+    </ul>
+  </div>
+  <div class="section">
+    <h2>Deployment</h2>
+    <p>To run your own instance:</p>
+    <ol>
+      <li>Duplicate this Hugging Face Space</li>
+      <li>Ensure you have access to an L40S GPU</li>
+      <li>Point your iOS app to the new space URL (e.g., <code>https://your-username-magenta-retry.hf.space</code>)</li>
+      <li>Upload your fine-tuned models as described above</li>
+    </ol>
+  </div>
+  <div class="section">
+    <h2>Support &amp; Contact</h2>
+    <p>This is an active research project. For questions, technical support, or collaboration:</p>
+    <p><strong>Email:</strong> <a href="mailto:kev@thecollabagepatch.com">kev@thecollabagepatch.com</a></p>
+    <div class="info">
+      <strong>Research Status:</strong> This project is under active development. Features and API may change. We welcome feedback and contributions from the research community.
+    </div>
+  </div>
+  <div class="section">
+    <h2>Licensing</h2>
+    <p>Built on Google's MagentaRT (Apache 2.0 + CC-BY 4.0). Users are responsible for their generated outputs and ensuring compliance with applicable laws and platform policies.</p>
+    <p><a href="/docs">📖 API Reference Documentation</a></p>
+  </div>
+  <script>
+    function copyCode(button) {
+      const pre = button.parentElement;
+      const code = pre.textContent.replace('Copy', '').trim();
+      navigator.clipboard.writeText(code).then(() => {
+        button.textContent = 'Copied!';
+        setTimeout(() => button.textContent = 'Copy', 2000);
+      });
+    }
+  </script>
 </body>
 </html>