Spaces:
Running
Running
| import { useState } from "react"; | |
| import { useModel } from "./hooks/useModel"; | |
| import Waveform from "./components/Waveform"; | |
| import PulseBars from "./components/PulseBars"; | |
/**
 * Built-in example prompts offered as one-click presets.
 *
 * Each entry carries a display `name` and `emoji`, a `duration` (rendered with
 * an "s" suffix in the UI), a free-text `caption`, and `lyrics`. Lyrics equal
 * to "[instrumental]" mark a preset without vocals.
 *
 * The array and every entry are frozen: this is shared module-level state that
 * is only ever read, so accidental mutation should fail loudly.
 */
const PRESETS = Object.freeze(
  [
    {
      name: "Pop Ballad",
      emoji: "💗",
      duration: 60,
      caption: "A gentle pop ballad with piano and soft vocals, key of C major, 80 BPM, emotional and dreamy",
      lyrics: "[verse]\nUnderneath the stars tonight\nWe dance beneath the pale moonlight\nEvery moment feels so right\nHolding you so close and tight\n\n[chorus]\nThis is where I want to be\nRight here with you next to me\nLet the world just fade away\nIn your arms I want to stay",
    },
    {
      name: "Rock Anthem",
      emoji: "🎸",
      duration: 60,
      caption: "An energetic rock anthem with electric guitars and powerful drums, key of E minor, 140 BPM, aggressive and intense",
      lyrics: "[verse]\nFire burning in my veins\nBreaking free from all these chains\nNothing left to hold me back\nRiding down the beaten track\n\n[chorus]\nWe are the ones who rise\nWith thunder in our eyes\nWe'll never be denied\nWe're burning up the sky",
    },
    {
      name: "Lo-fi Chill",
      emoji: "☕",
      duration: 20,
      caption: "A relaxing lo-fi hip hop beat with jazz piano samples and vinyl crackle, key of F major, 75 BPM, mellow and nostalgic",
      lyrics: "[instrumental]",
    },
  ].map((preset) => Object.freeze(preset)),
);
| function WebGPUGate({ children }) { | |
| const supported = typeof navigator !== "undefined" && !!navigator.gpu; | |
| if (supported) return children; | |
| return ( | |
| <div className="fixed inset-0 flex items-center justify-center z-50" style={{ background: "var(--bg)" }}> | |
| <div className="text-center max-w-md px-6"> | |
| <div className="text-5xl mb-4">🎹</div> | |
| <h1 className="text-2xl font-semibold mb-3" style={{ color: "var(--text)" }}> | |
| WebGPU not available | |
| </h1> | |
| <p style={{ color: "var(--text-muted)" }}> | |
| This demo needs WebGPU to run ACE-Step in your browser. Try Chrome 113+, Edge 113+, or Safari 26+ on desktop. | |
| </p> | |
| </div> | |
| </div> | |
| ); | |
| } | |
| function ProgressBar({ progress }) { | |
| if (!progress) return null; | |
| const pct = Math.max(0, Math.min(100, progress.percent || 0)); | |
| return ( | |
| <div className="w-full"> | |
| <div className="flex justify-between text-[11px] mb-1.5" style={{ color: "var(--text-muted)" }}> | |
| <span>{progress.label}</span> | |
| <span className="font-mono"> | |
| {progress.total > 1 && `${(progress.loaded / 1e6).toFixed(0)} / ${(progress.total / 1e6).toFixed(0)} MB · `} | |
| {pct.toFixed(0)}% | |
| </span> | |
| </div> | |
| <div className="h-1 rounded-full overflow-hidden" style={{ background: "var(--border)" }}> | |
| <div | |
| className="h-full rounded-full transition-all duration-300" | |
| style={{ width: `${pct}%`, background: "var(--accent)" }} | |
| /> | |
| </div> | |
| </div> | |
| ); | |
| } | |
| function LoadGate({ onLoad, status, message, progress, error }) { | |
| const loading = status === "loading"; | |
| return ( | |
| <div | |
| className="rounded-2xl p-8 fade-in" | |
| style={{ background: "var(--bg-elev)", border: "1px solid var(--border)" }} | |
| > | |
| <div className="flex flex-col items-center text-center"> | |
| <div className="text-4xl mb-3">🎹</div> | |
| <h2 className="text-xl font-semibold mb-2" style={{ color: "var(--text)" }}> | |
| Load models | |
| </h2> | |
| <p className="text-sm max-w-sm mb-5" style={{ color: "var(--text-muted)" }}> | |
| Loads ~8 GB of ONNX models. Everything runs in your browser — your prompts never leave this device. | |
| Built with{" "} | |
| <a href="https://huggingface.co/docs/transformers.js" target="_blank" rel="noreferrer" className="underline" style={{ color: "var(--accent)" }}> | |
| 🤗 Transformers.js | |
| </a> | |
| {" + "} | |
| <a href="https://onnxruntime.ai/docs/tutorials/web/" target="_blank" rel="noreferrer" className="underline" style={{ color: "var(--accent)" }}> | |
| ONNX Runtime Web | |
| </a>. | |
| </p> | |
| {error ? ( | |
| <div className="w-full text-sm mb-4 p-3 rounded-lg text-left" style={{ background: "oklch(0.25 0.08 22 / 0.3)", color: "var(--danger)" }}> | |
| {error} | |
| </div> | |
| ) : loading ? ( | |
| <div className="w-full space-y-3"> | |
| {message && ( | |
| <p className="text-xs" style={{ color: "var(--text-muted)" }}> | |
| {message} | |
| </p> | |
| )} | |
| {progress && <ProgressBar progress={progress} />} | |
| </div> | |
| ) : ( | |
| <button | |
| onClick={onLoad} | |
| disabled={loading} | |
| className="px-8 py-2.5 rounded-full font-medium transition hover:scale-[1.02] cursor-pointer" | |
| style={{ | |
| background: "var(--accent)", | |
| color: "var(--bg)", | |
| letterSpacing: "-0.01em", | |
| }} | |
| > | |
| Load models | |
| </button> | |
| )} | |
| </div> | |
| </div> | |
| ); | |
| } | |
| function PresetCard({ preset, active, onClick }) { | |
| return ( | |
| <button | |
| onClick={onClick} | |
| className="flex-1 min-w-0 p-3 rounded-xl text-left transition-all cursor-pointer hover:scale-[1.02]" | |
| style={{ | |
| background: active ? "var(--accent-soft)" : "var(--bg-elev)", | |
| border: `1px solid ${active ? "var(--accent)" : "var(--border)"}`, | |
| }} | |
| > | |
| <div className="text-xl mb-1">{preset.emoji}</div> | |
| <div className="text-sm font-medium truncate" style={{ color: "var(--text)" }}> | |
| {preset.name} | |
| </div> | |
| <div className="text-[10px] uppercase tracking-wider mt-0.5" style={{ color: "var(--text-dim)" }}> | |
| {preset.duration}s · {preset.lyrics === "[instrumental]" ? "instrumental" : "vocal"} | |
| </div> | |
| </button> | |
| ); | |
| } | |
| function GenerationStatus({ status, message }) { | |
| if (status !== "generating") return null; | |
| return ( | |
| <div | |
| className="rounded-2xl p-5 fade-in" | |
| style={{ background: "var(--bg-elev)", border: "1px solid var(--border)" }} | |
| > | |
| <PulseBars count={60} /> | |
| <div className="mt-3 flex items-center justify-between text-xs"> | |
| <span style={{ color: "var(--text)" }}>{message || "Generating…"}</span> | |
| <span className="font-mono" style={{ color: "var(--text-muted)" }}> | |
| this takes 1–4 min | |
| </span> | |
| </div> | |
| </div> | |
| ); | |
| } | |
| function OutputCard({ audioUrl, audioInfo }) { | |
| if (!audioUrl) return null; | |
| return ( | |
| <div | |
| className="rounded-2xl p-5 fade-in space-y-3" | |
| style={{ background: "var(--bg-elev)", border: "1px solid var(--border)" }} | |
| > | |
| <Waveform src={audioUrl} duration={audioInfo?.duration} /> | |
| <div className="flex items-center justify-between text-xs pt-2" style={{ borderTop: "1px solid var(--border)" }}> | |
| <div className="font-mono" style={{ color: "var(--text-muted)" }}> | |
| 48 kHz · stereo | |
| {audioInfo?.totalTime && ` · ${audioInfo.totalTime}s gen`} | |
| </div> | |
| <a | |
| href={audioUrl} | |
| download={audioInfo?.filename || "ace-step.wav"} | |
| className="px-3 py-1.5 rounded-md text-xs font-medium transition hover:opacity-80 cursor-pointer" | |
| style={{ background: "var(--surface)", color: "var(--text)" }} | |
| > | |
| ⬇ Download WAV | |
| </a> | |
| </div> | |
| </div> | |
| ); | |
| } | |
| export default function App() { | |
| const { status, message, progress, audioUrl, audioInfo, error, isLoaded, loadModel, generate } = useModel(); | |
| const [activeIdx, setActiveIdx] = useState(0); | |
| const [caption, setCaption] = useState(PRESETS[0].caption); | |
| const [lyrics, setLyrics] = useState(PRESETS[0].lyrics); | |
| const [duration, setDuration] = useState(PRESETS[0].duration); | |
| const [shift, setShift] = useState(3.0); | |
| const [numSteps, setNumSteps] = useState(8); | |
| const isWorking = status === "loading" || status === "generating"; | |
| const applyPreset = (i) => { | |
| setActiveIdx(i); | |
| setCaption(PRESETS[i].caption); | |
| setLyrics(PRESETS[i].lyrics); | |
| setDuration(PRESETS[i].duration); | |
| }; | |
| return ( | |
| <WebGPUGate> | |
| <div className="min-h-screen flex flex-col items-center px-4 py-10" style={{ background: "var(--bg)" }}> | |
| {/* Hero */} | |
| <header className="mb-10 w-full max-w-2xl fade-in"> | |
| <h1 className="leading-none mb-2 flex items-baseline gap-3 flex-wrap" style={{ | |
| fontSize: "clamp(2.5rem, 5vw, 3.5rem)", | |
| color: "var(--text)", | |
| }}> | |
| <span style={{ fontFamily: "'Dancing Script', cursive", fontWeight: 600 }}> | |
| ACE-Step | |
| </span> | |
| <span style={{ fontWeight: 600, letterSpacing: "-0.03em" }}> | |
| WebGPU | |
| </span> | |
| </h1> | |
| <p className="text-lg" style={{ color: "var(--text-muted)" }}> | |
| Describe any song. AI writes & produces it. | |
| </p> | |
| </header> | |
| <main className="w-full max-w-2xl space-y-4"> | |
| {!isLoaded ? ( | |
| <LoadGate onLoad={loadModel} status={status} message={message} progress={progress} error={error} /> | |
| ) : ( | |
| <> | |
| {/* Presets */} | |
| <div className="flex gap-2"> | |
| {PRESETS.map((p, i) => ( | |
| <PresetCard key={p.name} preset={p} active={i === activeIdx} onClick={() => applyPreset(i)} /> | |
| ))} | |
| </div> | |
| {/* Caption */} | |
| <div className="rounded-2xl p-4" style={{ background: "var(--bg-elev)", border: "1px solid var(--border)" }}> | |
| <label className="text-[10px] uppercase tracking-widest mb-2 block" style={{ color: "var(--text-dim)" }}> | |
| Description | |
| </label> | |
| <textarea | |
| value={caption} | |
| onChange={(e) => setCaption(e.target.value)} | |
| onInput={() => setActiveIdx(-1)} | |
| rows={2} | |
| className="w-full bg-transparent text-sm resize-none outline-none" | |
| style={{ color: "var(--text)" }} | |
| placeholder="Describe the music — style, instruments, key, BPM, mood…" | |
| /> | |
| </div> | |
| {/* Lyrics */} | |
| <div className="rounded-2xl p-4" style={{ background: "var(--bg-elev)", border: "1px solid var(--border)" }}> | |
| <label className="text-[10px] uppercase tracking-widest mb-2 block" style={{ color: "var(--text-dim)" }}> | |
| Lyrics (use [verse] / [chorus] tags, or [instrumental]) | |
| </label> | |
| <textarea | |
| value={lyrics} | |
| onChange={(e) => setLyrics(e.target.value)} | |
| onInput={() => setActiveIdx(-1)} | |
| rows={6} | |
| className="w-full bg-transparent text-sm resize-none outline-none font-mono" | |
| style={{ color: "var(--text)" }} | |
| /> | |
| </div> | |
| {/* Controls — pill row */} | |
| <div className="flex items-center gap-3 flex-wrap"> | |
| <div className="flex items-center gap-2 px-3 py-1.5 rounded-full text-xs" | |
| style={{ background: "var(--bg-elev)", border: "1px solid var(--border)" }}> | |
| <span style={{ color: "var(--text-muted)" }}>Duration</span> | |
| <input | |
| type="range" | |
| min={10} | |
| max={90} | |
| step={10} | |
| value={duration} | |
| onChange={(e) => setDuration(Number(e.target.value))} | |
| className="w-24" | |
| /> | |
| <span className="font-mono w-8 text-right" style={{ color: "var(--text)" }}>{duration}s</span> | |
| </div> | |
| <div className="flex items-center gap-2 px-3 py-1.5 rounded-full text-xs" | |
| style={{ background: "var(--bg-elev)", border: "1px solid var(--border)" }}> | |
| <span style={{ color: "var(--text-muted)" }}>Steps</span> | |
| <select | |
| value={numSteps} | |
| onChange={(e) => setNumSteps(Number(e.target.value))} | |
| className="bg-transparent outline-none cursor-pointer" | |
| style={{ color: "var(--text)" }} | |
| > | |
| <option value={8}>8 (turbo)</option> | |
| </select> | |
| </div> | |
| <div className="flex items-center gap-2 px-3 py-1.5 rounded-full text-xs" | |
| style={{ background: "var(--bg-elev)", border: "1px solid var(--border)" }}> | |
| <span style={{ color: "var(--text-muted)" }}>Shift</span> | |
| <select | |
| value={shift} | |
| onChange={(e) => setShift(Number(e.target.value))} | |
| className="bg-transparent outline-none cursor-pointer" | |
| style={{ color: "var(--text)" }} | |
| > | |
| <option value={1.0}>1.0</option> | |
| <option value={2.0}>2.0</option> | |
| <option value={3.0}>3.0</option> | |
| </select> | |
| </div> | |
| </div> | |
| {/* Generate */} | |
| <button | |
| onClick={() => generate({ caption, lyrics, duration, shift, numSteps })} | |
| disabled={isWorking} | |
| className="w-full py-3.5 rounded-full font-medium text-base transition disabled:opacity-50 disabled:cursor-not-allowed hover:scale-[1.01] cursor-pointer" | |
| style={{ | |
| background: "var(--accent)", | |
| color: "var(--bg)", | |
| letterSpacing: "-0.01em", | |
| boxShadow: "0 0 40px oklch(0.72 0.17 305 / 0.25)", | |
| }} | |
| > | |
| {status === "generating" ? "Generating music…" : "Generate"} | |
| </button> | |
| <GenerationStatus status={status} message={message} /> | |
| <OutputCard audioUrl={audioUrl} audioInfo={audioInfo} /> | |
| {error && ( | |
| <div className="rounded-lg p-3 text-sm" style={{ background: "oklch(0.25 0.08 22 / 0.3)", color: "var(--danger)" }}> | |
| {error} | |
| </div> | |
| )} | |
| </> | |
| )} | |
| </main> | |
| {/* About / methodology */} | |
| <section className="w-full max-w-2xl mt-12 text-sm" style={{ color: "var(--text-muted)" }}> | |
| <details className="rounded-xl px-4 py-3" | |
| style={{ background: "var(--bg-elev)", border: "1px solid var(--border)" }}> | |
| <summary className="cursor-pointer font-medium select-none" style={{ color: "var(--text)" }}> | |
| How it works & known limitations | |
| </summary> | |
| <div className="mt-4 space-y-4 leading-relaxed"> | |
| <div> | |
| <h3 className="text-[13px] uppercase tracking-widest mb-2" style={{ color: "var(--text-dim)" }}>Pipeline</h3> | |
| <ol className="list-decimal list-inside space-y-1"> | |
| <li><span style={{ color: "var(--text)" }}>Text encoder</span> (Qwen3-Embedding-0.6B, fp16) turns the caption into conditioning hidden states; the same model provides token embeddings for the lyric path.</li> | |
| <li><span style={{ color: "var(--text)" }}>5 Hz LM</span> (ACE-Step acestep-5Hz-lm-0.6B, 4-bit MatMulNBits) writes a short chain-of-thought, then emits ~50 audio codes per 10 s of output.</li> | |
| <li><span style={{ color: "var(--text)" }}>FSQ → detokenizer</span> expands the codes into 25 Hz acoustic features used as cross-attention hints.</li> | |
| <li><span style={{ color: "var(--text)" }}>DiT decoder</span> (2B parameters, fp16) runs 8 Euler flow-matching steps (shift=3.0) over a random latent conditioned on text, lyrics, and hints.</li> | |
| <li><span style={{ color: "var(--text)" }}>Oobleck VAE</span> (fp16) decodes the 25 Hz latent into stereo 48 kHz audio.</li> | |
| </ol> | |
| </div> | |
| <div> | |
| <h3 className="text-[13px] uppercase tracking-widest mb-2" style={{ color: "var(--text-dim)" }}>Why it runs in the browser</h3> | |
| <p> | |
| Everything executes on-device via <code className="font-mono text-xs">onnxruntime-web</code> with the WebGPU execution provider. Two Web Workers keep the LM and the diffusion+VAE graphs in separate WASM heaps so neither hits the 4 GB single-heap limit. Total download is ~2 GB (cached in the browser after the first load). | |
| </p> | |
| </div> | |
| <div> | |
| <h3 className="text-[13px] uppercase tracking-widest mb-2" style={{ color: "var(--text-dim)" }}>Methodology notes</h3> | |
| <ul className="list-disc list-inside space-y-1"> | |
| <li>Compared stage-by-stage against the PyTorch fp32 reference: every tensor agrees to within 0.2% relative L2, and the generated waveforms sound identical.</li> | |
| <li>FP16 DiT is exported natively (<code className="font-mono text-xs">model.half()</code> + dynamo). An earlier fp32→fp16 conversion with post-hoc Cast insertion produced a 25 Hz helicopter artifact, now resolved.</li> | |
| <li>4-bit quantization is MatMulNBits with <code className="font-mono text-xs">block_size=64</code>, asymmetric, <code className="font-mono text-xs">accuracy_level=1</code> (fp32 accumulate).</li> | |
| </ul> | |
| </div> | |
| <div> | |
| <h3 className="text-[13px] uppercase tracking-widest mb-2" style={{ color: "var(--text-dim)" }}>Known limitations</h3> | |
| <ul className="list-disc list-inside space-y-1"> | |
| <li><span style={{ color: "var(--text)" }}>First load is slow.</span> ~2 GB of weights must be fetched and cached; subsequent runs start fast.</li> | |
| <li><span style={{ color: "var(--text)" }}>Vocals need ≥60 s.</span> The 0.6B LM often refuses to emit lyric-aligned audio codes for short durations — instrumentals work at any length.</li> | |
| <li><span style={{ color: "var(--text)" }}>Turbo quality ceiling.</span> We run 8 diffusion steps (shift=3.0). More steps nudge quality up but aren't supported by the turbo weights we ship.</li> | |
| <li><span style={{ color: "var(--text)" }}>Condition-encoder drift.</span> The ONNX condition_encoder has a small drift (~0.4 max_diff) vs PyTorch on real inputs — inaudible today but a known residual we haven’t closed.</li> | |
| <li><span style={{ color: "var(--text)" }}>WebGPU only.</span> No fallback path; the demo gates on WebGPU support (Chrome/Edge 113+, Safari 26+ desktop).</li> | |
| <li><span style={{ color: "var(--text)" }}>Memory.</span> Two workers each hold ~1–2 GB; low-RAM devices may hit <code className="font-mono text-xs">std::bad_alloc</code> during model creation.</li> | |
| <li><span style={{ color: "var(--text)" }}>No seed control.</span> Each generation uses a fresh RNG, so re-runs with the same prompt will differ.</li> | |
| </ul> | |
| </div> | |
| </div> | |
| </details> | |
| </section> | |
| {/* Footer */} | |
| <footer className="mt-12 mb-6 text-center text-xs space-y-2" style={{ color: "var(--text-dim)" }}> | |
| <div> | |
| <a href="https://huggingface.co/shreyask/ACE-Step-v1.5-ONNX" target="_blank" rel="noreferrer" className="hover:opacity-80 transition" style={{ color: "var(--text-muted)" }}> | |
| shreyask/ACE-Step-v1.5-ONNX | |
| </a> | |
| <span className="mx-2">·</span> | |
| <a href="https://huggingface.co/ACE-Step/Ace-Step1.5" target="_blank" rel="noreferrer" className="hover:opacity-80 transition" style={{ color: "var(--text-muted)" }}> | |
| ACE-Step 1.5 | |
| </a> | |
| <span className="mx-2">·</span> | |
| <span>Apache 2.0</span> | |
| </div> | |
| <div> | |
| Made with <a href="https://huggingface.co/docs/transformers.js" target="_blank" rel="noreferrer" className="underline" style={{ color: "var(--text-muted)" }}>🤗 Transformers.js</a> | |
| </div> | |
| </footer> | |
| </div> | |
| </WebGPUGate> | |
| ); | |
| } | |