Spaces:
Sleeping
Sleeping
| """ | |
| BeatForge - AI music generation studio for Hugging Face CPU Basic. | |
| The default engine is a deterministic/procedural composer that turns structured | |
| lyrics and style tags into original instrumental audio. It is intentionally | |
| CPU-native so the Space runs on the free Hugging Face tier. The UI and function | |
| boundaries are ready for a future HeartMuLa/MusicGen GPU backend. | |
| """ | |
| from __future__ import annotations | |
| import math | |
| import os | |
| import re | |
| import tempfile | |
| import time | |
| import uuid | |
| from dataclasses import dataclass | |
| from pathlib import Path | |
| from typing import Dict, Iterable, List, Tuple | |
| import gradio as gr | |
| import numpy as np | |
| import requests | |
| import soundfile as sf | |
| from scipy import signal | |
# pydub is optional: MP3 export needs it (plus an ffmpeg codec), so when the
# import fails for any reason we fall back to WAV output in export_audio().
try:
    from pydub import AudioSegment
except Exception:
    AudioSegment = None
# Render sample rate (Hz) and the free-tier duration cap (seconds).
SR = 44100
MAX_SECONDS = 150
# Matches lyric section headers such as "[Chorus]" on a line of their own.
SECTION_RE = re.compile(r"^\s*\[([^\]]+)\]\s*$", re.MULTILINE)
# Per-genre arrangement defaults; matched by substring against the style tags.
STYLE_PRESETS: Dict[str, Dict[str, object]] = {
    "pop": {"tempo": 112, "scale": "major", "drum": "four", "swing": 0.00, "brightness": 0.55},
    "acoustic": {"tempo": 92, "scale": "major", "drum": "soft", "swing": 0.02, "brightness": 0.35},
    "electronic": {"tempo": 124, "scale": "minor", "drum": "four", "swing": 0.00, "brightness": 0.75},
    "synthwave": {"tempo": 104, "scale": "minor", "drum": "four", "swing": 0.01, "brightness": 0.70},
    "rock": {"tempo": 128, "scale": "minor", "drum": "rock", "swing": 0.00, "brightness": 0.62},
    "trap": {"tempo": 140, "scale": "minor", "drum": "trap", "swing": 0.04, "brightness": 0.66},
    "lofi": {"tempo": 78, "scale": "minor", "drum": "lofi", "swing": 0.08, "brightness": 0.28},
    "jazz": {"tempo": 96, "scale": "major", "drum": "brush", "swing": 0.16, "brightness": 0.42},
    "cinematic": {"tempo": 76, "scale": "minor", "drum": "cinematic", "swing": 0.00, "brightness": 0.50},
}
# Fundamental frequencies (Hz) of the 12 chromatic roots (A4 = 440 Hz).
NOTE_ROOTS = {
    "C": 261.63, "C#": 277.18, "D": 293.66, "D#": 311.13,
    "E": 329.63, "F": 349.23, "F#": 369.99, "G": 392.00,
    "G#": 415.30, "A": 440.00, "A#": 466.16, "B": 493.88,
}
# Scale degrees in semitones above the root (natural major / natural minor).
MAJOR = np.array([0, 2, 4, 5, 7, 9, 11])
MINOR = np.array([0, 2, 3, 5, 7, 8, 10])
# Custom stylesheet injected into the Gradio Blocks app (hero banner, badges,
# branded buttons, status panel). Kept as one literal so it ships with the app.
CUSTOM_CSS = """
.gradio-container {
  max-width: 1180px !important;
  margin: 0 auto !important;
  font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif !important;
}
.hero {
  min-height: 340px;
  padding: 2.4rem 1.4rem 1.2rem;
  border-radius: 8px;
  color: #f8fafc;
  background-image: linear-gradient(rgba(8, 12, 18, 0.58), rgba(8, 12, 18, 0.78)), url('https://images.unsplash.com/photo-1493225457124-a3eb161ffa5f?auto=format&fit=crop&w=1800&q=80');
  background-size: cover;
  background-position: center;
  border: 1px solid rgba(248, 113, 113, 0.28);
  margin-bottom: 1rem;
  display: flex;
  flex-direction: column;
  justify-content: flex-end;
}
.hero h1 {
  margin: 0 0 0.45rem 0 !important;
  color: #fff !important;
  font-size: 2.7rem !important;
  letter-spacing: 0;
  line-height: 1.05;
}
.hero p { margin: 0 !important; color: #dbeafe !important; font-size: 1rem; max-width: 760px; }
.hero-row { display: flex; flex-wrap: wrap; gap: 0.55rem; margin-top: 1rem; align-items: center; }
.badge {
  border-radius: 8px;
  border: 1px solid rgba(255, 255, 255, 0.24);
  background: rgba(255, 255, 255, 0.12);
  padding: 0.38rem 0.72rem;
  color: #fff;
  font-size: 0.82rem;
}
.brand-link {
  color: #ffffff !important;
  background: #dc2626;
  border-radius: 8px;
  padding: 0.48rem 0.82rem;
  font-weight: 800;
  text-decoration: none !important;
}
.panel {
  border-radius: 8px;
  border: 1px solid rgba(148, 163, 184, 0.18);
  padding: 1rem;
}
.status textarea {
  font-family: 'JetBrains Mono', Consolas, monospace !important;
  font-size: 0.86rem !important;
}
button, .gr-button { border-radius: 8px !important; }
.notice {
  border: 1px solid rgba(14, 165, 233, 0.30);
  background: rgba(14, 165, 233, 0.08);
  padding: 1rem;
  border-radius: 8px;
  line-height: 1.55;
}
@media (max-width: 640px) {
  .hero { min-height: 300px; padding: 1.4rem 1rem 1rem; }
  .hero h1 { font-size: 2rem !important; }
}
"""
@dataclass
class Section:
    """One lyric section: a header label (e.g. "Chorus") and its body text.

    The @dataclass decorator was missing: the class is constructed
    positionally (``Section("Verse", lyrics)``) throughout the file, which
    raises TypeError without a generated __init__. ``dataclass`` is already
    imported at the top of the file.
    """

    name: str
    text: str
@dataclass
class SongPlan:
    """Deterministic arrangement plan derived from lyrics, tags, and settings.

    The @dataclass decorator was missing: choose_plan() constructs this class
    with keyword arguments, which raises TypeError without a generated
    __init__. ``dataclass`` is already imported at the top of the file.
    """

    tempo: int            # beats per minute, clipped to 62-156 by choose_plan
    key: str              # root note name, a key into NOTE_ROOTS
    scale: str            # "major" or "minor"
    drum: str             # drum pattern id consumed by render_drums
    swing: float          # fraction of a beat used to delay off-beat hats
    brightness: float     # 0-1 low-pass openness used by render_chord/master
    seed: int             # RNG seed so renders are reproducible
    sections: List[Section]
def tmp_path(suffix: str) -> str:
    """Return a unique temporary-file path ending in *suffix*.

    The file is created on disk (delete=False reserves the name) and the
    descriptor is closed immediately via the context manager. The original
    never closed the NamedTemporaryFile handle, leaking one file descriptor
    per generated track.
    """
    with tempfile.NamedTemporaryFile(
        prefix=f"beatforge_{uuid.uuid4().hex[:10]}_", suffix=suffix, delete=False
    ) as handle:
        return handle.name
def normalize(audio: np.ndarray, peak: float = 0.94) -> np.ndarray:
    """Sanitize *audio* (drop NaN/inf) and scale it so |sample| <= *peak*.

    Audio already below the peak is returned unchanged; empty input is
    returned as-is. Output is always float32.
    """
    cleaned = np.nan_to_num(audio.astype(np.float32), nan=0.0, posinf=0.0, neginf=0.0)
    if cleaned.size == 0:
        return cleaned
    loudest = float(np.max(np.abs(cleaned)))
    if loudest > peak:
        cleaned = cleaned / loudest * peak
    return cleaned.astype(np.float32)
def parse_sections(lyrics: str) -> List[Section]:
    """Split lyrics into named sections using "[Header]" marker lines.

    Falls back to a single "Instrumental" section for empty input and a
    single "Verse" section when no markers are present. A section whose body
    is empty reuses its header text as the body.
    """
    body = (lyrics or "").strip()
    if not body:
        return [Section("Instrumental", "Open instrumental theme")]
    headers = list(SECTION_RE.finditer(body))
    if not headers:
        return [Section("Verse", body)]
    parsed: List[Section] = []
    for i, header in enumerate(headers):
        chunk_start = header.end()
        chunk_end = headers[i + 1].start() if i + 1 < len(headers) else len(body)
        label = header.group(1).strip()
        chunk = body[chunk_start:chunk_end].strip()
        parsed.append(Section(label.title(), chunk or label))
    return parsed or [Section("Verse", body)]
def stable_seed(*parts: str) -> int:
    """Deterministic 32-bit FNV-1a-style hash of the "|"-joined *parts*.

    Used instead of hash() so seeds are stable across Python processes.
    """
    acc = 2166136261  # FNV-1a 32-bit offset basis
    for ch in "|".join(parts):
        acc = ((acc ^ ord(ch)) * 16777619) & 0xFFFFFFFF  # FNV prime, wrap to 32 bits
    return acc
def choose_plan(lyrics: str, tags: str, duration: int, creativity: float) -> SongPlan:
    """Build a deterministic SongPlan from lyrics, style tags, and settings.

    The first genre preset whose name appears in the tags wins; mood and
    tempo keywords then adjust scale/BPM. The key and seed are derived from a
    stable hash so the same inputs always render the same track.
    """
    lowered = (tags or "").lower()
    preset = dict(STYLE_PRESETS["pop"])  # pop is the baseline style
    for style_name, style_values in STYLE_PRESETS.items():
        if style_name in lowered:
            preset.update(style_values)
            break
    # Mood keywords override the preset's scale.
    if "happy" in lowered or "bright" in lowered or "uplifting" in lowered:
        preset["scale"] = "major"
    if "dark" in lowered or "sad" in lowered or "moody" in lowered:
        preset["scale"] = "minor"
    # Tempo keywords nudge BPM within safe bounds.
    if "slow" in lowered:
        preset["tempo"] = max(64, int(preset["tempo"]) - 18)
    if "fast" in lowered or "energetic" in lowered:
        preset["tempo"] = min(150, int(preset["tempo"]) + 16)
    seed = stable_seed(lyrics[:800], tags, str(duration), str(creativity))
    key_names = list(NOTE_ROOTS.keys())
    chosen_key = key_names[seed % len(key_names)]
    jitter = int((creativity - 1.0) * 10)  # creativity slider shifts tempo slightly
    return SongPlan(
        tempo=int(np.clip(int(preset["tempo"]) + jitter, 62, 156)),
        key=chosen_key,
        scale=str(preset["scale"]),
        drum=str(preset["drum"]),
        swing=float(preset["swing"]),
        brightness=float(preset["brightness"]),
        seed=seed,
        sections=parse_sections(lyrics),
    )
def note_freq(root: str, semitone: int, octave_shift: int = 0) -> float:
    """Equal-temperament frequency *semitone* steps above *root*, shifted by octaves."""
    steps = semitone + 12 * octave_shift
    return NOTE_ROOTS[root] * (2 ** (steps / 12.0))
def envelope(length: int, attack: float, release: float) -> np.ndarray:
    """Linear attack/release amplitude envelope of *length* samples.

    *attack* and *release* are in seconds; each ramp is clamped to at least
    one sample and at most the full buffer.
    """
    attack_n = min(length, max(1, int(attack * SR)))
    release_n = min(length, max(1, int(release * SR)))
    shape = np.ones(length, dtype=np.float32)
    shape[:attack_n] = np.linspace(0, 1, attack_n)
    shape[-release_n:] *= np.linspace(1, 0, release_n)
    return shape
def osc(freq: float, seconds: float, kind: str = "sine", phase: float = 0.0) -> np.ndarray:
    """Render *seconds* of a basic oscillator: "sine" (default), "saw", "square", or "tri"."""
    samples = max(1, int(seconds * SR))
    t = np.arange(samples, dtype=np.float32) / SR
    angle = 2 * np.pi * freq * t + phase
    if kind == "saw":
        wave = signal.sawtooth(angle)
    elif kind == "square":
        wave = signal.square(angle)
    elif kind == "tri":
        # A sawtooth with width=0.5 is a triangle wave.
        wave = signal.sawtooth(angle, width=0.5)
    else:
        wave = np.sin(angle)
    return wave.astype(np.float32)
def add_at(track: np.ndarray, start: int, audio: np.ndarray, gain: float = 1.0) -> None:
    """Mix *audio*, scaled by *gain*, into *track* in place starting at sample *start*.

    Anything running past the end of the track is silently truncated; a start
    at or beyond the end is a no-op.
    """
    if start >= len(track):
        return
    stop = min(len(track), start + len(audio))
    track[start:stop] += audio[: stop - start] * gain
def kick() -> np.ndarray:
    """Synthesize a kick drum: pitch-swept sine body plus a short noise click.

    Uses a fixed RNG seed so every kick hit is identical.
    """
    total = int(0.34 * SR)
    times = np.arange(total) / SR
    # Pitch sweep: starts at 130 Hz and decays toward a 38 Hz floor.
    sweep_hz = 92 * np.exp(-times * 18) + 38
    sweep_phase = 2 * np.pi * np.cumsum(sweep_hz) / SR
    body = np.sin(sweep_phase) * np.exp(-times * 9)
    click = np.random.default_rng(7).normal(0, 0.018, total) * np.exp(-times * 85)
    return normalize((body + click).astype(np.float32), 0.95)
def snare() -> np.ndarray:
    """Synthesize a snare: band-passed noise burst blended with a decaying 190 Hz tone.

    Uses a fixed RNG seed so every snare hit is identical.
    """
    total = int(0.22 * SR)
    decay = np.arange(total) / SR
    burst = np.random.default_rng(11).normal(0, 1, total).astype(np.float32)
    band = signal.butter(2, [1400, 7200], btype="bandpass", fs=SR, output="sos")
    burst = signal.sosfilt(band, burst) * np.exp(-decay * 16)
    body = osc(190, total / SR, "sine") * np.exp(-decay * 22)
    return normalize(burst * 0.7 + body * 0.35, 0.8)
def hat() -> np.ndarray:
    """Synthesize a closed hi-hat: high-passed noise with a fast exponential decay.

    Uses a fixed RNG seed so every hat hit is identical.
    """
    total = int(0.08 * SR)
    decay = np.arange(total) / SR
    hiss = np.random.default_rng(19).normal(0, 1, total).astype(np.float32)
    highpass = signal.butter(2, 7000, btype="highpass", fs=SR, output="sos")
    hiss = signal.sosfilt(highpass, hiss) * np.exp(-decay * 55)
    return normalize(hiss, 0.45)
def render_drums(length: int, tempo: int, pattern: str, swing: float, rng: np.random.Generator) -> np.ndarray:
    """Render a full-length drum track for the given tempo and pattern name.

    Known patterns: "four"/"electronic" (four-on-the-floor), "rock", "trap",
    "cinematic" (sparse 8-beat hits); anything else gets a soft fallback
    groove. Hi-hats run on eighth notes with optional swing on the off-beats;
    hat gains (and trap ghost hats) are randomized via *rng*, so the draw
    order determines the exact output for a given seed.
    """
    track = np.zeros(length, dtype=np.float32)
    beat = 60.0 / tempo  # seconds per quarter note
    k, s, h = kick(), snare(), hat()
    # +2 beats of headroom so the final hits are not cut off mid-loop.
    total_beats = int((length / SR) / beat) + 2
    for b in range(total_beats):
        base = b * beat          # onset time (seconds) of this beat
        bar_pos = b % 4          # position within a 4/4 bar
        if pattern in {"four", "electronic"}:
            # Kick on every beat, snare on 2 and 4.
            add_at(track, int(base * SR), k, 0.9)
            if bar_pos in {1, 3}:
                add_at(track, int(base * SR), s, 0.62)
        elif pattern == "rock":
            if bar_pos in {0, 2}:
                add_at(track, int(base * SR), k, 0.95)
            if bar_pos in {1, 3}:
                add_at(track, int(base * SR), s, 0.78)
            if bar_pos == 2:
                # Extra off-beat kick halfway through beat 3.
                add_at(track, int((base + beat * 0.5) * SR), k, 0.55)
        elif pattern == "trap":
            if bar_pos in {0, 2}:
                add_at(track, int(base * SR), k, 0.9)
            if bar_pos == 3:
                add_at(track, int(base * SR), s, 0.7)
        elif pattern == "cinematic":
            # Sparse hits on an 8-beat cycle.
            if b % 8 == 0:
                add_at(track, int(base * SR), k, 0.85)
            if b % 8 == 6:
                add_at(track, int(base * SR), s, 0.45)
        else:
            # Fallback soft groove (e.g. "soft", "lofi", "brush" presets).
            if bar_pos in {0, 2}:
                add_at(track, int(base * SR), k, 0.42)
            if bar_pos == 3:
                add_at(track, int(base * SR), s, 0.34)
        # Eighth-note hi-hats; the off-beat (sub == 1) is delayed by the swing amount.
        for sub in range(2):
            off = base + sub * beat * 0.5
            if sub == 1:
                off += beat * swing
            gain = 0.22 + 0.10 * rng.random()  # humanized hat level
            if pattern == "trap" and rng.random() < 0.35:
                # Occasional trap ghost hat a sixteenth later.
                add_at(track, int((off + beat * 0.25) * SR), h, gain * 0.75)
            add_at(track, int(off * SR), h, gain)
    return normalize(track, 0.85)
def render_chord(freqs: Iterable[float], seconds: float, brightness: float) -> np.ndarray:
    """Render a sustained chord pad from the given fundamental frequencies.

    Each voice stacks a saw partial (progressively quieter for higher voices)
    with a faint octave-up triangle; *brightness* (0-1) opens the low-pass
    cutoff. A slow attack/release envelope shapes the pad.
    """
    voices = list(freqs)
    samples = max(1, int(seconds * SR))
    pad = np.zeros(samples, dtype=np.float32)
    for rank, fundamental in enumerate(voices):
        pad += osc(fundamental, seconds, "saw", phase=rank * 0.2) * (0.32 / (rank + 1))
        pad += osc(fundamental * 2, seconds, "tri", phase=rank * 0.1) * 0.08
    lowpass = signal.butter(2, 900 + brightness * 3200, btype="lowpass", fs=SR, output="sos")
    pad = signal.sosfilt(lowpass, pad)
    return pad * envelope(samples, 0.05, 0.18)
def render_tone(freq: float, seconds: float, kind: str, gain: float) -> np.ndarray:
    """Render one melodic or bass note with a snappy attack/release envelope.

    Non-sine waveforms are low-passed at 2.4 kHz to tame their harshness.
    """
    samples = max(1, int(seconds * SR))
    wave = osc(freq, seconds, kind)
    if kind != "sine":
        soften = signal.butter(2, 2400, btype="lowpass", fs=SR, output="sos")
        wave = signal.sosfilt(soften, wave)
    return wave * envelope(samples, 0.01, 0.05) * gain
def section_weight(name: str) -> float:
    """Relative energy multiplier for a song section, keyed by its name.

    Choruses/hooks are loudest, bridges slightly lifted, intros/outros
    pulled back; everything else is neutral.
    """
    lowered = name.lower()
    rules = (
        (("chorus", "hook"), 1.28),
        (("bridge",), 1.10),
        (("intro", "outro"), 0.72),
    )
    for keywords, weight in rules:
        if any(word in lowered for word in keywords):
            return weight
    return 1.0
def render_track(lyrics: str, tags: str, duration: int, creativity: float, diversity: int, cfg: float) -> Tuple[str, str]:
    """Compose and export a full track; returns (audio file path, notes text).

    Pipeline: plan (key/tempo/sections) -> drums -> per-section chords, bass,
    and melody -> optional vinyl/width textures -> master -> MP3 (or WAV
    fallback). The melody draws from *rng* in loop order, so output is fully
    determined by the plan's seed.
    """
    duration = int(np.clip(duration, 15, MAX_SECONDS))
    plan = choose_plan(lyrics, tags, duration, creativity)
    rng = np.random.default_rng(plan.seed)
    length = duration * SR
    scale = MAJOR if plan.scale == "major" else MINOR
    root = plan.key
    beat = 60.0 / plan.tempo
    track = np.zeros(length, dtype=np.float32)
    # Drum bed; "Style Strength" (cfg) raises the drum level slightly.
    drums = render_drums(length, plan.tempo, plan.drum, plan.swing, rng) * (0.36 + 0.06 * cfg)
    track += drums
    # Split the duration across sections proportionally to their energy weight,
    # with a 4-second floor per section.
    weights = np.array([section_weight(s.name) for s in plan.sections], dtype=np.float32)
    sec_lengths = np.maximum(4.0, duration * weights / weights.sum())
    starts = np.cumsum(np.concatenate([[0.0], sec_lengths[:-1]]))
    # I-vi-IV-V style progression (degrees into the scale array).
    progression = [0, 5, 3, 4] if plan.scale == "major" else [0, 5, 6, 3]
    for sec_idx, (section, sec_start, sec_len) in enumerate(zip(plan.sections, starts, sec_lengths)):
        energy = section_weight(section.name)
        # Rough syllable count drives melody note selection below.
        words = re.findall(r"[A-Za-z']+", section.text)
        syllable_proxy = max(4, sum(max(1, len(w) // 4) for w in words))
        bars = max(1, int(sec_len / (beat * 4)))
        for bar in range(bars + 1):
            t0 = sec_start + bar * beat * 4
            if t0 >= duration:
                break
            # Triad built from stacked thirds on the current progression degree.
            degree = progression[(bar + sec_idx) % len(progression)]
            chord_degrees = [degree, (degree + 2) % 7, (degree + 4) % 7]
            chord_freqs = [note_freq(root, int(scale[d]), octave_shift=-1) for d in chord_degrees]
            chord = render_chord(chord_freqs, min(beat * 3.8, duration - t0), plan.brightness)
            add_at(track, int(t0 * SR), chord, 0.24 * energy)
            # Root-note bass on every quarter of the bar.
            bass_degree = int(scale[degree])
            for step in range(4):
                bt = t0 + step * beat
                if bt >= duration:
                    continue
                bass_freq = note_freq(root, bass_degree, octave_shift=-2)
                bass = render_tone(bass_freq, beat * 0.82, "sine", 0.28 * energy)
                add_at(track, int(bt * SR), bass, 1.0)
        # Sparse eighth-note melody; density grows with the creativity slider.
        melody_steps = min(int(sec_len / (beat * 0.5)), 96)
        for m in range(melody_steps):
            if rng.random() > 0.72 + (creativity - 1.0) * 0.25:
                continue
            mt = sec_start + m * beat * 0.5 + (beat * plan.swing if m % 2 else 0)
            if mt >= duration:
                continue
            # Scale-degree walk seeded by lyric density; "diversity" widens the random jump.
            idx = (m + syllable_proxy + sec_idx * 2 + int(rng.integers(0, max(2, diversity // 15)))) % len(scale)
            octave = 0 if rng.random() < 0.75 else 1
            mf = note_freq(root, int(scale[idx]), octave_shift=octave)
            lead = render_tone(mf, beat * (0.34 + 0.20 * rng.random()), "tri", 0.17 * energy)
            add_at(track, int(mt * SR), lead, 1.0)
    # Tag-driven textures: vinyl hiss for lofi, short delays for stereo-ish width.
    if "vinyl" in tags.lower() or "lofi" in tags.lower():
        noise = rng.normal(0, 0.008, length).astype(np.float32)
        sos = signal.butter(2, 5000, btype="lowpass", fs=SR, output="sos")
        track += signal.sosfilt(sos, noise) * 0.5
    if "wide" in tags.lower() or "ambient" in tags.lower() or "cinematic" in tags.lower():
        pad = np.roll(track, int(0.028 * SR)) * 0.12 + np.roll(track, int(0.061 * SR)) * 0.08
        track += pad
    track = master(track, plan.brightness)
    out = export_audio(track, "mp3")
    stats = (
        "Generated on BeatForge CPU Composer.\n\n"
        f"Key: {plan.key} {plan.scale}\n"
        f"Tempo: {plan.tempo} BPM\n"
        f"Sections: {', '.join(s.name for s in plan.sections)}\n"
        f"Duration: {duration}s\n"
        f"Engine: free-tier procedural composer\n\n"
        "For neural HeartMuLa quality, upgrade this Space to GPU and connect the HeartMuLa backend."
    )
    return out, stats
def master(audio: np.ndarray, brightness: float) -> np.ndarray:
    """Final bus processing: rumble high-pass, optional treble lift, soft clip.

    Brighter mixes (> 0.55) get a small first-order-difference "sparkle"
    layer; the tanh stage limits peaks before the final normalize.
    """
    rumble_filter = signal.butter(2, 32, btype="highpass", fs=SR, output="sos")
    audio = signal.sosfilt(rumble_filter, audio)
    if brightness > 0.55:
        audio += signal.lfilter([1, -0.96], [1], audio) * 0.04
    audio = np.tanh(audio * 1.45) * 0.82  # soft clipper with headroom
    return normalize(audio, 0.94)
def export_audio(audio: np.ndarray, output_format: str) -> str:
    """Write *audio* to a temp WAV; transcode to MP3 when requested and possible.

    Falls back to returning the WAV path when pydub is unavailable or the
    MP3 encode fails (e.g. missing ffmpeg).
    """
    wav_path = tmp_path(".wav")
    sf.write(wav_path, normalize(audio), SR, subtype="PCM_16")
    if output_format != "mp3" or AudioSegment is None:
        return wav_path
    try:
        mp3_path = tmp_path(".mp3")
        AudioSegment.from_wav(wav_path).export(mp3_path, format="mp3", bitrate="192k")
    except Exception as exc:
        print(f"MP3 export failed, returning WAV: {exc}")
        return wav_path
    return mp3_path
def generate_track(lyrics: str, tags: str, duration: int, creativity: float, diversity: int, cfg: float, progress=gr.Progress()):
    """Gradio streaming handler: validate inputs, then yield (audio, notes) updates.

    Yields intermediate (None, status) pairs so the UI shows progress before
    the render completes; any exception is reported as a status message
    instead of crashing the event.
    """
    if not lyrics or not lyrics.strip():
        yield None, "Add lyrics or section notes before generating."
        return
    if duration > MAX_SECONDS:
        yield None, f"Free CPU mode is capped at {MAX_SECONDS}s to keep the Space responsive."
        return
    try:
        started = time.time()
        progress(0.05, desc="Parsing lyrics and style tags")
        yield None, "Planning arrangement from lyrics and style tags..."
        time.sleep(0.1)  # let the first status message reach the client
        progress(0.28, desc="Composing drums, bass, chords, and lead")
        yield None, "Composing section-aware arrangement..."
        audio_path, stats = render_track(lyrics, tags, duration, creativity, diversity, cfg)
        elapsed = time.time() - started
        progress(1.0, desc="Track ready")
        yield audio_path, stats + f"\nRender time: {elapsed:.1f}s"
    except Exception as exc:
        yield None, f"Generation failed: {exc}"
def create_app() -> gr.Blocks:
    """Build the BeatForge Gradio UI and wire the generate button to the composer."""
    with gr.Blocks(
        css=CUSTOM_CSS,
        title="BeatForge by Bilal Ansari",
        theme=gr.themes.Soft(
            primary_hue=gr.themes.colors.red,
            secondary_hue=gr.themes.colors.blue,
            neutral_hue=gr.themes.colors.slate,
        ),
    ) as app:
        # Hero banner with branding links and capability badges.
        gr.HTML(
            """
            <div class="hero">
              <h1>BeatForge</h1>
              <p>Lyrics-to-music studio for Hugging Face free tier. Built by Bilal Ansari with a CPU-native composer and a clean upgrade path to HeartMuLa 3B.</p>
              <div class="hero-row">
                <a class="brand-link" href="https://ansaribilal.com" target="_blank">ansaribilal.com</a>
                <a class="brand-link" href="https://colab.research.google.com/drive/19CCLHrTGA0424VTWL5TLx4ELB42ph4j6" target="_blank">Run Full Model in Colab</a>
                <span class="badge">Runner by Bilal Ansari</span>
                <span class="badge">CPU Basic compatible</span>
                <span class="badge">Lyrics + style tags</span>
                <span class="badge">MP3 output</span>
              </div>
            </div>
            """
        )
        with gr.Row():
            # Left column: lyric/style inputs and generation controls.
            with gr.Column(scale=1):
                gr.Markdown("### Lyrics & Style")
                lyrics_box = gr.Textbox(
                    label="Song Lyrics",
                    lines=14,
                    value="[Intro]\nSoft piano melody opens the room\n\n[Verse]\nThe sun creeps in across the floor\nI hear the traffic outside the door\nThe coffee pot begins to hiss\nAnother morning just like this\n\n[Chorus]\nEvery day the light returns\nEvery day the fire burns\n",
                    placeholder="Use [Intro], [Verse], [Chorus], [Bridge], [Outro] markers.",
                    show_copy_button=True,
                )
                tags_box = gr.Textbox(
                    label="Style Tags",
                    value="piano, calm, acoustic, morning",
                    placeholder="pop, synthwave, rock, lofi, jazz, cinematic, happy, dark, fast...",
                )
                with gr.Row():
                    duration_ctrl = gr.Slider(15, MAX_SECONDS, value=60, step=5, label="Duration (seconds)")
                    creativity_ctrl = gr.Slider(0.7, 1.3, value=1.0, step=0.05, label="Creativity")
                with gr.Row():
                    diversity_ctrl = gr.Slider(20, 100, value=50, step=5, label="Diversity")
                    cfg_ctrl = gr.Slider(1.0, 3.0, value=1.5, step=0.1, label="Style Strength")
                run_btn = gr.Button("Generate Track", variant="primary", size="lg")
            # Right column: rendered audio, session notes, and the GPU-upgrade notice.
            with gr.Column(scale=1):
                gr.Markdown("### Output")
                audio_out = gr.Audio(label="Generated Track", type="filepath", interactive=False)
                stats_out = gr.Textbox(
                    label="Session Notes",
                    value="Ready. Generate a track from lyrics and style tags.",
                    lines=9,
                    interactive=False,
                    elem_classes="status",
                )
                gr.HTML(
                    """
                    <div class="notice">
                    <strong>Free-tier mode:</strong> this Space generates original instrumental music using a CPU-native composer. HeartMuLa 3B and MusicGen are better neural backends, but they need GPU or hosted inference to run reliably.<br><br><a href="https://colab.research.google.com/drive/19CCLHrTGA0424VTWL5TLx4ELB42ph4j6" target="_blank"><strong>Open the full HeartMuLa 3B Colab runner</strong></a> for the GPU version.
                    </div>
                    """
                )
        # Clickable example rows matching the input components, in order.
        gr.Markdown("### Example Prompts")
        gr.Examples(
            examples=[
                ["[Verse]\nNeon signs above the street\nCity pulse beneath my feet\n\n[Chorus]\nAlive tonight, electric light", "electronic, synthwave, upbeat, night", 60, 1.05, 60, 1.7],
                ["[Intro]\nWarm guitar figure\n\n[Verse]\nQuiet morning light\nSoft and bright\n\n[Chorus]\nStay a while with me", "acoustic, calm, folk, gentle guitar", 55, 0.9, 40, 1.8],
                ["[Verse]\nThunder rolls across the sky\nLightning cuts the black in two\n\n[Chorus]\nFeel the power coming through", "rock, heavy, electric guitar, drums", 70, 1.1, 65, 2.0],
                ["[Intro]\nDusty keys and tape hiss\n\n[Verse]\nRain on the window\nLate bus rolling slow", "lofi, vinyl, mellow, rainy", 65, 0.85, 35, 1.4],
            ],
            inputs=[lyrics_box, tags_box, duration_ctrl, creativity_ctrl, diversity_ctrl, cfg_ctrl],
        )
        with gr.Accordion("Architecture & Upgrade Path", open=False):
            gr.Markdown(
                """
                **Current Space:** Lyrics + tags -> section parser -> CPU composer -> drums, bass, chords, lead, texture -> master -> MP3/WAV.\n\n
                **GPU upgrade:** Swap `render_track()` with a HeartMuLa 3B BF16 subprocess or a MusicGen endpoint. Start from the full Colab runner: [https://colab.research.google.com/drive/19CCLHrTGA0424VTWL5TLx4ELB42ph4j6](https://colab.research.google.com/drive/19CCLHrTGA0424VTWL5TLx4ELB42ph4j6). The UI already exposes the matching controls: duration, creativity, diversity, and style strength.\n\n
                **Branding:** Runner by Bilal Ansari, ansaribilal.com.
                """
            )
        gr.Markdown(
            """
            ---
            Runner by **Bilal Ansari** · [ansaribilal.com](https://ansaribilal.com)
            Inspired by HeartMuLa 3B and modern lyrics-to-music workflows. Built for Hugging Face free CPU tier.
            """
        )
        # generate_track is a generator, so the UI streams status updates.
        run_btn.click(
            generate_track,
            inputs=[lyrics_box, tags_box, duration_ctrl, creativity_ctrl, diversity_ctrl, cfg_ctrl],
            outputs=[audio_out, stats_out],
        )
    return app
if __name__ == "__main__":
    demo = create_app()
    # Single worker + bounded queue so the free CPU tier is never oversubscribed.
    demo.queue(default_concurrency_limit=1, max_size=8)
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)