Spaces:

Goonjan
/

KeyArrange

Sleeping

App Files Files Community

Goonjan Saha committed 19 days ago

Commit

d10679c

unverified ·

2 Parent(s): 0972d55 621ebcd

Merge pull request #1 from sgoonjan/building-v1

Browse files

Files changed (27) hide show

Dockerfile +21 -0
README.md +3 -5
notes.txt +1 -0
pyproject.toml +11 -0
requirements.txt +22 -7
src/keyarrange/__init__.py +1 -0
src/keyarrange/analysis/__init__.py +0 -0
src/keyarrange/analysis/beat_tracker.py +26 -0
src/keyarrange/api/__init__.py +0 -0
src/keyarrange/api/app.py +102 -0
src/keyarrange/cli.py +20 -0
src/keyarrange/dataclasses.py +16 -0
src/keyarrange/piano/__init__.py +0 -0
src/keyarrange/piano/merge.py +30 -0
src/keyarrange/piano/transforms.py +88 -0
src/keyarrange/pipeline.py +77 -0
src/keyarrange/render/__init__.py +0 -0
src/keyarrange/render/piano_roll.py +121 -0
src/keyarrange/separation/__init__.py +0 -0
src/keyarrange/separation/demucs_runner.py +53 -0
src/keyarrange/structure/__init__.py +0 -0
src/keyarrange/structure/midi_parser.py +40 -0
src/keyarrange/structure/quantize.py +25 -0
src/keyarrange/transcription/__init__.py +0 -0
src/keyarrange/transcription/basic_pitch_transcriptor.py +36 -0
tests/testing_bass.py +23 -0
web/index.html +525 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,21 @@

+FROM python:3.11-slim
+# System packages: ffmpeg for audio I/O, libsndfile1 for the soundfile package,
+# git for demucs model download.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends ffmpeg libsndfile1 git && \
+    rm -rf /var/lib/apt/lists/*
+WORKDIR /app
+# Install Python deps first so Docker layer cache is reused on code changes.
+# requirements.txt already pins fastapi, uvicorn and python-multipart, so the
+# web stack needs no separate (unpinned) install step.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+COPY . .
+# Editable install of the keyarrange package itself (sources live under src/).
+RUN pip install --no-cache-dir -e .
+# Hugging Face Spaces requires port 7860
+EXPOSE 7860
+CMD ["uvicorn", "keyarrange.api.app:app", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -74,19 +74,17 @@ The quick test: can a pianist sight-read it at moderate tempo, clearly assign ha
 ## Roadmap
 **v1 — in progress**
-- [ ] End-to-end pipeline: audio → MIDI
-- [ ] Three core playability transforms (density, span, note cap)
-- [ ] Web UI with MIDI download
 **v2 — planned**
 - [ ] Chord-aware left hand voicing (root + third + fifth from chord analysis)
 - [ ] MuseScore PDF rendering
-- [ ] Before/after example gallery
 **v3 — planned**
 - [ ] Beat tracking with madmom for better metric strength scoring
 - [ ] Melody smoothing — strip ornaments and melisma from vocal transcription
-- [ ] Difficulty score on output
 **Later**
 - [ ] Fine-tuned arrangement model on POP909 dataset

 ## Roadmap
 **v1 — in progress**
+- [x] End-to-end pipeline: audio → MIDI
+- [x] Three core playability transforms (density, span, note cap)
+- [x] Web UI with MIDI download
 **v2 — planned**
 - [ ] Chord-aware left hand voicing (root + third + fifth from chord analysis)
 - [ ] MuseScore PDF rendering
 **v3 — planned**
 - [ ] Beat tracking with madmom for better metric strength scoring
 - [ ] Melody smoothing — strip ornaments and melisma from vocal transcription
 **Later**
 - [ ] Fine-tuned arrangement model on POP909 dataset

notes.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ I have finished all the transforms and run the pipeline. The output quality seems to have degraded: I can no longer recognize even the part that comes right after the intro. Removing the note_cap function seemed to improve it, but the result is still not that good.

pyproject.toml ADDED Viewed

	@@ -0,0 +1,11 @@

+[project]
+name = "keyarrange"
+version = "0.1.0"
+dependencies = []
+[build-system]
+requires = ["setuptools"]
+build-backend = "setuptools.build_meta"
+[tool.setuptools.packages.find]
+where = ["src"]

requirements.txt CHANGED Viewed

@@ -1,7 +1,22 @@
-demucs
-basic-pitch
-librosa
-pretty_midi
-music21
-numpy
-soundfile

+# Core audio processing
+demucs==4.0.0
+basic-pitch==0.3.2
+# Audio I/O and signal processing
+librosa==0.10.1
+scipy==1.10.1  # Pinned to avoid compatibility issues of newer versions with librosa
+soundfile==0.12.1
+torchaudio==2.1.0  # Pinned to avoid torchcodec requirement in newer versions
+# Music theory and symbolic manipulation
+music21==9.1.0
+pretty-midi==0.2.10
+setuptools==69.5.1 # Pinned to avoid pkg_resources removal in newer versions
+# Web API server (FastAPI backend that also serves the frontend)
+fastapi==0.135.1
+uvicorn==0.42.0
+python-multipart==0.0.22
+# General utilities
+numpy==1.24.3

src/keyarrange/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ __version__ = "0.1.0"

src/keyarrange/analysis/__init__.py ADDED Viewed

File without changes

src/keyarrange/analysis/beat_tracker.py ADDED Viewed

	@@ -0,0 +1,26 @@

+import librosa
+import numpy as np
+import sys
+import os
+def get_beat_times(audio_path: str) -> tuple[np.ndarray, float]:
+    """
+    Estimate beat positions and tempo of an audio file with librosa.
+    Args:
+        audio_path: Path to the audio file to analyze.
+    Returns:
+        A ``(beat_times, bpm)`` tuple: the beat positions (requested from
+        librosa with ``units='time'``) and the tempo as a plain Python float.
+    Raises:
+        FileNotFoundError: If ``audio_path`` does not exist.
+        ValueError: If no beats are detected or the tempo estimate is not positive.
+    """
+    if not os.path.exists(audio_path):
+        raise FileNotFoundError(f"Audio file not found at: {audio_path}")
+    y, sr = librosa.load(audio_path, mono=True)
+    tempo, beat_times = librosa.beat.beat_track(y=y, sr=sr, units='time')
+    if beat_times.size == 0:
+        raise ValueError("No beats detected in the audio file.")
+    # Depending on the librosa release the tempo may come back as a scalar or
+    # as a one-element array; flatten it so callers always get the plain float
+    # that the return annotation promises.
+    bpm = float(np.asarray(tempo).reshape(-1)[0])
+    if bpm <= 0:
+        # Fail here with a clear message rather than letting callers divide by zero.
+        raise ValueError("Could not estimate a positive tempo for the audio file.")
+    return beat_times, bpm
+if __name__ == "__main__":
+    # Manual smoke test: print the number of detected beats and the tempo.
+    res_1, res_2 = get_beat_times(sys.argv[1])
+    print(len(res_1))
+    print(res_2)

src/keyarrange/api/__init__.py ADDED Viewed

File without changes

src/keyarrange/api/app.py ADDED Viewed

	@@ -0,0 +1,102 @@

+"""FastAPI backend — wraps Pipeline and serves MIDI + piano roll."""
+import asyncio
+import logging
+import uuid
+from pathlib import Path
+from fastapi import FastAPI, File, HTTPException, UploadFile
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import FileResponse, HTMLResponse
+from fastapi.staticfiles import StaticFiles
+from keyarrange.pipeline import Pipeline
+from keyarrange.render.piano_roll import render_piano_roll
+# Module-level logger; basicConfig below configures root logging at import time.
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(name)s] %(message)s")
+app = FastAPI(title="KeyArrange")
+# NOTE(review): CORS is fully open (any origin, method and header) — confirm
+# this is intended for the deployed Space rather than a development leftover.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Scratch locations for uploaded audio and pipeline results; both directories
+# are created as an import-time side effect.
+UPLOAD_DIR = Path("/tmp/keyarrange/uploads")
+OUTPUT_DIR = Path("/tmp/keyarrange/outputs")
+UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
+OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+# Serve frontend
+# "web" is a relative path, so it is resolved against the process working directory.
+app.mount("/web", StaticFiles(directory="web"), name="web")
+@app.get("/", response_class=HTMLResponse)
+async def root():
+    """Return the frontend's index.html as the landing page."""
+    # index.html declares charset UTF-8, so decode it explicitly instead of
+    # relying on the platform's default text encoding.
+    return (Path("web") / "index.html").read_text(encoding="utf-8")
+@app.get("/health")
+async def health():
+    """Liveness probe: always returns the static payload ``{"status": "ok"}``."""
+    return {"status": "ok"}
+@app.post("/upload")
+async def upload(file: UploadFile = File(...)):
+    """
+    Accept an MP3/WAV upload, run the arrangement pipeline and return download URLs.
+    The upload is stored under UPLOAD_DIR, the blocking pipeline and the piano
+    roll renderer run in the default executor, and the response carries the job
+    id plus the MIDI URL and (when rendering succeeded) the piano roll URL.
+    Raises:
+        HTTPException(400): If the file name is missing or not .mp3/.wav.
+        HTTPException(500): If the pipeline raises.
+    """
+    import re  # local import: only needed to sanitise the job id
+    # The client may omit the file name entirely; treat that as unsupported.
+    filename = file.filename or ""
+    if not filename.lower().endswith((".mp3", ".wav")):
+        raise HTTPException(status_code=400, detail="Only MP3 and WAV files are supported.")
+    # The job id becomes a directory name and a URL path segment, so reduce the
+    # client-supplied stem to a conservative character set.
+    safe_stem = re.sub(r"[^A-Za-z0-9_-]", "_", Path(filename).stem[:40]) or "upload"
+    job_id = safe_stem + "_" + uuid.uuid4().hex[:6]
+    upload_path = UPLOAD_DIR / f"{job_id}{Path(filename).suffix}"
+    upload_path.write_bytes(await file.read())
+    logger.info(f"Job {job_id}: starting pipeline")
+    # We are inside a coroutine, so a running loop is guaranteed to exist.
+    loop = asyncio.get_running_loop()
+    try:
+        midi_path = await loop.run_in_executor(None, _run_pipeline, str(upload_path), job_id)
+    except Exception as e:
+        # logger.exception keeps the traceback, which a plain error message would lose.
+        logger.exception(f"Job {job_id} failed: {e}")
+        raise HTTPException(status_code=500, detail=f"Pipeline failed: {str(e)}") from e
+    piano_roll_path = Path(midi_path).parent / "piano_roll.png"
+    try:
+        await loop.run_in_executor(None, render_piano_roll, midi_path, str(piano_roll_path))
+    except Exception as e:
+        # The piano roll is optional: the MIDI result is still returned without it.
+        logger.warning(f"Job {job_id}: piano roll render failed ({e}), continuing without it")
+        piano_roll_path = None
+    return {
+        "job_id": job_id,
+        "midi_url": f"/download/midi/{job_id}",
+        "piano_roll_url": f"/download/piano_roll/{job_id}" if piano_roll_path and piano_roll_path.exists() else None,
+    }
+def _run_pipeline(upload_path: str, job_id: str) -> str:
+    """Run the blocking Pipeline for one job and return the path it reports.
+    Intended to be submitted to an executor so the event loop is not blocked;
+    results are written below OUTPUT_DIR in a directory named after the job id.
+    """
+    job_output_dir = OUTPUT_DIR / job_id
+    return Pipeline(upload_path, str(job_output_dir)).run()
+@app.get("/download/{file_type}/{job_id}")
+async def download(file_type: str, job_id: str):
+    """
+    Serve an artifact produced for a job.
+    Args:
+        file_type: ``"midi"`` for the arranged MIDI or ``"piano_roll"`` for the PNG.
+        job_id: Identifier returned by the upload endpoint.
+    Raises:
+        HTTPException(400): If ``file_type`` is not one of the two known values.
+        HTTPException(404): If the job directory or the requested file is missing.
+    """
+    # file_type -> (file name to look for, media type, download name, 404 message)
+    artifacts = {
+        "midi": ("arranged.mid", "audio/midi", "keyarrange.mid", "MIDI file not found."),
+        "piano_roll": ("piano_roll.png", "image/png", None, "Piano roll not found."),
+    }
+    if file_type not in artifacts:
+        raise HTTPException(status_code=400, detail="file_type must be 'midi' or 'piano_roll'.")
+    target_name, media_type, download_name, missing_detail = artifacts[file_type]
+    # job_id comes straight from the URL. Only accept a direct child of
+    # OUTPUT_DIR so that values such as ".." cannot widen the recursive search
+    # below to other jobs' outputs.
+    job_dir = (OUTPUT_DIR / job_id).resolve()
+    if job_dir.parent != OUTPUT_DIR.resolve() or not job_dir.is_dir():
+        raise HTTPException(status_code=404, detail=missing_detail)
+    # The artifact sits in a nested, song-named subdirectory of the job
+    # directory, so search recursively and take the first hit.
+    match = next(job_dir.rglob(target_name), None)
+    if match is None:
+        raise HTTPException(status_code=404, detail=missing_detail)
+    return FileResponse(str(match), media_type=media_type, filename=download_name)

src/keyarrange/cli.py ADDED Viewed

	@@ -0,0 +1,20 @@

+import logging
+import sys
+from keyarrange.pipeline import Pipeline
+def main(input_path: str, output_dir: str) -> None:
+    """
+    Run the arrangement pipeline on one audio file and print the output path.
+    Args:
+        input_path: Audio file to arrange.
+        output_dir: Directory under which the pipeline writes its results.
+    """
+    # Send INFO-level log records from all modules to the console.
+    handler = logging.StreamHandler()
+    handler.setFormatter(logging.Formatter('%(asctime)s [%(name)s] %(message)s'))
+    root_logger = logging.getLogger()
+    root_logger.addHandler(handler)
+    root_logger.setLevel(logging.INFO)
+    pipeline = Pipeline(input_path, output_dir)
+    arranged_midi_path = pipeline.run()
+    print(f"Arranged MIDI saved at: {arranged_midi_path}")
+if __name__ == "__main__":
+    # Exit with a usage hint instead of an IndexError when arguments are missing.
+    if len(sys.argv) != 3:
+        sys.exit(f"Usage: {sys.argv[0]} INPUT_AUDIO OUTPUT_DIR")
+    main(sys.argv[1], sys.argv[2])

src/keyarrange/dataclasses.py ADDED Viewed

	@@ -0,0 +1,16 @@

+from dataclasses import dataclass
+@dataclass
+class Note:
+    """A single note event assigned to one hand."""
+    id: int  # identifier of the note
+    pitch: int  # presumably a MIDI note number — TODO confirm against producers
+    start: float  # onset time; presumably seconds — TODO confirm
+    end: float  # release time, same unit as start
+    velocity: int
+    hand: str  # 'right' or 'left'
+    @property
+    def duration(self) -> float:
+        """Length of the note, computed as ``end - start``."""
+        return self.end - self.start

src/keyarrange/piano/__init__.py ADDED Viewed

File without changes

src/keyarrange/piano/merge.py ADDED Viewed

	@@ -0,0 +1,30 @@

+import pretty_midi
+import os
+import sys
+from keyarrange.dataclasses import Note
+def merge_tracks(right: list[Note], left: list[Note], output_file: str, bpm: float) -> None:
+    """
+    Write both hands into one MIDI file, one piano track per hand.
+    Notes whose start is not strictly before their end are skipped with a
+    warning on stderr. The right-hand track is added before the left-hand track.
+    Args:
+        right: Notes for the 'Right Hand' track.
+        left: Notes for the 'Left Hand' track.
+        output_file: Destination path of the MIDI file.
+        bpm: Initial tempo stored in the file.
+    Raises:
+        ValueError: If both note lists are empty.
+    """
+    if not (right or left):
+        raise ValueError('No notes to merge')
+    midi = pretty_midi.PrettyMIDI(initial_tempo=bpm)
+    # (label used in the warning text, track name, notes) — right hand first.
+    hands = (
+        ("right", "Right Hand", right),
+        ("left", "Left Hand", left),
+    )
+    for label, track_name, hand_notes in hands:
+        track = pretty_midi.Instrument(program=0, name=track_name)
+        for n in hand_notes:
+            if n.start >= n.end:
+                print(f"Warning: Skipping {label} hand note with start time ({n.start}) >= end time ({n.end})", file=sys.stderr)
+                continue
+            track.notes.append(pretty_midi.Note(velocity=n.velocity, pitch=n.pitch, start=n.start, end=n.end))
+        midi.instruments.append(track)
+    midi.write(output_file)

src/keyarrange/piano/transforms.py ADDED Viewed

	@@ -0,0 +1,88 @@

+from keyarrange.dataclasses import Note
+def _group_by_onset(notes: list[Note], window_ms: float = 50.0) -> list[list[Note]]:
+    """
+    Cluster notes whose onsets fall close together.
+    Notes are visited in start-time order. A note joins the current cluster
+    when it starts no later than ``window_ms`` after that cluster's first
+    note; otherwise it opens a new cluster. The input list is not modified.
+    """
+    tolerance = window_ms / 1000.0
+    clusters: list[list[Note]] = []
+    anchor_start = None  # start time of the first note in the newest cluster
+    for candidate in sorted(notes, key=lambda n: n.start):
+        if clusters and (candidate.start - anchor_start) <= tolerance:
+            clusters[-1].append(candidate)
+        else:
+            clusters.append([candidate])
+            anchor_start = candidate.start
+    return clusters
+def density_reducer(notes: list[Note], bpm: float, multiplier: int = 1) -> list[Note]:
+    """
+    Thin out passages with too many note onsets packed close together.
+    A 500 ms window slides over the timeline in 250 ms steps, starting at 0.
+    Whenever a window holds more onsets than the budget, the longest notes are
+    kept and the remaining ones in that window are dropped.
+    Args:
+        notes: Notes to filter. The caller's list is left untouched.
+        bpm: Tempo in beats per minute; must be positive.
+        multiplier: Scales the per-window budget (2 doubles it).
+    Returns:
+        The surviving notes ordered by start time.
+    Raises:
+        ValueError: If ``bpm`` is not positive.
+    """
+    from bisect import bisect_left  # local import: only this function needs it
+    if bpm <= 0:
+        raise ValueError("bpm must be positive")
+    if not notes:
+        return []
+    window_duration = 0.5  # 500ms
+    step_size = 0.25  # 250ms
+    # Work on a sorted copy so the caller's list is not reordered as a side effect.
+    ordered = sorted(notes, key=lambda note: note.start)
+    starts = [note.start for note in ordered]
+    # Budget per window: floor(120 / bpm), at least one, then scaled. It does
+    # not depend on the window, so compute it once.
+    max_notes_in_window = max(1, int(120 / bpm)) * multiplier
+    dropped_note_ids = set()
+    last_note_start_time = starts[-1]
+    t = 0.0
+    while t <= last_note_start_time + window_duration:  # run slightly past the last onset
+        # Binary search for the notes with t <= start < t + window_duration.
+        lo = bisect_left(starts, t)
+        hi = bisect_left(starts, t + window_duration)
+        window_notes = [note for note in ordered[lo:hi] if note.id not in dropped_note_ids]
+        if len(window_notes) > max_notes_in_window:
+            # Longest first; everything beyond the budget is dropped.
+            window_notes.sort(key=lambda note: note.duration, reverse=True)
+            dropped_note_ids.update(note.id for note in window_notes[max_notes_in_window:])
+        t += step_size
+    return [note for note in ordered if note.id not in dropped_note_ids]
+def span_enforcer(notes: list[Note], max_span: int = 12, hand: str = "right") -> list[Note]:
+    """
+    Shrink each simultaneous group of notes until it fits within ``max_span`` semitones.
+    Groups come from ``_group_by_onset``. For the right hand the lowest pitches
+    are removed first; for the left hand the highest pitches are removed
+    first. Any other ``hand`` value leaves the group unchanged. Notes are
+    returned group by group, ordered by pitch inside each group.
+    """
+    kept: list[Note] = []
+    for chord in _group_by_onset(notes):
+        by_pitch = sorted(chord, key=lambda n: n.pitch)
+        # Narrow a [lo, hi) slice instead of popping elements off the list.
+        lo, hi = 0, len(by_pitch)
+        while hi - lo > 1 and (by_pitch[hi - 1].pitch - by_pitch[lo].pitch) > max_span:
+            if hand == "right":
+                lo += 1  # give up the lowest remaining note
+            elif hand == "left":
+                hi -= 1  # give up the highest remaining note
+            else:
+                break  # unknown hand: leave the group as it is
+        kept.extend(by_pitch[lo:hi])
+    return kept
+def note_cap(notes: list[Note], max_notes: int = 3) -> list[Note]:
+    """
+    Limit every simultaneous group of notes to at most ``max_notes`` notes.
+    Groups come from ``_group_by_onset``. When a group is too large its
+    shortest notes are discarded. Notes are returned group by group, ordered
+    by ascending duration inside each group.
+    Raises:
+        ValueError: If ``max_notes`` is negative.
+    """
+    if max_notes < 0:
+        # A negative cap can never be satisfied; reject it up front instead of
+        # failing while removing notes from an already empty group.
+        raise ValueError("max_notes must be non-negative")
+    processed_notes: list[Note] = []
+    for group in _group_by_onset(notes):
+        by_duration = sorted(group, key=lambda note: note.duration)
+        excess = len(by_duration) - max_notes
+        # The shortest notes sit at the front, so one slice removes them all.
+        processed_notes.extend(by_duration[excess:] if excess > 0 else by_duration)
+    return processed_notes

src/keyarrange/pipeline.py ADDED Viewed

	@@ -0,0 +1,77 @@

+"""Pipeline coordinator — owns directory structure and stage sequencing."""
+import logging
+from pathlib import Path
+from keyarrange.separation.demucs_runner import separate
+from keyarrange.transcription.basic_pitch_transcriptor import transcribe_stem
+from keyarrange.analysis.beat_tracker import get_beat_times
+from keyarrange.structure.midi_parser import load_midi
+from keyarrange.structure.quantize import quantize_to_beats
+from keyarrange.piano.merge import merge_tracks
+from keyarrange.piano.transforms import density_reducer, span_enforcer, note_cap
+logger = logging.getLogger(__name__)
+class Pipeline:
+    """Runs the arrangement stages in order, each one writing its output to disk.
+    Everything for a song is placed under ``output_dir/song_name`` in the
+    ``stems``, ``transcriptions`` and ``arranged`` subdirectories.
+    """
+    def __init__(self, input_path: str, output_dir: str):
+        """Check that the input exists and create the per-song directories.
+        Raises:
+            ValueError: If ``input_path`` does not exist.
+        """
+        source = Path(input_path)
+        if not source.exists():
+            raise ValueError(f"Input file does not exist: {input_path}")
+        self.input_path = source
+        # The song name is the input file name without its extension.
+        self.base_dir = Path(output_dir) / source.stem
+        self.stems_dir = self.base_dir / "stems"
+        self.transcriptions_dir = self.base_dir / "transcriptions"
+        self.arranged_dir = self.base_dir / "arranged"
+        for directory in (self.stems_dir, self.transcriptions_dir, self.arranged_dir):
+            directory.mkdir(parents=True, exist_ok=True)
+    def run(self) -> str:
+        """Execute all stages and return the path of the arranged MIDI file."""
+        source = str(self.input_path)
+        transcriptions = str(self.transcriptions_dir)
+        arranged_midi = self.arranged_dir / "arranged.mid"
+        logger.info("Running stem separation...")
+        stems = separate(source, str(self.stems_dir))
+        vocals_wav, bass_wav = stems["vocals"], stems["bass"]
+        logger.info("Transcribing vocal stem...")
+        vocals_midi = transcribe_stem(vocals_wav, transcriptions)
+        logger.info("Transcribing bass stem...")
+        bass_midi = transcribe_stem(bass_wav, transcriptions)
+        logger.info("Getting beat times...")
+        beat_times, bpm = get_beat_times(source)
+        logger.info("Loading vocal MIDI...")
+        melody = load_midi(str(vocals_midi), hand="right")
+        logger.info("Loading bass MIDI...")
+        bass_line = load_midi(str(bass_midi), hand="left")
+        # Only the left hand is snapped to the beat grid.
+        logger.info("Quantizing left hand notes...")
+        bass_line = quantize_to_beats(bass_line, beat_times)
+        logger.info("Applying transformations to Right hand notes...")
+        melody = density_reducer(melody, bpm, multiplier=2)  # Allow density relaxation for vocals
+        melody = span_enforcer(melody, max_span=12, hand="right")
+        melody = note_cap(melody, max_notes=3)
+        logger.info("Applying transformations to Left hand notes...")
+        bass_line = density_reducer(bass_line, bpm)
+        bass_line = span_enforcer(bass_line, max_span=12, hand="left")
+        bass_line = note_cap(bass_line, max_notes=3)
+        logger.info("Merging tracks...")
+        merge_tracks(melody, bass_line, str(arranged_midi), bpm)
+        logger.info(f"Pipeline complete: {arranged_midi}")
+        return str(arranged_midi)

src/keyarrange/render/__init__.py ADDED Viewed

File without changes

src/keyarrange/render/piano_roll.py ADDED Viewed

	@@ -0,0 +1,121 @@

+"""Renders a static piano roll PNG from a two-track arranged MIDI file."""
+import logging
+import matplotlib
+matplotlib.use("Agg")  # non-interactive backend — safe for server use
+import matplotlib.patches as mpatches
+import matplotlib.pyplot as plt
+import pretty_midi
+logger = logging.getLogger(__name__)
+# Pitch range displayed (covers standard piano arrangement output)
+# Values are MIDI note numbers; both bounds are inclusive.
+PITCH_LOW = 36   # C2
+PITCH_HIGH = 84  # C6
+RIGHT_COLOR = "#4a9eff"   # blue — right hand (vocals/melody)
+LEFT_COLOR  = "#ff6b6b"   # red  — left hand (bass/harmony)
+# Same hex values as the --bg / --surface / --accent custom properties in web/index.html.
+BG_COLOR    = "#0d0d0d"
+SURFACE     = "#141414"
+ACCENT      = "#c8f043"  # NOTE(review): not referenced in the visible part of this module
+def render_piano_roll(midi_path: str, output_path: str) -> str:
+    """Read MIDI at midi_path, write piano roll PNG to output_path, return output_path.
+    Only the first two instrument tracks are drawn (first in the right-hand
+    colour, second in the left-hand colour), and notes outside
+    PITCH_LOW..PITCH_HIGH are skipped.
+    Raises:
+        ValueError: If the MIDI file's end time is 0.
+    """
+    pm = pretty_midi.PrettyMIDI(midi_path)
+    duration = pm.get_end_time()
+    if duration == 0:
+        raise ValueError("MIDI file has no notes.")
+    note_height = 0.75  # in pitch units
+    colors = [RIGHT_COLOR, LEFT_COLOR]
+    labels = ["Right hand (melody)", "Left hand (harmony)"]
+    fig, ax = plt.subplots(figsize=(14, 5))
+    # Close the figure even when drawing or saving fails: pyplot keeps every
+    # open figure registered, so leaked figures would pile up in a
+    # long-running process.
+    try:
+        fig.patch.set_facecolor(BG_COLOR)
+        ax.set_facecolor(SURFACE)
+        # --- time grid lines every 2 seconds ---
+        for t in range(0, int(duration) + 1, 2):
+            ax.axvline(x=t, color="white", alpha=0.08, linewidth=0.5, zorder=1)
+        # --- middle C divider (pitch 60) ---
+        ax.axhline(y=60, color="white", alpha=0.25, linewidth=0.8, linestyle="--", zorder=2)
+        ax.text(duration * 0.01, 60.3, "C4 · middle C",
+                color="white", alpha=0.35, fontsize=7, fontfamily="monospace")
+        # --- draw notes per track (zip stops after the two available colours) ---
+        for color, instrument in zip(colors, pm.instruments):
+            for note in instrument.notes:
+                if not (PITCH_LOW <= note.pitch <= PITCH_HIGH):
+                    continue
+                rect = mpatches.FancyBboxPatch(
+                    (note.start, note.pitch - note_height / 2),
+                    note.end - note.start,
+                    note_height,
+                    boxstyle="round,pad=0.02",
+                    linewidth=0,
+                    facecolor=color,
+                    alpha=0.85,
+                    zorder=3,
+                )
+                ax.add_patch(rect)
+        # --- keyboard strip on left edge ---
+        _draw_keyboard_strip(ax, duration)
+        # --- axes ---
+        ax.set_xlim(0, duration)
+        ax.set_ylim(PITCH_LOW - 1, PITCH_HIGH + 1)
+        ax.set_xlabel("Time (seconds)", color="#666", fontsize=9, labelpad=6)
+        ax.tick_params(colors="#444")
+        ax.spines[:].set_visible(False)
+        # y-axis: show octave labels only
+        octave_ticks = [p for p in range(PITCH_LOW, PITCH_HIGH + 1) if p % 12 == 0]
+        ax.set_yticks(octave_ticks)
+        ax.set_yticklabels([pretty_midi.note_number_to_name(p) for p in octave_ticks],
+                           color="#555", fontsize=8)
+        # --- legend ---
+        legend_patches = [
+            mpatches.Patch(color=RIGHT_COLOR, label=labels[0]),
+            mpatches.Patch(color=LEFT_COLOR,  label=labels[1]),
+        ]
+        ax.legend(handles=legend_patches, loc="upper right",
+                  facecolor="#1a1a1a", edgecolor="#333",
+                  labelcolor="white", fontsize=9, framealpha=0.9)
+        # Lay out this figure explicitly rather than pyplot's "current" figure.
+        fig.tight_layout(pad=0.5)
+        fig.savefig(output_path, dpi=150, bbox_inches="tight", facecolor=BG_COLOR)
+    finally:
+        plt.close(fig)
+    logger.info(f"Piano roll saved: {output_path}")
+    return output_path
+def _draw_keyboard_strip(ax, duration):
+    """Paint one small rectangle per pitch just left of x = 0 as a keyboard hint.
+    Black keys are drawn dark and at 60% of the white-key width. The patches
+    are not clipped, so they stay visible outside the axes area.
+    """
+    key_width = duration * 0.012  # ~1.2% of total width
+    left_edge = -key_width * 1.1
+    # Semitone offsets within an octave that are black keys.
+    sharps = {1, 3, 6, 8, 10}
+    for midi_pitch in range(PITCH_LOW, PITCH_HIGH + 1):
+        if (midi_pitch % 12) in sharps:
+            fill, width = "#111", key_width * 0.6
+        else:
+            fill, width = "#ddd", key_width * 1.0
+        key = mpatches.Rectangle(
+            (left_edge, midi_pitch - 0.45),
+            width,
+            0.88,
+            linewidth=0.3,
+            edgecolor="#333",
+            facecolor=fill,
+            zorder=4,
+            clip_on=False,
+        )
+        ax.add_patch(key)

src/keyarrange/separation/__init__.py ADDED Viewed

File without changes

src/keyarrange/separation/demucs_runner.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import subprocess
+import sys
+from pathlib import Path
+def separate(audio_path: str, output_dir: str) -> dict[str, str]:
+    """
+    Run Demucs source separation on an audio file.
+    Args:
+        audio_path: Audio file to separate.
+        output_dir: Directory passed to Demucs as its ``--out`` location.
+    Returns:
+        Dictionary mapping stem names to absolute file paths
+    Raises:
+        subprocess.CalledProcessError: If the Demucs process exits with a non-zero status
+        RuntimeError: If expected stem files are missing after separation
+    """
+    audio_path = Path(audio_path).resolve()
+    output_dir = Path(output_dir).resolve()
+    # Launch Demucs with the interpreter that is running this code. A bare
+    # "python" is looked up on PATH and may be a different interpreter that
+    # does not have demucs installed.
+    subprocess.run(
+        [sys.executable, "-m", "demucs", "-n", "htdemucs", "--out", str(output_dir), str(audio_path)],
+        check=True
+    )
+    # Demucs creates nested structure: {output_dir}/htdemucs/{song_name}/
+    stems_base = output_dir / "htdemucs" / audio_path.stem
+    # Locate all four expected stems
+    expected_stems = ["vocals", "bass", "drums", "other"]
+    stem_files = {stem: stems_base / f"{stem}.wav" for stem in expected_stems}
+    missing_stems = [stem for stem, stem_file in stem_files.items() if not stem_file.exists()]
+    if missing_stems:
+        raise RuntimeError(
+            f"Missing stems after Demucs separation: {missing_stems}. "
+            f"Looked in: {stems_base}"
+        )
+    return {stem: str(stem_file.resolve()) for stem, stem_file in stem_files.items()}
+if __name__ == "__main__":
+    # Manual smoke test: print every stem with its path.
+    result = separate(sys.argv[1], sys.argv[2])
+    for stem, path in result.items():
+        print(f"{stem}: {path}")

src/keyarrange/structure/__init__.py ADDED Viewed

File without changes

src/keyarrange/structure/midi_parser.py ADDED Viewed

	@@ -0,0 +1,40 @@

+import os
+import sys
+import pretty_midi
+from keyarrange.dataclasses import Note
+def load_midi(path: str, hand: str) -> list[Note]:
+    """
+    Read every note of a MIDI file into Note objects tagged with ``hand``.
+    Notes of all instruments are collected; ids are assigned in reading order
+    and the result is sorted by start time.
+    Raises:
+        ValueError: If ``hand`` is not 'right' or 'left', or the file holds no notes.
+        FileNotFoundError: If ``path`` does not exist.
+    """
+    if hand not in ('right', 'left'):
+        raise ValueError("hand must be either 'right' or 'left'")
+    if not os.path.exists(path):
+        raise FileNotFoundError(f"MIDI file not found at {path}")
+    midi_data = pretty_midi.PrettyMIDI(path)
+    # Flatten instrument by instrument so ids follow the file's own ordering.
+    raw_notes = (
+        pm_note
+        for instrument in midi_data.instruments
+        for pm_note in instrument.notes
+    )
+    notes = [
+        Note(
+            id=index,
+            pitch=pm_note.pitch,
+            start=pm_note.start,
+            end=pm_note.end,
+            velocity=pm_note.velocity,
+            hand=hand,
+        )
+        for index, pm_note in enumerate(raw_notes)
+    ]
+    if not notes:
+        raise ValueError('No notes found in MIDI file')
+    # Ordered by onset for easier downstream processing
+    return sorted(notes, key=lambda note: note.start)
+if __name__ == "__main__":
+    # Manual smoke test: print how many notes were read.
+    result = load_midi(sys.argv[1], sys.argv[2])
+    print(len(result))

src/keyarrange/structure/quantize.py ADDED Viewed

	@@ -0,0 +1,25 @@

+import numpy as np
+from keyarrange.dataclasses import Note
+def quantize_to_beats(notes: list[Note], beat_times: np.ndarray) -> list[Note]:
+    """
+    Snap every note's start to the nearest beat while keeping its duration.
+    Args:
+        notes: Notes to quantize; they are not modified.
+        beat_times: Beat positions, in the same time unit as the note times.
+            Any array-like of numbers is accepted.
+    Returns:
+        New notes, in input order, that differ from the originals only in
+        ``start`` and ``end``.
+    Raises:
+        ValueError: If ``beat_times`` is empty.
+    """
+    from dataclasses import replace  # stdlib helper; local import keeps the block self-contained
+    beat_times = np.asarray(beat_times, dtype=float)
+    if beat_times.size == 0:
+        raise ValueError("beat_times cannot be empty.")
+    quantized_notes = []
+    for note in notes:
+        nearest_beat_index = int(np.argmin(np.abs(beat_times - note.start)))
+        # Convert the NumPy scalar to a built-in float so start/end keep the
+        # plain float type the note fields are declared with.
+        new_start = float(beat_times[nearest_beat_index])
+        new_end = new_start + float(note.duration)
+        quantized_notes.append(replace(note, start=new_start, end=new_end))
+    return quantized_notes

src/keyarrange/transcription/__init__.py ADDED Viewed

File without changes

src/keyarrange/transcription/basic_pitch_transcriptor.py ADDED Viewed

	@@ -0,0 +1,36 @@

+import sys
+from pathlib import Path
+from basic_pitch.inference import predict
+def transcribe_stem(audio_path: str, output_dir: str) -> Path:
+    """
+    Transcribe one audio stem to MIDI with Basic Pitch.
+    The MIDI file is written into ``output_dir`` (created when missing) under
+    the stem's file name followed by ``_transcription.mid``.
+    Returns:
+        Path of the written MIDI file.
+    Raises:
+        FileNotFoundError: If ``audio_path`` does not exist.
+    """
+    source = Path(audio_path).resolve()
+    if not source.exists():
+        raise FileNotFoundError(f"Audio file not found: {source}")
+    target_dir = Path(output_dir).resolve()
+    target_dir.mkdir(parents=True, exist_ok=True)
+    midi_file = target_dir / f"{source.stem}_transcription.mid"
+    print(f"Starting transcription for {source.name}...")
+    # predict returns three values; only the middle one (the MIDI data) is used.
+    midi_data = predict(str(source))[1]
+    midi_data.write(str(midi_file))
+    print(f"Transcription complete. MIDI saved to {midi_file}")
+    return midi_file
+if __name__ == "__main__":
+    # Manual smoke test: transcribe one file and print the result path.
+    output_path = transcribe_stem(sys.argv[1], sys.argv[2])
+    print(f"Transcribed stem: {output_path}")

tests/testing_bass.py ADDED Viewed

	@@ -0,0 +1,23 @@

+import soundfile as sf
+import numpy as np
+import matplotlib.pyplot as plt
+# Ad-hoc inspection script (not an automated test): prints basic statistics of
+# a separated bass stem and plots the start of its waveform.
+# Load the bass stem
+# NOTE: the path is relative, so the result depends on the working directory.
+bass, sr = sf.read('../data/output/a_thousand_years/stems/htdemucs/a_thousand_years/bass.wav')
+# Check if there's actually audio content
+print(f"Sample rate: {sr}")
+print(f"Duration: {len(bass) / sr:.2f} seconds")
+print(f"Max amplitude: {np.max(np.abs(bass)):.6f}")
+print(f"RMS energy: {np.sqrt(np.mean(bass**2)):.6f}")
+# Silence check: a peak amplitude below 0.001 is reported as silent
+if np.max(np.abs(bass)) < 0.001:
+    print("⚠️  Bass stem appears to be silent or very quiet")
+else:
+    print("✓ Bass stem has content")
+plt.figure(figsize=(12, 4))
+plt.plot(bass[:sr*10])  # First 10 seconds
+plt.title("Bass waveform")
+# Opens an interactive window.
+plt.show()

web/index.html ADDED Viewed

	@@ -0,0 +1,525 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
+<title>KeyArrange</title>
+<!-- MIDI player web component — handles audio synthesis + scrolling piano roll visualization -->
+<script src="https://cdn.jsdelivr.net/combine/npm/tone@14.7.58,npm/@magenta/music@1.23.1/es6/core.js,npm/html-midi-player@1.5.0"></script>
+<style>
+  @import url('https://fonts.googleapis.com/css2?family=DM+Serif+Display:ital@0;1&family=DM+Mono:wght@400;500&family=DM+Sans:wght@300;400;500&display=swap');
+  :root {
+    --bg: #0d0d0d;
+    --surface: #141414;
+    --surface2: #1a1a1a;
+    --border: #242424;
+    --accent: #c8f043;
+    --accent2: #f0c843;
+    --accent3: #43c8f0;
+    --text: #e8e8e0;
+    --text-dim: #666;
+    --text-mid: #999;
+  }
+  * { box-sizing: border-box; margin: 0; padding: 0; }
+  body {
+    background: var(--bg);
+    color: var(--text);
+    font-family: 'DM Sans', sans-serif;
+    font-weight: 300;
+    min-height: 100vh;
+    padding: 48px 24px 80px;
+  }
+  .container { max-width: 760px; margin: 0 auto; }
+  header {
+    border-bottom: 1px solid var(--border);
+    padding-bottom: 32px;
+    margin-bottom: 40px;
+  }
+  .eyebrow {
+    font-family: 'DM Mono', monospace;
+    font-size: 11px;
+    letter-spacing: 0.2em;
+    color: var(--accent);
+    text-transform: uppercase;
+    margin-bottom: 12px;
+  }
+  h1 {
+    font-family: 'DM Serif Display', serif;
+    font-size: clamp(28px, 5vw, 44px);
+    line-height: 1.1;
+    color: var(--text);
+    margin-bottom: 16px;
+  }
+  h1 em { font-style: italic; color: var(--accent); }
+  .subtitle {
+    font-size: 14px;
+    color: var(--text-mid);
+    max-width: 480px;
+    line-height: 1.7;
+  }
+  /* Upload */
+  .upload-section { margin-bottom: 32px; }
+  .drop-zone {
+    border: 1px dashed var(--border);
+    border-radius: 3px;
+    padding: 40px 32px;
+    text-align: center;
+    background: var(--surface);
+    cursor: pointer;
+    transition: all 0.2s;
+    position: relative;
+  }
+  .drop-zone:hover, .drop-zone.drag-over {
+    border-color: var(--accent);
+    background: rgba(200, 240, 67, 0.03);
+  }
+  .drop-zone input[type="file"] {
+    position: absolute;
+    inset: 0;
+    opacity: 0;
+    cursor: pointer;
+    width: 100%;
+    height: 100%;
+  }
+  .drop-icon { font-size: 28px; margin-bottom: 12px; color: var(--text-dim); }
+  .drop-label { font-size: 14px; color: var(--text-mid); margin-bottom: 6px; }
+  .drop-sub {
+    font-family: 'DM Mono', monospace;
+    font-size: 11px;
+    color: var(--text-dim);
+    letter-spacing: 0.1em;
+    text-transform: uppercase;
+  }
+  .file-selected {
+    margin-top: 14px;
+    display: none;
+    align-items: center;
+    gap: 10px;
+    padding: 12px 16px;
+    background: rgba(200, 240, 67, 0.06);
+    border: 1px solid rgba(200, 240, 67, 0.2);
+    border-radius: 3px;
+  }
+  .file-selected.visible { display: flex; }
+  .file-name {
+    font-family: 'DM Mono', monospace;
+    font-size: 12px;
+    color: var(--accent);
+    flex: 1;
+    overflow: hidden;
+    text-overflow: ellipsis;
+    white-space: nowrap;
+  }
+  .file-clear {
+    background: none;
+    border: none;
+    color: var(--text-dim);
+    cursor: pointer;
+    font-size: 16px;
+    line-height: 1;
+    padding: 0 4px;
+  }
+  .file-clear:hover { color: var(--text); }
+  /* Full-width primary action button. */
+  .btn-arrange {
+    width: 100%;
+    margin-top: 12px;
+    padding: 16px;
+    border: none;
+    border-radius: 3px;
+    background: var(--accent);
+    color: #0d0d0d;
+    font-family: 'DM Mono', monospace;
+    font-size: 13px;
+    font-weight: 500;
+    text-transform: uppercase;
+    letter-spacing: 0.1em;
+    cursor: pointer;
+    transition: all 0.15s;
+  }
+  /* Brighten on hover, but only while the button is enabled. */
+  .btn-arrange:hover:not(:disabled) {
+    background: #d8ff55;
+  }
+  /* Disabled state: muted colours and a not-allowed cursor. */
+  .btn-arrange:disabled {
+    cursor: not-allowed;
+    color: var(--text-dim);
+    background: var(--border);
+  }
+  /* Status card: hidden until the .visible class is added. */
+  .status-card {
+    display: none;
+    margin-top: 20px;
+    padding: 20px 24px;
+    border: 1px solid var(--border);
+    border-radius: 3px;
+    background: var(--surface);
+  }
+  .status-card.visible {
+    display: block;
+  }
+  .status-eyebrow {
+    margin-bottom: 8px;
+    color: var(--accent3);
+    font-family: 'DM Mono', monospace;
+    font-size: 10px;
+    text-transform: uppercase;
+    letter-spacing: 0.18em;
+  }
+  .status-message {
+    font-size: 14px;
+    line-height: 1.6;
+    color: var(--text-mid);
+  }
+  /* Thin track; overflow is clipped so the moving fill never spills outside it. */
+  .progress-bar-track {
+    overflow: hidden;
+    height: 2px;
+    margin-top: 16px;
+    border-radius: 2px;
+    background: var(--border);
+  }
+  /* Fill segment driven by the looping "indeterminate" animation below. */
+  .progress-bar-fill {
+    height: 100%;
+    border-radius: 2px;
+    background: var(--accent3);
+    animation: indeterminate 2s ease-in-out infinite;
+  }
+  /* The segment grows while moving right, then shrinks again at the far end. */
+  @keyframes indeterminate {
+    from {
+      width: 5%;
+      margin-left: 0%;
+    }
+    50% {
+      width: 40%;
+      margin-left: 30%;
+    }
+    to {
+      width: 5%;
+      margin-left: 95%;
+    }
+  }
+  /* Error card: red-tinted box, hidden until the .visible class is added. */
+  .error-card {
+    display: none;
+    margin-top: 20px;
+    padding: 16px 20px;
+    border: 1px solid rgba(255, 60, 60, 0.2);
+    border-radius: 3px;
+    background: rgba(255, 60, 60, 0.06);
+    color: #ff6b6b;
+    font-family: 'DM Mono', monospace;
+    font-size: 13px;
+  }
+  .error-card.visible {
+    display: block;
+  }
+  /* Results section: hidden until the .visible class is added. */
+  .results-section {
+    display: none;
+    margin-top: 32px;
+  }
+  .results-section.visible {
+    display: block;
+  }
+  /* Tag and title share a baseline above a divider line. */
+  .results-header {
+    display: flex;
+    align-items: baseline;
+    gap: 12px;
+    margin-bottom: 20px;
+    padding-bottom: 16px;
+    border-bottom: 1px solid var(--border);
+  }
+  /* Small uppercase pill. */
+  .results-tag {
+    padding: 3px 8px;
+    border-radius: 2px;
+    background: rgba(200, 240, 67, 0.12);
+    color: var(--accent);
+    font-family: 'DM Mono', monospace;
+    font-size: 10px;
+    text-transform: uppercase;
+    letter-spacing: 0.15em;
+  }
+  .results-title {
+    font-family: 'DM Serif Display', serif;
+    font-size: 22px;
+    color: var(--text);
+  }
+  /* MIDI player container: bordered surface that clips its children to the rounded corners. */
+  .player-wrap {
+    overflow: hidden;
+    margin-bottom: 20px;
+    border: 1px solid var(--border);
+    border-radius: 3px;
+    background: var(--surface);
+  }
+  /* Caption row: label text on the left, legend on the right. */
+  .player-label {
+    display: flex;
+    align-items: center;
+    justify-content: space-between;
+    padding: 12px 16px 8px;
+    color: var(--text-dim);
+    font-family: 'DM Mono', monospace;
+    font-size: 10px;
+    text-transform: uppercase;
+    letter-spacing: 0.15em;
+  }
+  .player-legend {
+    display: flex;
+    gap: 16px;
+  }
+  /* One legend entry: colour swatch followed by its caption. */
+  .legend-item {
+    display: flex;
+    align-items: center;
+    gap: 6px;
+    font-size: 11px;
+    color: var(--text-dim);
+  }
+  /* Fixed-size swatch; flex-shrink: 0 keeps it from being squeezed. */
+  .legend-dot {
+    flex-shrink: 0;
+    width: 9px;
+    height: 9px;
+    border-radius: 2px;
+  }
+  /*
+    html-midi-player styling notes (per the component's README):
+    - <midi-player> renders its controls inside a shadow DOM, so only rules on
+      the host element (as below) and the ::part() hooks it exposes apply.
+    - <midi-visualizer> draws the piano roll as SVG that page CSS can reach,
+      so notes are styled with ordinary selectors: svg rect.note and
+      svg rect.note.active.
+  */
+  midi-player {
+    display: block;
+    width: 100%;
+    background: var(--surface2);
+    border-top: 1px solid var(--border);
+  }
+  midi-visualizer {
+    display: block;
+    width: 100%;
+    /* NOTE(review): this custom property does not appear in html-midi-player's
+       documentation; kept as a harmless fallback — confirm before relying on it. */
+    --midi-visualizer-notes-color: #4a9eff;
+    background: #0a0a0a;
+    min-height: 160px;
+  }
+  /* Note colours; these must stay in sync with the legend swatches (#4a9eff / #ff6b6b). */
+  midi-visualizer svg rect.note { fill: #4a9eff; }          /* upcoming */
+  midi-visualizer svg rect.note.active { fill: #ff6b6b; }   /* playing now */
+  /* Download row: buttons wrap onto new lines when space runs out. */
+  .download-row {
+    display: flex;
+    flex-wrap: wrap;
+    gap: 10px;
+  }
+  /* Outlined secondary button (used on a link, hence text-decoration: none). */
+  .btn-download {
+    display: inline-flex;
+    align-items: center;
+    gap: 8px;
+    padding: 12px 20px;
+    border: 1px solid var(--border);
+    border-radius: 3px;
+    background: transparent;
+    color: var(--text);
+    font-family: 'DM Mono', monospace;
+    font-size: 12px;
+    text-transform: uppercase;
+    text-decoration: none;
+    letter-spacing: 0.1em;
+    cursor: pointer;
+    transition: all 0.15s;
+  }
+  .btn-download:hover {
+    color: var(--accent);
+    border-color: var(--accent);
+  }
+  .btn-download .btn-icon {
+    font-size: 15px;
+  }
+  /* Footer: two items pushed to opposite ends, wrapping when space runs out. */
+  footer {
+    display: flex;
+    flex-wrap: wrap;
+    justify-content: space-between;
+    gap: 8px;
+    margin-top: 64px;
+    padding-top: 24px;
+    border-top: 1px solid var(--border);
+    color: var(--text-dim);
+    font-family: 'DM Mono', monospace;
+    font-size: 11px;
+  }
+  /* Links are dim and undecorated, switching to the accent colour on hover. */
+  footer a {
+    text-decoration: none;
+    color: var(--text-dim);
+  }
+  footer a:hover {
+    color: var(--accent);
+  }
+</style>
+</head>
+<body>
+<div class="container">
+  <!-- Page header: product eyebrow, headline, and a one-paragraph description of the pipeline. -->
+  <header>
+    <div class="eyebrow">KeyArrange · MVP</div>
+    <h1>Upload a song.<br><em>Get music you can play.</em></h1>
+    <p class="subtitle">Drop any pop song. The pipeline separates stems, transcribes each part, and arranges a two-hand piano version built around physical playability constraints.</p>
+  </header>
+  <!-- Upload: drop zone, selected-file row, and the Arrange button (disabled until a file is chosen). -->
+  <div class="upload-section">
+    <div class="drop-zone" id="dropZone">
+      <!-- The input is rendered transparent by the stylesheet but stays focusable, so it needs its own accessible name. -->
+      <input type="file" id="fileInput" accept=".mp3,.wav" aria-label="Choose an MP3 or WAV file" />
+      <!-- Decorative glyph: hidden from assistive technology. -->
+      <div class="drop-icon" aria-hidden="true">♩</div>
+      <div class="drop-label">Drop an MP3 or WAV here</div>
+      <div class="drop-sub">or click to browse</div>
+    </div>
+    <div class="file-selected" id="fileSelected">
+      <span class="file-name" id="fileName"></span>
+      <!-- The visible label is only a glyph, so aria-label supplies the spoken name. -->
+      <button type="button" class="file-clear" id="fileClear" title="Remove" aria-label="Remove selected file">✕</button>
+    </div>
+    <!-- type="button" keeps the click script-only even if this markup is ever wrapped in a form. -->
+    <button type="button" class="btn-arrange" id="arrangeBtn" disabled>Arrange →</button>
+  </div>
+  <!-- Status: static "processing" message with an indeterminate progress bar; the script shows it while the upload request is in flight. -->
+  <div class="status-card" id="statusCard">
+    <div class="status-eyebrow">Processing</div>
+    <div class="status-message">
+      Separating stems, transcribing, arranging… this takes 1–3 minutes depending on song length.
+    </div>
+    <!-- The fill has no fixed width; its size and position come from the CSS animation. -->
+    <div class="progress-bar-track">
+      <div class="progress-bar-fill"></div>
+    </div>
+  </div>
+  <!-- Error: text is filled in by the script; role="alert" asks assistive technology to announce it when it appears. -->
+  <div class="error-card" id="errorCard" role="alert"></div>
+  <!-- Results: hidden by default; the script reveals it after a successful upload. -->
+  <div class="results-section" id="resultsSection">
+    <div class="results-header">
+      <span class="results-tag">Output</span>
+      <h2 class="results-title">Your arrangement</h2>
+    </div>
+    <div class="player-wrap">
+      <div class="player-label">
+        <span>Piano Roll · press play to hear it</span>
+        <!-- Colour key for the piano roll; swatch colours are set inline. -->
+        <div class="player-legend">
+          <div class="legend-item">
+            <div class="legend-dot" style="background:#ff6b6b"></div>
+            Playing now
+          </div>
+          <div class="legend-item">
+            <div class="legend-dot" style="background:#4a9eff"></div>
+            Upcoming
+          </div>
+        </div>
+      </div>
+      <!--
+        midi-visualizer renders the scrolling piano roll.
+        midi-player handles play/pause/seek and audio via Magenta soundfont.
+        They are linked by the visualizer="#midiVisualizer" attribute.
+        sound-font enables the Magenta piano soundfont (requires internet).
+      -->
+      <midi-visualizer type="piano-roll" id="midiVisualizer"></midi-visualizer>
+      <midi-player id="midiPlayer" sound-font visualizer="#midiVisualizer"></midi-player>
+    </div>
+    <div class="download-row">
+      <!-- href="#" is a placeholder; the script replaces it with the MIDI URL returned by the server. -->
+      <a class="btn-download" id="midiDownload" href="#" download="keyarrange.mid">
+        <span class="btn-icon">↓</span> Download MIDI
+      </a>
+    </div>
+  </div>
+  <!-- Footer: tagline and a link to the project repository. -->
+  <footer>
+    <span>KeyArrange · audio in, piano arrangement out</span>
+    <span>
+      <!-- rel="noopener noreferrer": the new tab gets no window.opener handle back to this page, even in older browsers. -->
+      <a href="https://github.com/sgoonjan/KeyArrange" target="_blank" rel="noopener noreferrer">GitHub →</a>
+    </span>
+  </footer>
+</div>
+<script>
+  const fileInput    = document.getElementById('fileInput');
+  const dropZone     = document.getElementById('dropZone');
+  const fileSelected = document.getElementById('fileSelected');
+  const fileName     = document.getElementById('fileName');
+  const fileClear    = document.getElementById('fileClear');
+  const arrangeBtn   = document.getElementById('arrangeBtn');
+  const statusCard   = document.getElementById('statusCard');
+  const errorCard    = document.getElementById('errorCard');
+  const results      = document.getElementById('resultsSection');
+  const midiPlayer   = document.getElementById('midiPlayer');
+  const midiDownload = document.getElementById('midiDownload');
+  let selectedFile = null;
+  dropZone.addEventListener('dragover', e => { e.preventDefault(); dropZone.classList.add('drag-over'); });
+  dropZone.addEventListener('dragleave', () => dropZone.classList.remove('drag-over'));
+  dropZone.addEventListener('drop', e => {
+    e.preventDefault();
+    dropZone.classList.remove('drag-over');
+    const f = e.dataTransfer.files[0];
+    if (f) setFile(f);
+  });
+  fileInput.addEventListener('change', () => {
+    if (fileInput.files[0]) setFile(fileInput.files[0]);
+  });
+  fileClear.addEventListener('click', clearFile);
+  function setFile(f) {
+    selectedFile = f;
+    fileName.textContent = f.name;
+    fileSelected.classList.add('visible');
+    arrangeBtn.disabled = false;
+    hide(errorCard); hide(results); hide(statusCard);
+  }
+  function clearFile() {
+    selectedFile = null;
+    fileInput.value = '';
+    fileSelected.classList.remove('visible');
+    arrangeBtn.disabled = true;
+  }
+  arrangeBtn.addEventListener('click', async () => {
+    if (!selectedFile) return;
+    hide(errorCard); hide(results);
+    show(statusCard);
+    arrangeBtn.disabled = true;
+    const form = new FormData();
+    form.append('file', selectedFile);
+    try {
+      const res = await fetch('/upload', { method: 'POST', body: form });
+      const data = await res.json();
+      if (!res.ok) throw new Error(data.detail || 'Something went wrong.');
+      hide(statusCard);
+      show(results);
+      // Setting src on midi-player triggers it to fetch the MIDI and load it
+      // into both the player controls and the linked visualizer automatically.
+      midiPlayer.setAttribute('src', data.midi_url);
+      midiDownload.href = data.midi_url;
+    } catch (err) {
+      hide(statusCard);
+      errorCard.textContent = '⚠ ' + err.message;
+      show(errorCard);
+    } finally {
+      arrangeBtn.disabled = false;
+    }
+  });
+  function show(el) { el.classList.add('visible'); }
+  function hide(el) { el.classList.remove('visible'); }
+</script>
+</body>
+</html>