| | |
| | """ |
| | Accuracy Testing Suite for Rescored Pipeline |
| | |
| | Tests transcription accuracy on 10 diverse piano videos covering different styles and complexities. |
| | """ |
| | import sys |
| | from pathlib import Path |
| | sys.path.insert(0, str(Path(__file__).parent.parent)) |
| |
|
| | from pipeline import TranscriptionPipeline |
| | from app_config import settings |
| | import json |
| | from datetime import datetime |
| |
|
| |
|
| | |
# Catalogue of videos fed to run_accuracy_test(), one dict per video.
#
# Keys read by the code in this file:
#   "id"                - used to build the pipeline job id and reported as "video_id"
#   "url"               - passed to TranscriptionPipeline
#   "description"       - printed and copied into the result
#   "difficulty"        - printed and copied into the result
#   "expected_accuracy" - printed only; never compared against a measured value here
# "notes" is not read anywhere in this file; it is free-form commentary for readers.
TEST_VIDEOS = [
    {
        "id": "simple_melody",
        "url": "https://www.youtube.com/watch?v=TK1Ij_-mank",
        "description": "Simple piano melody - C major scale practice",
        "difficulty": "easy",
        "expected_accuracy": ">80%",
        "notes": "Slow tempo, single notes, clear recording"
    },
    {
        "id": "twinkle_twinkle",
        "url": "https://www.youtube.com/watch?v=YCZ_d_4ZEqk",
        "description": "Twinkle Twinkle Little Star - Beginner piano",
        "difficulty": "easy",
        "expected_accuracy": ">75%",
        "notes": "Very simple melody, slow tempo"
    },
    {
        "id": "fur_elise",
        "url": "https://www.youtube.com/watch?v=_mVW8tgGY_w",
        "description": "Beethoven - Für Elise (simplified)",
        "difficulty": "medium",
        "expected_accuracy": "60-70%",
        "notes": "Classic piece, moderate tempo, some ornaments"
    },
    {
        "id": "chopin_nocturne",
        "url": "https://www.youtube.com/watch?v=9E6b3swbnWg",
        "description": "Chopin - Nocturne Op. 9 No. 2",
        "difficulty": "hard",
        "expected_accuracy": "50-60%",
        "notes": "Complex harmonies, expressive dynamics, rubato"
    },
    {
        "id": "canon_in_d",
        "url": "https://www.youtube.com/watch?v=NlprozGcs80",
        "description": "Pachelbel - Canon in D (piano arrangement)",
        "difficulty": "medium",
        "expected_accuracy": "60-70%",
        "notes": "Repetitive patterns, moderate polyphony"
    },
    {
        "id": "river_flows",
        "url": "https://www.youtube.com/watch?v=7maJOI3QMu0",
        "description": "Yiruma - River Flows in You",
        "difficulty": "medium",
        "expected_accuracy": "60-70%",
        "notes": "Modern piano, flowing arpeggios"
    },
    {
        "id": "moonlight_sonata",
        "url": "https://www.youtube.com/watch?v=4Tr0otuiQuU",
        "description": "Beethoven - Moonlight Sonata (1st movement)",
        "difficulty": "medium",
        "expected_accuracy": "60-70%",
        "notes": "Slow tempo, triplet arpeggios, bass notes"
    },
    {
        "id": "jazz_blues",
        "url": "https://www.youtube.com/watch?v=F3W_alUuFkA",
        "description": "Simple jazz blues piano",
        "difficulty": "medium",
        "expected_accuracy": "55-65%",
        "notes": "Swing rhythm, blue notes, syncopation"
    },
    {
        "id": "claire_de_lune",
        "url": "https://www.youtube.com/watch?v=WNcsUNKlAKw",
        "description": "Debussy - Clair de Lune",
        "difficulty": "hard",
        "expected_accuracy": "50-60%",
        "notes": "Impressionist harmony, complex textures"
    },
    {
        "id": "la_campanella",
        "url": "https://www.youtube.com/watch?v=MD6xMyuZls0",
        "description": "Liszt - La Campanella",
        "difficulty": "very_hard",
        "expected_accuracy": "40-50%",
        "notes": "Virtuosic, extremely fast, wide range, many notes"
    }
]
| |
|
| |
|
def run_accuracy_test(video, verbose=True):
    """
    Run transcription pipeline on a test video and collect metrics.

    The pipeline is run end to end; afterwards the intermediate files it left
    in ``pipeline.temp_dir`` are inspected. Each metric is only recorded when
    the file it is derived from exists, so any of the ``metrics`` sub-keys may
    be absent from a successful result.

    Args:
        video: Dictionary with video metadata. The keys read here are
            "id", "url", "description", "difficulty" and "expected_accuracy".
        verbose: Print progress messages

    Returns:
        Dictionary with test results and metrics. Always contains
        "video_id", "description", "difficulty", "url", "timestamp",
        "success", "error" and "metrics"; "output_files" is added on success.
        Exceptions raised by the pipeline or by metric collection are caught
        and reported through "error" with "success" left False.
    """
    if verbose:
        print(f"\n{'='*70}")
        print(f"Testing: {video['description']}")
        print(f"Difficulty: {video['difficulty']} | Expected: {video['expected_accuracy']}")
        print(f"{'='*70}")

    job_id = f"accuracy_test_{video['id']}"
    storage_path = Path(settings.storage_path)

    def progress_callback(percent, stage, message):
        # Forward pipeline progress to stdout only when the caller asked for it.
        if verbose:
            print(f"[{percent:3d}%] {stage:12s} | {message}")

    result = {
        "video_id": video["id"],
        "description": video["description"],
        "difficulty": video["difficulty"],
        "url": video["url"],
        "timestamp": datetime.utcnow().isoformat(),
        "success": False,
        "error": None,
        "metrics": {}
    }
    metrics = result["metrics"]

    try:
        pipeline = TranscriptionPipeline(job_id, video["url"], storage_path)
        pipeline.set_progress_callback(progress_callback)

        musicxml_path = pipeline.run()

        # Intermediate artefacts the pipeline is expected to leave behind.
        temp_dir = pipeline.temp_dir
        original_audio = temp_dir / "audio.wav"
        other_stem = temp_dir / "htdemucs" / job_id / "other.wav"
        clean_midi = temp_dir / "piano_clean.mid"

        # Imported lazily so a missing optional dependency is reported as a
        # failed test result rather than breaking import of this module.
        import soundfile as sf
        import mido

        if original_audio.exists():
            # Read the source audio once and reuse it for both metrics.
            original_data, sr = sf.read(original_audio)
            metrics["audio_duration_seconds"] = len(original_data) / sr

            if other_stem.exists():
                import numpy as np
                other_data, _ = sf.read(other_stem)

                # Plain floats keep the result JSON-serialisable regardless
                # of the NumPy scalar type returned by np.sum.
                original_energy = float(np.sum(original_data ** 2))
                other_energy = float(np.sum(other_data ** 2))

                metrics["separation"] = {
                    "other_energy_ratio": other_energy / original_energy if original_energy > 0 else 0
                }

        if clean_midi.exists():
            mid = mido.MidiFile(clean_midi)
            # A note_on with velocity 0 is a note-off by MIDI convention, so
            # it must not be counted as a sounded note.
            note_count = sum(
                1
                for track in mid.tracks
                for msg in track
                if msg.type == 'note_on' and msg.velocity > 0
            )

            metrics["midi"] = {
                "total_notes": note_count,
                "duration_seconds": mid.length
            }

        if musicxml_path.exists():
            from music21 import converter
            score = converter.parse(musicxml_path)
            measures = score.parts[0].getElementsByClass('Measure') if score.parts else []

            metrics["musicxml"] = {
                "total_measures": len(measures),
                "file_size_kb": musicxml_path.stat().st_size / 1024
            }

        result["success"] = True
        result["output_files"] = {
            "musicxml": str(musicxml_path),
            "midi": str(clean_midi),
            "temp_dir": str(temp_dir)
        }

        if verbose:
            print(f"\n✅ SUCCESS - Output: {musicxml_path}")
            # Each metric is optional; only report the ones that were
            # collected so a missing file cannot raise after success is set.
            if 'midi' in metrics:
                print(f" MIDI notes: {metrics['midi']['total_notes']}")
            if 'musicxml' in metrics:
                print(f" Measures: {metrics['musicxml']['total_measures']}")
            if 'separation' in metrics:
                sep = metrics['separation']
                print(f" Separation: {sep['other_energy_ratio']:.1%} energy in 'other' stem")

    except Exception as e:
        # Top-level boundary for a single test: record the failure and let
        # the caller continue with the remaining videos.
        result["error"] = str(e)
        if verbose:
            print(f"\n❌ FAILED - Error: {e}")

    return result
| |
|
| |
|
def main():
    """Run accuracy tests on all test videos.

    Runs run_accuracy_test() for every entry in TEST_VIDEOS, prints a
    summary, and writes the full results to
    ``<settings.storage_path>/accuracy_test_results.json``.

    Returns:
        Process exit code: 0 when no test failed, 1 otherwise.
    """
    print("="*70)
    print("Rescored Accuracy Testing Suite")
    print("="*70)
    print(f"Testing {len(TEST_VIDEOS)} videos with varying difficulty")
    print(f"Storage: {settings.storage_path}")
    print()

    results = []
    for i, video in enumerate(TEST_VIDEOS, 1):
        print(f"\n[{i}/{len(TEST_VIDEOS)}] Starting test: {video['id']}")
        result = run_accuracy_test(video, verbose=True)
        results.append(result)

    print("\n" + "="*70)
    print("ACCURACY TEST SUMMARY")
    print("="*70)

    successful = [r for r in results if r["success"]]
    failed = [r for r in results if not r["success"]]

    # Guard against an empty video list so the summary never divides by zero.
    success_rate = len(successful) / len(results) if results else 0.0

    print(f"\nTotal: {len(results)} | Success: {len(successful)} | Failed: {len(failed)}")
    print(f"Success Rate: {success_rate*100:.1f}%")

    if successful:
        print("\n✅ Successful Transcriptions:")
        for r in successful:
            # The "midi" / "musicxml" metrics are only recorded when the
            # corresponding file existed, so fall back to 0 when absent.
            midi_notes = r["metrics"].get("midi", {}).get("total_notes", 0)
            measures = r["metrics"].get("musicxml", {}).get("total_measures", 0)
            print(f" - {r['video_id']:20s} | {midi_notes:4d} notes | {measures:3d} measures | {r['difficulty']}")

    if failed:
        print("\n❌ Failed Transcriptions:")
        for r in failed:
            # "error" defaults to None in the result dict; tolerate that.
            error_text = r['error'] or ""
            print(f" - {r['video_id']:20s} | Error: {error_text[:60]}")

    output_path = Path(settings.storage_path) / "accuracy_test_results.json"
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Explicit encoding so the report does not depend on the platform locale.
    with open(output_path, 'w', encoding="utf-8") as f:
        json.dump({
            "test_date": datetime.utcnow().isoformat(),
            "total_tests": len(results),
            "successful": len(successful),
            "failed": len(failed),
            "success_rate": success_rate,
            "results": results
        }, f, indent=2)

    print(f"\n📊 Full results saved to: {output_path}")

    return 0 if not failed else 1
| |
|
| |
|
# Script entry point: propagate main()'s return value as the process exit code.
if __name__ == "__main__":
    raise SystemExit(main())
| |
|