root
commited on
Commit
·
3feaaf4
1
Parent(s):
173048d
ss
Browse files
app.py
CHANGED
@@ -6,6 +6,7 @@ import numpy as np
|
|
6 |
import re
|
7 |
import pronouncing # Add this to requirements.txt for syllable counting
|
8 |
import functools # Add this for lru_cache functionality
|
|
|
9 |
from transformers import (
|
10 |
AutoModelForAudioClassification,
|
11 |
AutoFeatureExtractor,
|
@@ -2298,6 +2299,109 @@ Improved lyrics with fixed rhythm:
|
|
2298 |
|
2299 |
return lyrics
|
2300 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2301 |
def process_audio(audio_file):
|
2302 |
"""Main function to process audio file, classify genre, and generate lyrics with enhanced rhythm analysis."""
|
2303 |
if audio_file is None:
|
@@ -2524,7 +2628,17 @@ def process_audio(audio_file):
|
|
2524 |
except Exception as e:
|
2525 |
error_msg = f"Error processing audio: {str(e)}"
|
2526 |
print(error_msg)
|
2527 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2528 |
|
2529 |
# Create enhanced Gradio interface with tabs for better organization
|
2530 |
with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
|
@@ -2569,6 +2683,290 @@ with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
|
|
2569 |
with gr.TabItem("Rhythm Analysis"):
|
2570 |
rhythm_analysis_output = gr.Textbox(label="Syllable-Beat Alignment Analysis", lines=16)
|
2571 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2572 |
with gr.TabItem("Syllable Analysis"):
|
2573 |
syllable_analysis_output = gr.Textbox(label="Detailed Syllable Analysis", lines=16)
|
2574 |
prompt_template_output = gr.Textbox(label="Prompt Template", lines=16)
|
@@ -2576,7 +2974,12 @@ with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
|
|
2576 |
# Processing function with better handling of results
|
2577 |
def display_results(audio_file):
|
2578 |
if audio_file is None:
|
2579 |
-
return "Please upload an audio file.", "No emotion analysis available.", "No audio classification available.", "No lyrics generated.", "No rhythm analysis available.",
|
|
|
|
|
|
|
|
|
|
|
2580 |
|
2581 |
try:
|
2582 |
# Process audio and get results
|
@@ -2584,9 +2987,19 @@ with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
|
|
2584 |
|
2585 |
# Check if we got an error message instead of results
|
2586 |
if isinstance(results, str) and "Error" in results:
|
2587 |
-
return results, "Error in analysis", "Error in classification", "No lyrics generated", "No rhythm analysis available",
|
|
|
|
|
|
|
|
|
|
|
2588 |
elif isinstance(results, tuple) and isinstance(results[0], str) and "Error" in results[0]:
|
2589 |
-
return results[0], "Error in analysis", "Error in classification", "No lyrics generated", "No rhythm analysis available",
|
|
|
|
|
|
|
|
|
|
|
2590 |
|
2591 |
# For backwards compatibility, handle both dictionary and tuple returns
|
2592 |
if isinstance(results, dict):
|
@@ -2673,19 +3086,87 @@ with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
|
|
2673 |
else:
|
2674 |
ast_text = "No valid audio classification results available."
|
2675 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2676 |
# Return all results including new fields
|
2677 |
-
return genre_results, emotion_text, ast_text, clean_lyrics, rhythm_analysis, syllable_analysis, prompt_template
|
2678 |
|
2679 |
except Exception as e:
|
2680 |
error_msg = f"Error: {str(e)}"
|
2681 |
print(error_msg)
|
2682 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2683 |
|
2684 |
# Connect the button to the display function with updated outputs
|
2685 |
submit_btn.click(
|
2686 |
fn=display_results,
|
2687 |
inputs=[audio_input],
|
2688 |
-
outputs=[genre_output, emotion_output, ast_output, lyrics_output, rhythm_analysis_output, syllable_analysis_output, prompt_template_output]
|
2689 |
)
|
2690 |
|
2691 |
# Enhanced explanation of how the system works
|
@@ -2722,6 +3203,12 @@ with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
|
|
2722 |
|
2723 |
8. **Refinement**: If significant rhythm mismatches are detected, the system can automatically refine the lyrics for better alignment.
|
2724 |
|
|
|
|
|
|
|
|
|
|
|
|
|
2725 |
This multi-step process creates lyrics that feel naturally connected to the music, as if they were written specifically for it.
|
2726 |
""")
|
2727 |
|
|
|
6 |
import re
|
7 |
import pronouncing # Add this to requirements.txt for syllable counting
|
8 |
import functools # Add this for lru_cache functionality
|
9 |
+
import json # Add this for JSON serialization
|
10 |
from transformers import (
|
11 |
AutoModelForAudioClassification,
|
12 |
AutoFeatureExtractor,
|
|
|
2299 |
|
2300 |
return lyrics
|
2301 |
|
2302 |
+
def prepare_beat_synced_lyrics(audio_data, lyrics, beats_info):
    """
    Prepare data for the beat-synced lyrics viewer.

    Lyric lines are spread evenly over the detected beats, and the words of
    each line are placed either on that line's beats or, when there are more
    words than beats, at equal intervals between the line's start and end.

    Parameters:
        audio_data: Dictionary containing audio features. Only the
            "duration" entry is read here.
        lyrics: String containing generated lyrics. A value that is not a
            string is treated as "no lyrics".
        beats_info: Dictionary containing beat analysis data. The optional
            keys "beat_times", "beat_strengths" (default: 1.0 for every
            beat) and "tempo" (default: 120) are read.

    Returns:
        Dictionary containing data for the beat-synced lyrics viewer, with
        the keys "duration", "tempo", "beat_times", "beat_strengths" and
        "lyrics_data". Each "lyrics_data" item has "line", "start_time",
        "end_time" and "words"; each word has "text", "time", "is_strong".

    Raises:
        KeyError: If audio_data has no "duration" entry.
    """
    duration = audio_data["duration"]

    # Coerce beat data to lists of plain floats: the caller serializes the
    # result with json.dumps, which rejects array-like containers.
    raw_beat_times = beats_info.get("beat_times")
    if raw_beat_times is None:
        raw_beat_times = []
    beat_times = [float(t) for t in raw_beat_times]

    raw_strengths = beats_info.get("beat_strengths")
    if raw_strengths is None:
        beat_strengths = [1.0] * len(beat_times)
    else:
        beat_strengths = [float(s) for s in raw_strengths]

    tempo = beats_info.get("tempo", 120)

    # Clean lyrics - remove trailing notes/analysis and section markers
    lines = []
    if isinstance(lyrics, str):
        text = lyrics
        # Cut the trailing sections off FIRST. The bracket regex below would
        # otherwise delete the "[RHYTHM_ANALYSIS_SECTION]" marker itself and
        # leave the analysis text behind as if it were lyrics.
        for marker in ("[Note:", "[RHYTHM_ANALYSIS_SECTION]"):
            text = text.split(marker)[0]
        # Remove "[Verse]", "[Chorus]", "[Verse 1]", "[Pre-Chorus]", etc.
        text = re.sub(r"\[[^\[\]\n]*\]", "", text)
        lines = [ln.strip() for ln in text.split("\n") if ln.strip()]

    lyrics_data = []
    beat_count = len(beat_times)

    # Simple approach: distribute lines evenly across available beats
    if lines and beat_count > 0:
        beats_per_line = max(1, beat_count // len(lines))

        for i, line in enumerate(lines):
            # Determine beat range for this line
            start_beat_idx = min(i * beats_per_line, beat_count - 1)
            next_beat_idx = start_beat_idx + beats_per_line

            line_start_time = beat_times[start_beat_idx]
            if next_beat_idx < beat_count:
                line_end_time = beat_times[next_beat_idx]
            else:
                # No beat left to close the line: run to the end of the
                # audio so the line does not collapse to zero length.
                line_end_time = max(duration, line_start_time)

            # Keep contractions and hyphenated words together ("Don't",
            # "well-known"); other punctuation becomes its own token.
            tokens = re.findall(r"\w+(?:['\u2019-]\w+)*|[^\w\s]", line)
            if not tokens:
                continue

            # Beats available to this line (the slice clamps at the end)
            line_beat_times = beat_times[start_beat_idx:next_beat_idx + 1]
            if len(line_beat_times) < 2:
                line_beat_times = [line_start_time, line_end_time]

            if len(tokens) <= len(line_beat_times):
                # Enough beats: one word per beat
                word_times = line_beat_times[:len(tokens)]
            else:
                # More words than beats, distribute evenly
                step = (line_end_time - line_start_time) / len(tokens)
                word_times = [
                    line_start_time + j * step for j in range(len(tokens))
                ]

            word_data = [
                {
                    "text": word,
                    "time": word_time,
                    # Simple heuristic for strong beats
                    "is_strong": j == 0 or word[0].isupper(),
                }
                for j, (word, word_time) in enumerate(zip(tokens, word_times))
            ]

            lyrics_data.append({
                "line": line,
                "start_time": line_start_time,
                "end_time": line_end_time,
                "words": word_data,
            })

    # Create visualization data
    visualization_data = {
        "duration": duration,
        "tempo": tempo,
        "beat_times": beat_times,
        "beat_strengths": beat_strengths,
        "lyrics_data": lyrics_data,
    }

    return visualization_data
|
2404 |
+
|
2405 |
def process_audio(audio_file):
|
2406 |
"""Main function to process audio file, classify genre, and generate lyrics with enhanced rhythm analysis."""
|
2407 |
if audio_file is None:
|
|
|
2628 |
except Exception as e:
|
2629 |
error_msg = f"Error processing audio: {str(e)}"
|
2630 |
print(error_msg)
|
2631 |
+
|
2632 |
+
# Use a plain (non-f) triple-quoted string so the embedded JavaScript needs no f-string escaping
|
2633 |
+
error_html = """<script>
|
2634 |
+
document.getElementById('beat-sync-container').removeAttribute('data-sync-info');
|
2635 |
+
document.getElementById('loading-message').style.display = 'block';
|
2636 |
+
document.getElementById('loading-message').innerText = 'Error processing audio';
|
2637 |
+
document.getElementById('beat-sync-timeline').style.display = 'none';
|
2638 |
+
document.getElementById('lyrics-display').style.display = 'none';
|
2639 |
+
</script>"""
|
2640 |
+
|
2641 |
+
return error_msg, "Error in emotion analysis", "Error in audio classification", "No lyrics generated", "No rhythm analysis available", audio_file, error_html, "No syllable analysis available", "No prompt template available"
|
2642 |
|
2643 |
# Create enhanced Gradio interface with tabs for better organization
|
2644 |
with gr.Blocks(title="Music Genre Classifier & Lyrics Generator") as demo:
|
|
|
2683 |
with gr.TabItem("Rhythm Analysis"):
|
2684 |
rhythm_analysis_output = gr.Textbox(label="Syllable-Beat Alignment Analysis", lines=16)
|
2685 |
|
2686 |
+
with gr.TabItem("Beat-Synced Lyrics"):
|
2687 |
+
# Create a container for the beat-synced lyrics viewer
|
2688 |
+
synced_audio_output = gr.Audio(label="Playback with Synced Lyrics", type="filepath")
|
2689 |
+
|
2690 |
+
# Create a custom JavaScript component for the beat-synced lyrics viewer
|
2691 |
+
lyrics_viewer_html = gr.HTML(
|
2692 |
+
"""
|
2693 |
+
<div id="beat-sync-container" style="width:100%; height:400px; position:relative;">
|
2694 |
+
<div id="loading-message">Please analyze audio to view beat-synced lyrics</div>
|
2695 |
+
<div id="beat-sync-timeline" style="display:none; width:100%; height:80px; background:#f0f0f0; position:relative; overflow:hidden; margin-bottom:10px;">
|
2696 |
+
<div id="playhead" style="position:absolute; width:2px; height:100%; background:red; top:0; left:0; z-index:10;"></div>
|
2697 |
+
<div id="beat-markers" style="position:absolute; width:100%; height:100%; top:0; left:0;"></div>
|
2698 |
+
</div>
|
2699 |
+
<div id="lyrics-display" style="display:none; width:100%; height:300px; overflow-y:auto; font-size:16px; line-height:1.6;"></div>
|
2700 |
+
</div>
|
2701 |
+
|
2702 |
+
<script>
|
2703 |
+
let beatSyncData = null;
|
2704 |
+
let isPlaying = false;
|
2705 |
+
let audioElement = null;
|
2706 |
+
let playheadInterval = null;
|
2707 |
+
let lastHighlightedWord = -1;
|
2708 |
+
|
2709 |
+
/**
 * Render the beat timeline and the word-by-word lyrics for one payload.
 *
 * Stores the payload in the shared beatSyncData variable, rebuilds the
 * beat markers and lyric lines from scratch, and installs the timeline
 * click handler used for scrubbing.
 *
 * @param {Object} data Parsed beat-sync payload. Reads duration,
 *   beat_times, beat_strengths and lyrics_data (each line's words with
 *   text, time and is_strong).
 */
function initBeatSyncViewer(data) {
    beatSyncData = data;
    const timeline = document.getElementById('beat-sync-timeline');
    const beatMarkers = document.getElementById('beat-markers');
    const lyricsDisplay = document.getElementById('lyrics-display');
    const loadingMessage = document.getElementById('loading-message');

    // Clear previous content
    beatMarkers.innerHTML = '';
    lyricsDisplay.innerHTML = '';

    // Show the viewer elements, hide loading message
    loadingMessage.style.display = 'none';
    timeline.style.display = 'block';
    lyricsDisplay.style.display = 'block';

    // Create beat markers
    const duration = data.duration;
    const beatTimes = data.beat_times;
    const beatStrengths = data.beat_strengths;

    if (beatTimes && beatTimes.length > 0) {
        for (let i = 0; i < beatTimes.length; i++) {
            const beatTime = beatTimes[i];
            // Fall back to full strength only when the value is missing;
            // a real strength of 0 must stay 0.
            const beatStrength = (beatStrengths && typeof beatStrengths[i] === 'number')
                ? beatStrengths[i]
                : 1.0;
            // Guard against a zero/absent duration producing NaN or Infinity
            const position = duration > 0 ? (beatTime / duration) * 100 : 0;

            // Create marker with height based on beat strength
            // (30-80px when the strength lies within 0..1)
            const marker = document.createElement('div');
            const height = 30 + (beatStrength * 50);

            marker.className = 'beat-marker';
            marker.style.position = 'absolute';
            marker.style.left = `${position}%`;
            marker.style.top = `${(80 - height) / 2}px`;
            marker.style.width = '2px';
            marker.style.height = `${height}px`;
            marker.style.background = beatStrength > 0.7 ? '#2d7dd2' : '#97c6e3';
            marker.setAttribute('data-time', beatTime);

            beatMarkers.appendChild(marker);
        }
    }

    // Create lyrics display
    if (data.lyrics_data && data.lyrics_data.length > 0) {
        for (let i = 0; i < data.lyrics_data.length; i++) {
            const line = data.lyrics_data[i];
            const lineElement = document.createElement('div');
            lineElement.className = 'lyric-line';
            lineElement.style.marginBottom = '15px';

            // Create word elements for the line
            (line.words || []).forEach((word, j) => {
                const wordSpan = document.createElement('span');
                wordSpan.innerText = word.text + ' ';
                wordSpan.className = 'lyric-word';
                wordSpan.style.display = 'inline-block';
                wordSpan.style.transition = 'color 0.1s, transform 0.1s';
                wordSpan.setAttribute('data-time', word.time);
                wordSpan.setAttribute('data-word-index', j);
                wordSpan.setAttribute('data-line-index', i);

                if (word.is_strong) {
                    wordSpan.style.fontWeight = 'bold';
                }

                lineElement.appendChild(wordSpan);
            });

            lyricsDisplay.appendChild(lineElement);
        }
    } else {
        lyricsDisplay.innerHTML = '<p>No lyrics data available or could not align lyrics with beats.</p>';
    }

    // Timeline click handler for scrubbing. Assigned through onclick rather
    // than addEventListener so that calling this function again replaces the
    // handler instead of stacking another one on the same element.
    timeline.onclick = function(e) {
        if (!audioElement) return;

        const rect = timeline.getBoundingClientRect();
        const clickPosition = (e.clientX - rect.left) / rect.width;
        const newTime = clickPosition * duration;

        // Set audio to new position
        audioElement.currentTime = newTime;

        // Update playhead and lyrics
        updatePlayhead(newTime);
        highlightLyricsAtTime(newTime);
    };
}
|
2802 |
+
|
2803 |
+
/**
 * Bind the viewer to the audio element found inside the given container.
 *
 * Does nothing when the container does not exist. When the container has
 * no audio child, the shared audioElement is left as null.
 *
 * @param {string} audioSelector CSS selector of the element that wraps
 *   the audio player.
 */
function connectAudio(audioSelector) {
    const host = document.querySelector(audioSelector);
    if (host === null) {
        return;
    }

    audioElement = host.querySelector('audio');
    if (audioElement === null) {
        return;
    }

    // Re-sync the playhead and the highlighted word after a manual seek
    const resyncAfterSeek = function() {
        updatePlayhead(audioElement.currentTime);
        highlightLyricsAtTime(audioElement.currentTime);
    };

    const bindings = [
        ['play', startPlayheadMovement],
        ['pause', stopPlayheadMovement],
        ['ended', stopPlayheadMovement],
        ['seeked', resyncAfterSeek]
    ];
    bindings.forEach(([eventName, handler]) => {
        audioElement.addEventListener(eventName, handler);
    });
}
|
2820 |
+
|
2821 |
+
/**
 * Start the periodic refresh of playhead and lyric highlight.
 *
 * Any timer that is already running is cancelled first, so only one
 * refresh timer is active at a time.
 */
function startPlayheadMovement() {
    isPlaying = true;

    if (playheadInterval) {
        clearInterval(playheadInterval);
    }

    const refresh = () => {
        if (audioElement && isPlaying) {
            updatePlayhead(audioElement.currentTime);
            highlightLyricsAtTime(audioElement.currentTime);
        }
    };

    // Update every 50ms
    playheadInterval = setInterval(refresh, 50);
}
|
2831 |
+
|
2832 |
+
/**
 * Stop the periodic refresh started by startPlayheadMovement and mark
 * playback as not running.
 */
function stopPlayheadMovement() {
    isPlaying = false;

    if (!playheadInterval) {
        return;
    }

    clearInterval(playheadInterval);
    playheadInterval = null;
}
|
2839 |
+
|
2840 |
+
/**
 * Move the playhead marker to the position that matches currentTime.
 *
 * Does nothing until beat-sync data has been loaded.
 *
 * @param {number} currentTime Playback position, in the same unit as
 *   beatSyncData.duration.
 */
function updatePlayhead(currentTime) {
    if (beatSyncData) {
        const marker = document.getElementById('playhead');
        const percent = (currentTime / beatSyncData.duration) * 100;
        marker.style.left = percent + '%';
    }
}
|
2847 |
+
|
2848 |
+
/**
 * Restyle the lyric words for the given playback time.
 *
 * Every element with class lyric-word is first reset to black at normal
 * scale. Words whose data-time lies more than 0.5 before currentTime are
 * greyed out, and the word closest to currentTime (among those starting no
 * later than currentTime + 0.2) is highlighted and, if its line is outside
 * the visible part of the lyrics box, scrolled into view.
 *
 * Does nothing until beat-sync data with lyrics_data has been loaded.
 *
 * @param {number} currentTime Playback position to highlight for.
 */
function highlightLyricsAtTime(currentTime) {
    if (!beatSyncData || !beatSyncData.lyrics_data) return;

    // Reset all word styling
    const words = document.querySelectorAll('.lyric-word');
    words.forEach(word => {
        word.style.color = 'black';
        word.style.transform = 'scale(1)';
    });

    // Find the current word to highlight
    let currentWordElement = null;
    let bestTimeDiff = Infinity;

    words.forEach(word => {
        const wordTime = parseFloat(word.getAttribute('data-time'));

        // Only consider words that have already been passed or start within
        // the next 0.2 of the current time
        if (wordTime <= currentTime + 0.2) {
            const timeDiff = Math.abs(wordTime - currentTime);

            // Mark past words as "read"
            if (wordTime < currentTime - 0.5) {
                word.style.color = '#666666';
            }

            // Find the closest word to current time
            if (timeDiff < bestTimeDiff) {
                bestTimeDiff = timeDiff;
                currentWordElement = word;
            }
        }
    });

    // Highlight current word (this overrides the grey "read" colour if the
    // closest word is also an old one)
    if (currentWordElement) {
        currentWordElement.style.color = '#e63946';
        currentWordElement.style.transform = 'scale(1.1)';

        // Scroll to keep the current line visible
        const lineIndex = parseInt(currentWordElement.getAttribute('data-line-index'));
        const lineElement = document.querySelectorAll('.lyric-line')[lineIndex];

        if (lineElement) {
            const lyricsDisplay = document.getElementById('lyrics-display');
            const displayRect = lyricsDisplay.getBoundingClientRect();
            const lineRect = lineElement.getBoundingClientRect();

            // Check if the line is outside the visible area
            if (lineRect.top < displayRect.top || lineRect.bottom > displayRect.bottom) {
                lineElement.scrollIntoView({ behavior: 'smooth', block: 'center' });
            }
        }
    }
}
|
2903 |
+
|
2904 |
+
// Wait for Gradio to fully load the components
|
2905 |
+
/**
 * Wire the viewer up once the page has had time to render.
 *
 * After a 2 second delay this connects to the audio player, starts
 * watching the beat-sync container for new data in its data-sync-info
 * attribute, and polls for the audio element if it was not found yet.
 */
function waitForGradio() {
    setTimeout(() => {
        // Connect to the audio element when available
        connectAudio('#component-17'); // Replace with the actual selector

        // Check for data updates from Gradio
        const container = document.getElementById('beat-sync-container');
        if (container) {
            const loadFromAttribute = () => {
                const dataStr = container.getAttribute('data-sync-info');
                if (!dataStr) return;
                try {
                    initBeatSyncViewer(JSON.parse(dataStr));
                } catch (e) {
                    console.error('Error parsing beat sync data:', e);
                }
            };

            const observer = new MutationObserver(loadFromAttribute);
            observer.observe(container, {
                attributes: true,
                attributeFilter: ['data-sync-info']
            });

            // The observer only reports later changes; pick up data that
            // was already present before observation started.
            loadFromAttribute();
        }

        // Fallback lookup: scan every audio element on the page
        function tryConnectAudio() {
            const audioElements = document.querySelectorAll('audio');
            for (const audio of audioElements) {
                const parent = audio.parentElement;
                if (parent && (parent.closest('#component-17') ||
                               parent.closest('.beat-synced-lyrics-tab'))) {
                    audioElement = audio;
                    audioElement.addEventListener('play', startPlayheadMovement);
                    audioElement.addEventListener('pause', stopPlayheadMovement);
                    audioElement.addEventListener('ended', stopPlayheadMovement);
                    audioElement.addEventListener('seeked', function() {
                        updatePlayhead(audioElement.currentTime);
                        highlightLyricsAtTime(audioElement.currentTime);
                    });
                    return true;
                }
            }
            return false;
        }

        // Keep trying until we find the audio element. Skipped when
        // connectAudio already succeeded, so the seeked handler is not
        // registered twice. Bounded so a page without the player does not
        // poll forever.
        if (!audioElement) {
            let attemptsLeft = 30;
            const poll = () => {
                if (tryConnectAudio()) return;
                attemptsLeft -= 1;
                if (attemptsLeft > 0) {
                    setTimeout(poll, 1000); // Retry after 1 second
                }
            };
            poll();
        }
    }, 2000);
}
|
2959 |
+
|
2960 |
+
// Initialize when DOM is ready
|
2961 |
+
if (document.readyState === 'loading') {
|
2962 |
+
document.addEventListener('DOMContentLoaded', waitForGradio);
|
2963 |
+
} else {
|
2964 |
+
waitForGradio();
|
2965 |
+
}
|
2966 |
+
</script>
|
2967 |
+
"""
|
2968 |
+
)
|
2969 |
+
|
2970 |
with gr.TabItem("Syllable Analysis"):
|
2971 |
syllable_analysis_output = gr.Textbox(label="Detailed Syllable Analysis", lines=16)
|
2972 |
prompt_template_output = gr.Textbox(label="Prompt Template", lines=16)
|
|
|
2974 |
# Processing function with better handling of results
|
2975 |
def display_results(audio_file):
|
2976 |
if audio_file is None:
|
2977 |
+
return "Please upload an audio file.", "No emotion analysis available.", "No audio classification available.", "No lyrics generated.", "No rhythm analysis available.", audio_file, """<script>
|
2978 |
+
document.getElementById('beat-sync-container').removeAttribute('data-sync-info');
|
2979 |
+
document.getElementById('loading-message').style.display = 'block';
|
2980 |
+
document.getElementById('beat-sync-timeline').style.display = 'none';
|
2981 |
+
document.getElementById('lyrics-display').style.display = 'none';
|
2982 |
+
</script>""", "No syllable analysis available.", "No prompt template available."
|
2983 |
|
2984 |
try:
|
2985 |
# Process audio and get results
|
|
|
2987 |
|
2988 |
# Check if we got an error message instead of results
|
2989 |
if isinstance(results, str) and "Error" in results:
|
2990 |
+
return results, "Error in analysis", "Error in classification", "No lyrics generated", "No rhythm analysis available", audio_file, """<script>
|
2991 |
+
document.getElementById('beat-sync-container').removeAttribute('data-sync-info');
|
2992 |
+
document.getElementById('loading-message').style.display = 'block';
|
2993 |
+
document.getElementById('beat-sync-timeline').style.display = 'none';
|
2994 |
+
document.getElementById('lyrics-display').style.display = 'none';
|
2995 |
+
</script>""", "No syllable analysis available", "No prompt template available"
|
2996 |
elif isinstance(results, tuple) and isinstance(results[0], str) and "Error" in results[0]:
|
2997 |
+
return results[0], "Error in analysis", "Error in classification", "No lyrics generated", "No rhythm analysis available", audio_file, """<script>
|
2998 |
+
document.getElementById('beat-sync-container').removeAttribute('data-sync-info');
|
2999 |
+
document.getElementById('loading-message').style.display = 'block';
|
3000 |
+
document.getElementById('beat-sync-timeline').style.display = 'none';
|
3001 |
+
document.getElementById('lyrics-display').style.display = 'none';
|
3002 |
+
</script>""", "No syllable analysis available", "No prompt template available"
|
3003 |
|
3004 |
# For backwards compatibility, handle both dictionary and tuple returns
|
3005 |
if isinstance(results, dict):
|
|
|
3086 |
else:
|
3087 |
ast_text = "No valid audio classification results available."
|
3088 |
|
3089 |
+
# Prepare beat-synced lyrics visualization data
|
3090 |
+
try:
|
3091 |
+
audio_data = extract_audio_features(audio_file)
|
3092 |
+
|
3093 |
+
# Get beat information
|
3094 |
+
y, sr = load_audio(audio_file, SAMPLE_RATE)
|
3095 |
+
beats_info = detect_beats(y, sr)
|
3096 |
+
|
3097 |
+
# Prepare data for beat-synced lyrics
|
3098 |
+
visualization_data = prepare_beat_synced_lyrics(audio_data, clean_lyrics, beats_info)
|
3099 |
+
|
3100 |
+
# Convert to JSON for JavaScript
|
3101 |
+
visualization_json = json.dumps(visualization_data)
|
3102 |
+
|
3103 |
+
# Create HTML with the data injected - avoid using f-string for the entire HTML
|
3104 |
+
# Handle string escaping separately to avoid f-string backslash issues
|
3105 |
+
escaped_json = visualization_json.replace("'", "\\'")
|
3106 |
+
|
3107 |
+
# Create HTML in parts to avoid f-string backslash issues
|
3108 |
+
html_start = """<div id="beat-sync-container" data-sync-info='"""
|
3109 |
+
html_middle = """' style="width:100%; height:400px; position:relative;">
|
3110 |
+
<div id="loading-message">Loading beat-synced lyrics viewer...</div>
|
3111 |
+
<div id="beat-sync-timeline" style="display:none; width:100%; height:80px; background:#f0f0f0; position:relative; overflow:hidden; margin-bottom:10px;">
|
3112 |
+
<div id="playhead" style="position:absolute; width:2px; height:100%; background:red; top:0; left:0; z-index:10;"></div>
|
3113 |
+
<div id="beat-markers" style="position:absolute; width:100%; height:100%; top:0; left:0;"></div>
|
3114 |
+
</div>
|
3115 |
+
<div id="lyrics-display" style="display:none; width:100%; height:300px; overflow-y:auto; font-size:16px; line-height:1.6;"></div>
|
3116 |
+
</div>
|
3117 |
+
<script>
|
3118 |
+
// Signal to the viewer that new data is available
|
3119 |
+
const container = document.getElementById('beat-sync-container');
|
3120 |
+
if (container) {
|
3121 |
+
// This will trigger the mutation observer
|
3122 |
+
container.setAttribute('data-sync-info', '"""
|
3123 |
+
html_end = """');
|
3124 |
+
}
|
3125 |
+
</script>"""
|
3126 |
+
|
3127 |
+
# Combine parts without using f-strings in the parts that don't need variables
|
3128 |
+
beat_sync_html = html_start + visualization_json + html_middle + escaped_json + html_end
|
3129 |
+
except Exception as e:
|
3130 |
+
print(f"Error creating beat-synced lyrics: {str(e)}")
|
3131 |
+
# Handle string escaping separately to avoid f-string backslash issues
|
3132 |
+
escaped_error = str(e).replace("'", "\\'")
|
3133 |
+
|
3134 |
+
# Use regular strings instead of f-strings to avoid backslash issues
|
3135 |
+
html_start = """<script>
|
3136 |
+
document.getElementById('beat-sync-container').removeAttribute('data-sync-info');
|
3137 |
+
document.getElementById('loading-message').style.display = 'block';
|
3138 |
+
document.getElementById('loading-message').innerText = 'Error creating beat-synced lyrics: """
|
3139 |
+
html_end = """';
|
3140 |
+
document.getElementById('beat-sync-timeline').style.display = 'none';
|
3141 |
+
document.getElementById('lyrics-display').style.display = 'none';
|
3142 |
+
</script>"""
|
3143 |
+
|
3144 |
+
# Combine parts without using f-strings
|
3145 |
+
beat_sync_html = html_start + escaped_error + html_end
|
3146 |
+
|
3147 |
# Return all results including new fields
|
3148 |
+
return genre_results, emotion_text, ast_text, clean_lyrics, rhythm_analysis, audio_file, beat_sync_html, syllable_analysis, prompt_template
|
3149 |
|
3150 |
except Exception as e:
|
3151 |
error_msg = f"Error: {str(e)}"
|
3152 |
print(error_msg)
|
3153 |
+
|
3154 |
+
# Use a plain (non-f) triple-quoted string so the embedded JavaScript needs no f-string escaping
|
3155 |
+
error_html = """<script>
|
3156 |
+
document.getElementById('beat-sync-container').removeAttribute('data-sync-info');
|
3157 |
+
document.getElementById('loading-message').style.display = 'block';
|
3158 |
+
document.getElementById('loading-message').innerText = 'Error processing audio';
|
3159 |
+
document.getElementById('beat-sync-timeline').style.display = 'none';
|
3160 |
+
document.getElementById('lyrics-display').style.display = 'none';
|
3161 |
+
</script>"""
|
3162 |
+
|
3163 |
+
return error_msg, "Error in emotion analysis", "Error in audio classification", "No lyrics generated", "No rhythm analysis available", audio_file, error_html, "No syllable analysis available", "No prompt template available"
|
3164 |
|
3165 |
# Connect the button to the display function with updated outputs
|
3166 |
submit_btn.click(
|
3167 |
fn=display_results,
|
3168 |
inputs=[audio_input],
|
3169 |
+
outputs=[genre_output, emotion_output, ast_output, lyrics_output, rhythm_analysis_output, synced_audio_output, lyrics_viewer_html, syllable_analysis_output, prompt_template_output]
|
3170 |
)
|
3171 |
|
3172 |
# Enhanced explanation of how the system works
|
|
|
3203 |
|
3204 |
8. **Refinement**: If significant rhythm mismatches are detected, the system can automatically refine the lyrics for better alignment.
|
3205 |
|
3206 |
+
9. **Beat-Synced Visualization**: The beat-synced lyrics viewer shows you exactly how the lyrics align with the music:
|
3207 |
+
- Beat markers show the song's rhythmic structure
|
3208 |
+
- Words are highlighted in sync with the music
|
3209 |
+
- Strong beats and stressed syllables are emphasized
|
3210 |
+
- You can scrub through the song to see how lyrics and music match at any point
|
3211 |
+
|
3212 |
This multi-step process creates lyrics that feel naturally connected to the music, as if they were written specifically for it.
|
3213 |
""")
|
3214 |
|