SingA / app.py
latterworks's picture
Update app.py
ce13058 verified
raw
history blame
7.84 kB
import gradio as gr
from pathlib import Path
import yt_dlp
import logging
import librosa
import numpy as np
from PIL import Image
import ffmpeg
import shutil
import tempfile
import time
# Configure the root logger at DEBUG level so the logging.debug(...) calls
# in this file (temp-dir creation, download path, cleanup) are emitted.
logging.basicConfig(level=logging.DEBUG)
def _download_youtube_audio(url, work_dir):
    """Download the audio track of *url* into *work_dir* as an mp3.

    Args:
        url (str): YouTube video URL (already validated by the caller).
        work_dir (Path): Existing directory that receives the download.

    Returns:
        Path: Location of the extracted mp3 file.

    Raises:
        FileNotFoundError: If no mp3 file was produced in *work_dir*.
    """
    # YouTube download options (audio only)
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': str(work_dir / '%(title)s.%(ext)s'),
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        'restrictfilenames': True,
        'noplaylist': True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=True)
        # 'restrictfilenames' rewrites the title on disk, so the raw
        # info['title'] does not match the file name. Ask yt-dlp for the name
        # it used and swap in the extension the postprocessor produces.
        audio_file = Path(ydl.prepare_filename(info)).with_suffix(".mp3")
    if not audio_file.exists():
        # Fallback: work_dir is private to this run, so any mp3 in it is ours.
        mp3_files = sorted(work_dir.glob("*.mp3"))
        if not mp3_files:
            raise FileNotFoundError(f"No mp3 file was produced in {work_dir}")
        audio_file = mp3_files[0]
    return audio_file


def _summarize_audio_features(audio_file):
    """Return a one-line summary of MFCC, spectral centroid and tempo.

    Args:
        audio_file (Path): Audio file readable by ``librosa.load``.

    Returns:
        str: Human-readable feature summary (no trailing period).
    """
    y, sr = librosa.load(audio_file)
    hop_length = 512  # Valid hop_length to fix "Invalid hop_length: 0" error
    logging.debug(f"Using hop_length: {hop_length}")
    # Extract features
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, hop_length=hop_length)
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr, hop_length=hop_length)
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr, hop_length=hop_length)
    # beat_track may hand back the tempo as a NumPy array rather than a
    # float; an array cannot be formatted with ':.2f', so reduce it to a scalar.
    tempo_bpm = float(np.atleast_1d(tempo).ravel()[0])
    # Aggregate features
    mfcc_mean = np.mean(mfcc, axis=1).tolist()[:3]  # Mean of first 3 MFCC coefficients
    spectral_centroid_mean = np.mean(spectral_centroid)
    return (
        f"Audio Features: MFCC (mean of first 3 coeffs): {mfcc_mean}, "
        f"Spectral Centroid: {spectral_centroid_mean:.2f} Hz, "
        f"Tempo: {tempo_bpm:.2f} BPM"
    )


def analyze_audio(youtube_url, input_text, input_image=None, slider_value=50, checkbox_value=False):
    """
    Download YouTube audio, analyze it with librosa, and process the other inputs.

    A private scratch directory is created for the download and always removed
    before returning. The mp3 that is handed back is first moved to its own
    temporary directory so that it still exists when the caller reads the
    returned path; that directory is intentionally NOT removed here.

    Args:
        youtube_url (str): YouTube video URL (optional). Surrounding whitespace
            is ignored; empty/None skips the download.
        input_text (str): Text input for processing.
        input_image (PIL.Image, optional): Image input; brightened by 1.5x when given.
        slider_value (float): Numerical parameter (0-100), reported as a threshold.
        checkbox_value (bool): Toggle for enhanced analysis.

    Returns:
        tuple: (processed_text, output_image_display, output_audio, extra_info).
        On an invalid URL or an unexpected failure the tuple is
        (error message, None, None, "Processing failed.").
    """
    # Create a unique temporary directory for this run
    temp_dir = Path(tempfile.mkdtemp(prefix="audio_analysis_"))
    output_dir = temp_dir / "downloaded_media"
    output_dir.mkdir(parents=True, exist_ok=True)
    logging.debug(f"Created temporary directory: {temp_dir}, output directory: {output_dir}")
    try:
        # Initialize outputs
        processed_text = f"Processed: '{input_text}'."
        output_image_display = input_image
        output_audio = None
        extra_info = f"Threshold: {slider_value/100:.2f}"

        # Pasted URLs often carry stray whitespace; None means "no URL".
        url = (youtube_url or "").strip()

        # Handle YouTube download if URL is provided
        if url:
            try:
                # Validate YouTube URL
                if not url.startswith(("https://www.youtube.com/", "https://youtu.be/")):
                    return "Error: Invalid YouTube URL", None, None, "Processing failed."

                downloaded = _download_youtube_audio(url, output_dir)
                logging.debug(f"Downloaded audio: {downloaded}")

                # The scratch directory is deleted in 'finally', which runs
                # before the caller ever sees the returned path. Move the mp3
                # somewhere that outlives this call.
                keep_dir = Path(tempfile.mkdtemp(prefix="audio_result_"))
                audio_file = Path(shutil.move(str(downloaded), str(keep_dir / downloaded.name)))
                output_audio = str(audio_file)

                # Perform automatic audio feature analysis with librosa
                features_summary = _summarize_audio_features(audio_file)
                processed_text += f" {features_summary}."
                extra_info += f", Audio: {audio_file.name}"
            except Exception as e:
                # Best effort: report the failure in the text output and carry
                # on with the remaining (image/slider/checkbox) processing.
                logging.error(f"YouTube download or audio processing error: {str(e)}")
                processed_text += f" Error processing YouTube audio: {str(e)}."

        # Handle image processing if provided
        if input_image is not None:
            from PIL import ImageEnhance
            enhancer = ImageEnhance.Brightness(input_image)
            output_image_display = enhancer.enhance(1.5)
            processed_text += " Image processed (brightened)."
        else:
            processed_text += " No image provided."

        # Incorporate slider and checkbox
        processed_text += f" Slider: {slider_value}, Enhanced Analysis: {checkbox_value}."
        if checkbox_value:
            processed_text += " Enhanced analysis enabled."
        if url and slider_value > 50:
            processed_text += f" High threshold ({slider_value}) applied for deeper analysis."
        return processed_text, output_image_display, output_audio, extra_info
    except Exception as e:
        logging.error(f"Error in analyze_audio: {str(e)}")
        return f"Error: {str(e)}", None, None, "Processing failed."
    finally:
        # Remove the scratch directory. No sleep is needed: the returned audio
        # file no longer lives inside it.
        try:
            if temp_dir.exists():
                shutil.rmtree(temp_dir)
                logging.debug(f"Cleaned up temporary directory: {temp_dir}")
        except Exception as e:
            logging.error(f"Error cleaning up temporary directory: {str(e)}")
# --- Input components -------------------------------------------------------
input_youtube_url = gr.Textbox(
    label="YouTube Video URL",
    placeholder="e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ",
    info="Optional: Enter a YouTube URL to download and analyze audio.",
)
input_text_component = gr.Textbox(
    label="Input Text",
    placeholder="e.g., Analyze this audio track",
    info="Type a description or query for processing.",
)
# NOTE: 'info' is only passed to form-style components (Textbox, Slider,
# Checkbox). Image, Audio and Label are created without it.
input_image_component = gr.Image(
    type="pil",
    label="Upload Image (Optional)",
    sources=["upload", "webcam", "clipboard"],
)
input_slider_component = gr.Slider(
    minimum=0,
    maximum=100,
    value=50,
    step=1,
    label="Analysis Threshold",
    info="Adjusts sensitivity of audio feature analysis.",
)
input_checkbox_component = gr.Checkbox(
    label="Enable Enhanced Analysis",
    info="Toggle for deeper audio feature extraction.",
)

# --- Output components ------------------------------------------------------
output_text_component = gr.Textbox(
    label="Analysis Results",
    info="Text results including audio feature analysis.",
)
# Processed image output (if an image was provided).
output_image_component = gr.Image(
    label="Processed Image (if any)",
)
# Audio downloaded from YouTube, returned by analyze_audio as a file path.
output_audio_component = gr.Audio(
    label="Downloaded Audio",
    type="filepath",
)
# Feature analysis details and processing info.
output_label_component = gr.Label(
    label="Analysis Summary",
)

# --- Interface --------------------------------------------------------------
# Inputs/outputs are listed in the same order as analyze_audio's parameters
# and its returned 4-tuple.
iface = gr.Interface(
    fn=analyze_audio,
    inputs=[
        input_youtube_url,
        input_text_component,
        input_image_component,
        input_slider_component,
        input_checkbox_component,
    ],
    outputs=[
        output_text_component,
        output_image_component,
        output_audio_component,
        output_label_component,
    ],
    title="YouTube Audio Feature Analysis",
    description="Download YouTube audio, analyze features with librosa, and process text/image inputs. Customize with slider and checkbox.",
    examples=[
        ["https://www.youtube.com/watch?v=dQw4w9WgXcQ", "Analyze this track", None, 75, True],
        [None, "Describe a music track", None, 30, False],
        ["https://www.youtube.com/watch?v=9bZkp7q19f0", "Extract audio features", None, 60, True],
    ],
    # Two of the examples download from YouTube; never pre-compute them at
    # startup, only run them when a user clicks one.
    cache_examples=False,
    allow_flagging="never",
    theme=gr.themes.Soft(),
)

if __name__ == "__main__":
    iface.launch()