# Scraped from a Hugging Face Space file view (Space status: Sleeping; file size: 16,100 bytes).
# Blame-gutter commits seen in the scrape: 1a21193, 700fefb, c0d2229, e98f158.
# The page's line-number gutter (1-366) was extraction residue and is omitted here.
import gradio as gr
import google.generativeai as genai
import os
import re
from PIL import Image
from huggingface_hub import InferenceClient
import traceback
# --- Configuration ---
# Both credentials are read once, at import time. A missing or unusable key
# aborts start-up with a non-zero exit status. `raise SystemExit(...)` is used
# instead of `exit()` because `exit` is a helper injected by the `site` module
# for interactive use, and a bare `exit()` reports status 0 (success).

# 1. Configure Gemini API Key
try:
    gemini_api_key = os.environ["GEMINI_API_KEY"]
    if not gemini_api_key:
        # Same guard as the HF_TOKEN check below: set-but-empty is unusable.
        raise ValueError("GEMINI_API_KEY environment variable is set but empty.")
    genai.configure(api_key=gemini_api_key)
    print("Gemini API Key loaded successfully.")
except KeyError:
    print("ERROR: GEMINI_API_KEY environment variable not set.")
    raise SystemExit("Please set the GEMINI_API_KEY environment variable and restart.")
except ValueError as e:
    print(f"ERROR: {e}")
    raise SystemExit(1)
except Exception as e:
    print(f"An unexpected error occurred during Gemini configuration: {e}")
    raise SystemExit(1)

# 2. Configure Together AI Client (using HF_TOKEN environment variable)
try:
    together_ai_key = os.environ["HF_TOKEN"]  # Use HF_TOKEN as requested
    if not together_ai_key:
        raise ValueError("HF_TOKEN environment variable is set but empty.")
    print("Together AI Key (from HF_TOKEN) loaded successfully.")
    # Initialize InferenceClient for Together AI provider
    together_client = InferenceClient(
        provider="together",
        token=together_ai_key,  # Use 'token' argument for the key
    )
    print("Together AI InferenceClient initialized.")
except KeyError:
    print("ERROR: HF_TOKEN environment variable not set (expected for Together AI key).")
    raise SystemExit("Please set the HF_TOKEN environment variable with your Together AI key and restart.")
except ValueError as e:
    print(f"ERROR: {e}")
    raise SystemExit(1)
except Exception as e:
    print(f"An unexpected error occurred during Together AI client initialization: {e}")
    traceback.print_exc()  # Print full traceback for debugging
    raise SystemExit(1)
# --- Model Settings ---
# Gemini model used for lyric generation.
GEMINI_MODEL_NAME = "gemini-1.5-pro-latest"  # Explicitly set as requested

# Sampling options passed as `generation_config` when the Gemini model is built.
LYRIC_GENERATION_CONFIG = dict(
    temperature=0.7,
    top_p=0.95,
    max_output_tokens=1024,
)

# One entry per harm category, all blocked at the same threshold.
LYRIC_SAFETY_SETTINGS = [
    {"category": harm_category, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
    for harm_category in (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]

# Stable Diffusion XL (served through Together AI).
SDXL_MODEL_ID = "stabilityai/stable-diffusion-xl-base-1.0"  # Model on Together AI

# Keyword arguments forwarded to InferenceClient.text_to_image.
# Check the client documentation for exact naming if issues arise.
SDXL_PARAMS = dict(
    guidance_scale=7.5,  # "CFG Scale" in most SD front-ends
    num_inference_steps=50,  # "Steps" in most SD front-ends
    width=1024,
    height=1024,
    negative_prompt="blurry, distorted, low quality, pixelated, text, words, letters, watermark, signature, deformed faces, multiple images, grids, writing, font",
)
# --- Prompt Templates ---
# Lyric prompt. Filled with str.format() in generate_lyrics; placeholders are
# {user_genre}, {user_mood}, {user_purpose} and {user_description}. The
# formatted text is sent to the Gemini model as-is, so any wording change here
# changes what the model is asked. The "Title: ..." instruction matters:
# generate_lyrics parses the reply by looking for a leading "Title:" line.
LYRIC_PROMPT_TEMPLATE = """
You are an expert songwriter with extensive experience in multiple musical genres.
Create original song lyrics with the following specifications:
Genre: {user_genre}
Mood: {user_mood}
Purpose: {user_purpose}
Additional Description: {user_description}
Requirements:
1. Create complete lyrics with verse(s), chorus, and bridge if appropriate.
2. Maintain consistency with the specified genre's conventions.
3. Evoke the requested mood throughout the song.
4. Incorporate themes related to the stated purpose.
5. Include elements from the additional description.
6. Ensure lyrics flow naturally and have musical potential.
7. Length: 3-4 verses, repeating chorus, optional bridge (total 20-30 lines, approximately).
8. Structure the output clearly: Start with 'Title: [Your Song Title]' on the first line, followed by a blank line, then the full lyrics.
Title: [Create an original, compelling title for the song]
[Generate complete song lyrics below this line, following the title format above]
"""
# Cover-art prompt. Filled with str.format() in generate_cover_art_togetherai;
# placeholders are {song_title}, {user_genre}, {user_mood},
# {derived_from_lyrics} and {key_elements_from_lyrics} (the last three appear
# more than once). The formatted text is passed as the `prompt` argument of
# the text-to-image call.
COVER_ART_PROMPT_TEMPLATE = """
Create an album cover art for a song with the following details:
Song Title: {song_title}
Genre: {user_genre}
Mood: {user_mood}
Theme: {derived_from_lyrics}
Key Imagery: {key_elements_from_lyrics}
Style Requirements:
- Professional album cover quality, digital art style.
- Visually represent the mood ({user_mood}) and theme ({derived_from_lyrics}) of the song.
- Incorporate imagery that reflects the song's meaning ({key_elements_from_lyrics}).
- Use a color palette that evokes the specified mood ({user_mood}).
- Design should be appropriate for the musical genre ({user_genre}).
- Include balanced composition, potentially with space where a title could be overlaid later (but do *not* generate the text itself).
- No text, letters, words, signatures, or watermarks in the generated image.
Create a striking, emotionally resonant album cover that a listener would find compelling. High detail, atmospheric lighting.
"""
# --- Core Functions ---
def _split_title_and_lyrics(raw_text):
    """Split a model reply into ``(title, lyrics)``; return None if no title line leads it."""
    text = raw_text.strip()
    # Tolerate leading markdown markers ("## Title:", "**Title:** ...") in
    # addition to the plain "Title: ..." form the prompt asks for.
    match = re.match(r"[#*\s]*Title:\**\s*(.*)", text, re.IGNORECASE)
    if match is None:
        return None
    title = match.group(1).strip().strip("*").strip()
    if not title:
        return None
    # Everything after the title line is lyrics. Slicing at the match end
    # (rather than a second regex) also works when no newline follows.
    lyrics = text[match.end():].strip()
    return title, lyrics


def generate_lyrics(genre, mood, purpose, description):
    """Generate song lyrics with the Gemini API.

    Args:
        genre: Musical genre inserted into the lyric prompt.
        mood: Mood inserted into the lyric prompt.
        purpose: Purpose/theme inserted into the lyric prompt.
        description: Free-form extra guidance inserted into the lyric prompt.

    Returns:
        A ``(title, lyrics)`` tuple of strings. This function does not raise;
        problems are reported through sentinel titles with an explanatory
        message in the second slot:

        * ``"Blocked Content"`` - the reply had no parts and the prompt
          feedback carries a block reason.
        * ``"Empty Response"`` - the reply had no parts and no block reason.
        * ``"Error Generating Lyrics"`` - any exception during the call.
        * ``"Title Not Found"`` - a reply arrived but no title line could be
          parsed; the second slot is the full reply text.
    """
    try:
        model = genai.GenerativeModel(
            model_name=GEMINI_MODEL_NAME,
            generation_config=LYRIC_GENERATION_CONFIG,
            safety_settings=LYRIC_SAFETY_SETTINGS,
        )
        prompt = LYRIC_PROMPT_TEMPLATE.format(
            user_genre=genre,
            user_mood=mood,
            user_purpose=purpose,
            user_description=description,
        )
        print(f"\n--- Sending Prompt to Gemini ({GEMINI_MODEL_NAME}) ---")
        response = model.generate_content(prompt)
        print("--- Received response from Gemini ---")

        # Handle potential blocked content or empty responses before
        # touching response.text.
        if not response.parts:
            feedback = response.prompt_feedback
            if feedback.block_reason:
                block_reason = feedback.block_reason
                safety_ratings = feedback.safety_ratings
                print(f"Warning: Gemini response blocked. Reason: {block_reason}")
                print(f"Safety Ratings: {safety_ratings}")
                return "Blocked Content", f"Lyrics generation blocked due to: {block_reason}. Please revise inputs. Ratings: {safety_ratings}"
            print("Warning: Gemini returned an empty response.")
            return "Empty Response", "Gemini returned no content. Try again or adjust inputs."

        raw_text = response.text
        parsed = _split_title_and_lyrics(raw_text)
        if parsed is None:
            # Fallback if title format isn't matched
            print("Warning: Could not parse title automatically. Returning full text as lyrics.")
            return "Title Not Found", raw_text
        return parsed
    except Exception as e:
        # Top-level boundary for the UI: log everything, return a sentinel.
        print(f"Error during lyric generation: {e}")
        traceback.print_exc()
        # Some API errors carry the response; surface its feedback if present.
        if hasattr(e, 'response') and hasattr(e.response, 'prompt_feedback'):
            print(f"Prompt Feedback: {e.response.prompt_feedback}")
        return "Error Generating Lyrics", f"An error occurred: {e}"
# Matches a line that is only a song-section label, e.g. "(Chorus)",
# "Verse 2:", "**Bridge**". A lyric line that merely starts with such a word
# ("Chorus of angels sing") does not match.
_SECTION_LABEL_RE = re.compile(
    r"^[\[(*#\s]*(?:intro|verse|pre[- ]?chorus|chorus|bridge|hook|refrain|outro)"
    r"\s*\d*\s*[\])*:\s]*$",
    re.IGNORECASE,
)


def extract_themes_for_cover_art(lyrics, mood, purpose):
    """Derive a short theme and key-imagery string for the cover-art prompt.

    This is a simple heuristic (it could be replaced by another LLM call):
    the theme combines mood and purpose, and the key imagery is the mood plus
    the first three lyric lines that are neither blank nor section labels.

    Args:
        lyrics: Full lyric text; ``None`` or empty is treated as no lyrics.
        mood: Mood string supplied by the user.
        purpose: Purpose/theme string supplied by the user; may be empty.

    Returns:
        ``(theme, key_elements)``, both stripped. ``theme`` is cut to 150
        characters and ``key_elements`` to 250, each followed by ``"..."``
        when truncated.
    """
    mood = mood or ""
    purpose = purpose or ""
    # Avoid a dangling "..., related to" when no purpose was given.
    theme = f"{mood}, related to {purpose}" if purpose.strip() else mood

    # splitlines() handles "\r\n" as well as "\n"; lines are stripped so stray
    # indentation does not end up in the prompt.
    stripped_lines = (line.strip() for line in (lyrics or "").splitlines())
    content_lines = [
        line
        for line in stripped_lines
        if line and not line.startswith('[') and not _SECTION_LABEL_RE.match(line)
    ]

    key_elements = f"Imagery reflecting: {mood}. " + " ".join(content_lines[:3])

    # Keep both pieces short so the final image prompt stays manageable.
    if len(key_elements) > 250:
        key_elements = key_elements[:250] + '...'
    if len(theme) > 150:
        theme = theme[:150] + '...'
    return theme.strip(), key_elements.strip()
# --- Actual Cover Art Generation using Together AI ---
def generate_cover_art_togetherai(title, genre, mood, theme, key_imagery):
    """Render album cover art through the Together AI text-to-image endpoint.

    The five arguments are substituted into COVER_ART_PROMPT_TEMPLATE and the
    result is sent, together with the SDXL_PARAMS values, to
    ``together_client.text_to_image``.

    Returns:
        The PIL image returned by the client. If the client returns anything
        that is not a ``PIL.Image.Image``, or the call raises, a placeholder
        image from ``create_placeholder_image`` is returned instead; this
        function does not propagate exceptions.
    """
    print("\n--- Generating Cover Art (Together AI) ---")
    print(
        f"Model: {SDXL_MODEL_ID}",
        f"Title: {title}",
        f"Genre: {genre}",
        f"Mood: {mood}",
        f"Theme: {theme}",
        f"Key Imagery: {key_imagery}",
        sep="\n",
    )

    # Build the detailed SDXL prompt from the shared template.
    template_fields = {
        "song_title": title,
        "user_genre": genre,
        "user_mood": mood,
        "derived_from_lyrics": theme,
        "key_elements_from_lyrics": key_imagery,
    }
    sdxl_prompt = COVER_ART_PROMPT_TEMPLATE.format(**template_fields)
    print(f"\nSDXL Prompt:\n{sdxl_prompt}")
    print(f"\nSDXL Params: {SDXL_PARAMS}")
    print("--- Calling Together AI API ---")

    try:
        # Only these settings are forwarded; looked up inside the try so a
        # missing key is handled like any other failure.
        forwarded = (
            "negative_prompt",
            "guidance_scale",
            "num_inference_steps",
            "width",
            "height",
        )
        image = together_client.text_to_image(
            prompt=sdxl_prompt,
            model=SDXL_MODEL_ID,
            **{option: SDXL_PARAMS[option] for option in forwarded},
        )
        print("--- Received Image from Together AI ---")

        # Guard: anything other than a PIL image is treated as an API error.
        if not isinstance(image, Image.Image):
            print(f"Error: Received unexpected type from API: {type(image)}")
            return create_placeholder_image("API Error (Type)")
        return image
    except Exception as e:
        print(f"Error calling Together AI API: {e}")
        traceback.print_exc()  # Print full traceback
        # Keep the text drawn on the placeholder short.
        error_message = str(e)
        error_message = error_message[:100] + "..." if len(error_message) > 100 else error_message
        return create_placeholder_image(f"API Error:\n{error_message}")
def create_placeholder_image(text="Placeholder"):
    """Create a 1024x1024 grey PIL image with ``text`` drawn centered in white.

    Used as a stand-in when lyric or image generation fails. Drawing is
    best-effort: if the drawing modules cannot be imported or drawing raises,
    the problem is printed and the plain grey image is returned.

    Args:
        text: Message to draw. Explicit newlines start a new line; each line
            is wrapped to at most 39 characters. A non-string value is drawn
            as ``"Invalid Text"``.

    Returns:
        A ``PIL.Image.Image`` in RGB mode.
    """
    import textwrap  # stdlib; imported locally so this function is self-contained

    canvas_size = 1024
    img = Image.new('RGB', (canvas_size, canvas_size), color=(115, 115, 115))  # Grey background
    print(f"Placeholder image generated with text hint: {text}")
    try:
        from PIL import ImageDraw, ImageFont
        draw = ImageDraw.Draw(img)

        # Try the known DejaVu location first, then bare names that Pillow
        # may resolve itself; fall back to the built-in font.
        font = None
        font_candidates = (
            "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
            "DejaVuSans.ttf",
            "arial.ttf",
        )
        for candidate in font_candidates:
            try:
                font = ImageFont.truetype(candidate, 50)
                break
            except OSError:
                continue
        if font is None:
            print("Warning: Default fonts (DejaVuSans, Arial) not found. Using basic PIL font.")
            font = ImageFont.load_default()  # Fallback

        # Wrap text for better display. Callers pass messages such as
        # "Lyric Gen Failed\n<title>", so explicit newlines are honored.
        if isinstance(text, str):
            lines = []
            for paragraph in text.splitlines() or [""]:
                lines.extend(textwrap.wrap(paragraph, width=39) or [""])
        else:
            lines = ["Invalid Text"]

        # NOTE(review): the built-in fallback font may not expose `.size` on
        # some Pillow versions - assume a small default height in that case.
        line_height = getattr(font, "size", 10) + 5

        y_text = (canvas_size - len(lines) * line_height) / 2  # Starting y so the text is vertically centered
        for line in lines:
            text_bbox = draw.textbbox((0, 0), line, font=font)
            text_width = text_bbox[2] - text_bbox[0]
            position = ((canvas_size - text_width) / 2, y_text)
            draw.text(position, line, fill=(255, 255, 255), font=font)  # White text
            y_text += line_height  # Move y down for next line
    except ImportError:
        print("Pillow's ImageDraw/ImageFont not fully available. Placeholder will be blank gray.")
    except Exception as e:
        # Deliberate best-effort: a placeholder without text beats a crash.
        print(f"Error drawing text on placeholder: {e}")
    return img
# --- Main Gradio App Function ---
def music_generator_app(genre, mood, purpose, description):
    """Orchestrate lyric generation followed by cover-art generation.

    Args:
        genre: Genre text from the UI.
        mood: Mood text from the UI.
        purpose: Purpose/theme text from the UI.
        description: Optional extra description; ``None`` is treated as "".

    Returns:
        ``(song_title, lyrics, image)``. When lyric generation fails,
        ``song_title`` is the failure sentinel from ``generate_lyrics``,
        ``lyrics`` is its message, and ``image`` is a placeholder; cover-art
        generation is skipped.
    """
    # Sentinel titles generate_lyrics returns on failure. Compared by
    # equality so a real title that merely contains one of these phrases is
    # not mistaken for an error.
    failure_titles = {"Error Generating Lyrics", "Blocked Content", "Empty Response"}

    description = description or ""

    print("\n" + "="*30)
    print("--- Starting Music Generation Request ---")
    print(f"Inputs: Genre='{genre}', Mood='{mood}', Purpose='{purpose}', Desc='{description[:50]}...'")
    print("="*30 + "\n")

    # 1. Generate Lyrics
    song_title, lyrics = generate_lyrics(genre, mood, purpose, description)
    if song_title in failure_titles:
        # Handle lyric generation failure: return messages and a placeholder
        placeholder_img = create_placeholder_image("Lyric Gen Failed\n" + song_title)
        print(f"Lyric generation failed. Title: {song_title}, Message: {lyrics}")
        return song_title, lyrics, placeholder_img
    print(f"\n--- Lyrics Generated ---\nTitle: {song_title}\nLyrics:\n{lyrics[:150]}...\n------------------------")

    # 2. Generate Cover Art (using actual Together AI call)
    theme, key_imagery = extract_themes_for_cover_art(lyrics, mood, purpose)
    cover_art = generate_cover_art_togetherai(song_title, genre, mood, theme, key_imagery)
    print("--- Music Generation Complete ---")
    return song_title, lyrics, cover_art
# --- Gradio Interface ---
# Layout: heading, then a row with an input column (four text boxes plus the
# generate button) and a wider output column (title, lyrics, cover image),
# then a footer. Components are created in display order.
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🎵 Music Generator 🎶")
    gr.Markdown("Generate song lyrics (Gemini 1.5 Pro) and album cover art (Stable Diffusion XL via Together AI) based on your ideas.")

    with gr.Row():
        # Left column: user inputs.
        with gr.Column(scale=1):
            gr.Markdown("### Input Details")
            input_genre = gr.Textbox(
                label="Genre",
                placeholder="e.g., Indie Folk, Synthwave, Power Metal",
            )
            input_mood = gr.Textbox(
                label="Mood",
                placeholder="e.g., Melancholic, Hopeful, Energetic, Mysterious",
            )
            input_purpose = gr.Textbox(
                label="Purpose / Theme",
                placeholder="e.g., Overcoming hardship, A rainy night drive, Celebrating friendship",
            )
            input_description = gr.Textbox(
                label="Additional Description (Optional)",
                lines=3,
                placeholder="e.g., Include imagery of stars and oceans, mention a specific city, focus on a specific instrument's feel",
            )
            generate_button = gr.Button("✨ Generate Music Concept ✨", variant="primary")

        # Right column: read-only results.
        with gr.Column(scale=2):
            gr.Markdown("### Generated Output")
            output_title = gr.Textbox(label="Song Title", interactive=False)
            output_lyrics = gr.Textbox(
                label="Lyrics",
                lines=15,
                interactive=False,
                max_lines=30,  # Allow more lines for display
            )
            output_cover_art = gr.Image(
                label="Generated Cover Art",
                type="pil",  # Use PIL format
                width=512,
                height=512,
            )

    # Wire the button: four inputs in, (title, lyrics, image) out.
    generate_button.click(
        fn=music_generator_app,
        inputs=[input_genre, input_mood, input_purpose, input_description],
        outputs=[output_title, output_lyrics, output_cover_art],
    )

    gr.Markdown("---")
    gr.Markdown("Powered by Google Gemini (`gemini-1.5-pro-latest`) and Together AI (`stabilityai/stable-diffusion-xl-base-1.0`).")
    gr.Markdown("**Requires environment variables:** `GEMINI_API_KEY` and `HF_TOKEN` (containing your Together AI key).")
# --- Launch the App ---
if __name__ == "__main__":
    # Placeholder images need Pillow's drawing modules to render text; warn
    # early if they cannot be imported. The app still launches either way.
    try:
        from PIL import ImageDraw, ImageFont
    except ImportError:
        print(
            "\nWARNING: Pillow's ImageDraw or ImageFont not found. Placeholder images may lack text.",
            "Install them with: pip install Pillow\n",
            sep="\n",
        )

    app.launch(debug=True)  # Set debug=False for deployment