# Source: Hugging Face Space cuneytkaya/LyricsCoverartGenerator
# (Space status when captured: Sleeping; file size: 16,100 bytes)

import gradio as gr
import google.generativeai as genai
import os
import re
from PIL import Image
from huggingface_hub import InferenceClient 
import traceback 

# --- Configuration ---
#
# Both API credentials are read from the environment when this module is
# imported. Nothing below can work without them, so every failure path stops
# the process with a NON-ZERO exit status. `raise SystemExit(...)` is used
# instead of the `exit()` helper because `exit` is injected by the `site`
# module (it is intended for interactive sessions) and a bare `exit()` reports
# status 0, which makes a failed start-up look like a clean shutdown.

# 1. Configure Gemini API Key
try:
    gemini_api_key = os.environ["GEMINI_API_KEY"]
    genai.configure(api_key=gemini_api_key)
    print("Gemini API Key loaded successfully.")
except KeyError:
    print("ERROR: GEMINI_API_KEY environment variable not set.")
    # A string argument is written to stderr and yields exit status 1.
    raise SystemExit("Please set the GEMINI_API_KEY environment variable and restart.") from None
except Exception as e:
    print(f"An unexpected error occurred during Gemini configuration: {e}")
    raise SystemExit(1)

# 2. Configure Together AI Client (using HF_TOKEN environment variable)
try:
    together_ai_key = os.environ["HF_TOKEN"]  # The Together AI key is stored under HF_TOKEN
    if not together_ai_key:
        # A variable that exists but is blank is as unusable as a missing one.
        raise ValueError("HF_TOKEN environment variable is set but empty.")
    print("Together AI Key (from HF_TOKEN) loaded successfully.")
    # Initialize InferenceClient for Together AI provider
    together_client = InferenceClient(
        provider="together",
        token=together_ai_key,  # Use 'token' argument for the key
    )
    print("Together AI InferenceClient initialized.")
except KeyError:
    print("ERROR: HF_TOKEN environment variable not set (expected for Together AI key).")
    raise SystemExit("Please set the HF_TOKEN environment variable with your Together AI key and restart.") from None
except ValueError as e:
    print(f"ERROR: {e}")
    raise SystemExit(1)
except Exception as e:
    print(f"An unexpected error occurred during Together AI client initialization: {e}")
    traceback.print_exc()  # Print full traceback for debugging
    raise SystemExit(1)


# --- Model Settings ---

# Gemini Settings
GEMINI_MODEL_NAME = "gemini-1.5-pro-latest"  # Model used for lyric generation

# Sampling options handed to the Gemini model as its generation config.
LYRIC_GENERATION_CONFIG = dict(
    temperature=0.7,
    top_p=0.95,
    max_output_tokens=1024,
)

# Every harm category is filtered with the same blocking threshold.
_LYRIC_BLOCK_THRESHOLD = "BLOCK_MEDIUM_AND_ABOVE"
LYRIC_SAFETY_SETTINGS = [
    {"category": harm_category, "threshold": _LYRIC_BLOCK_THRESHOLD}
    for harm_category in (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]

# Stable Diffusion XL Settings (via Together AI)
SDXL_MODEL_ID = "stabilityai/stable-diffusion-xl-base-1.0"  # Model on Together AI

# Things the image model is asked to avoid; joined into one comma-separated string.
_NEGATIVE_PROMPT_TERMS = (
    "blurry",
    "distorted",
    "low quality",
    "pixelated",
    "text",
    "words",
    "letters",
    "watermark",
    "signature",
    "deformed faces",
    "multiple images",
    "grids",
    "writing",
    "font",
)

# Keys are named after the keyword arguments passed to the image client.
# Check the client documentation for exact naming if issues arise.
SDXL_PARAMS = dict(
    guidance_scale=7.5,  # Maps from CFG Scale
    num_inference_steps=50,  # Maps from Steps
    width=1024,
    height=1024,
    negative_prompt=", ".join(_NEGATIVE_PROMPT_TERMS),
)

# --- Prompt Templates ---
#
# Both templates are filled in with `str.format`, so every `{name}` below is a
# placeholder that the caller must supply as a keyword argument.

# Lyric prompt. Placeholders: user_genre, user_mood, user_purpose,
# user_description. Requirement 8 asks the model to start its reply with a
# 'Title: ...' line; the lyric parser relies on that header to separate the
# title from the lyrics.
LYRIC_PROMPT_TEMPLATE = """
You are an expert songwriter with extensive experience in multiple musical genres.
Create original song lyrics with the following specifications:

Genre: {user_genre}
Mood: {user_mood}
Purpose: {user_purpose}
Additional Description: {user_description}

Requirements:
1. Create complete lyrics with verse(s), chorus, and bridge if appropriate.
2. Maintain consistency with the specified genre's conventions.
3. Evoke the requested mood throughout the song.
4. Incorporate themes related to the stated purpose.
5. Include elements from the additional description.
6. Ensure lyrics flow naturally and have musical potential.
7. Length: 3-4 verses, repeating chorus, optional bridge (total 20-30 lines, approximately).
8. Structure the output clearly: Start with 'Title: [Your Song Title]' on the first line, followed by a blank line, then the full lyrics.

Title: [Create an original, compelling title for the song]

[Generate complete song lyrics below this line, following the title format above]
"""

# Cover-art prompt. Placeholders: song_title, user_genre, user_mood,
# derived_from_lyrics (theme) and key_elements_from_lyrics (imagery); the
# mood, theme and imagery placeholders are repeated inside the style rules.
COVER_ART_PROMPT_TEMPLATE = """
Create an album cover art for a song with the following details:

Song Title: {song_title}
Genre: {user_genre}
Mood: {user_mood}
Theme: {derived_from_lyrics}
Key Imagery: {key_elements_from_lyrics}

Style Requirements:
- Professional album cover quality, digital art style.
- Visually represent the mood ({user_mood}) and theme ({derived_from_lyrics}) of the song.
- Incorporate imagery that reflects the song's meaning ({key_elements_from_lyrics}).
- Use a color palette that evokes the specified mood ({user_mood}).
- Design should be appropriate for the musical genre ({user_genre}).
- Include balanced composition, potentially with space where a title could be overlaid later (but do *not* generate the text itself).
- No text, letters, words, signatures, or watermarks in the generated image.

Create a striking, emotionally resonant album cover that a listener would find compelling. High detail, atmospheric lighting.
"""

# --- Core Functions ---

# Matches the 'Title: ...' header at the start of a reply. Optional Markdown
# decoration around the label ('**Title:** X', '## Title: X') is tolerated, and
# the title itself may sit on the line after the label.
_TITLE_HEADER_RE = re.compile(r"[#>*_\s]*Title\s*:[*_\s]*(.*)", re.IGNORECASE)


def _split_title_and_lyrics(raw_text):
    """Split a model reply into (title, lyrics); return None if no title header leads it."""
    text = raw_text.strip()  # Leading blank lines must not hide the header
    header = _TITLE_HEADER_RE.match(text)
    if header is None:
        return None
    # Drop Markdown emphasis left around the title, e.g. 'My Song**'.
    title = header.group(1).strip().strip("*_").strip()
    if not title:
        return None
    lyrics = text[header.end():].strip()
    return title, lyrics


def generate_lyrics(genre, mood, purpose, description):
    """Generates song lyrics using the Gemini API.

    The four arguments are substituted into LYRIC_PROMPT_TEMPLATE and sent to
    the model named by GEMINI_MODEL_NAME.

    Returns:
        A ``(title, lyrics)`` tuple of strings. On failure the first element is
        one of the sentinel titles below and the second is a message:

        * ``"Blocked Content"`` - the reply had no parts and the prompt
          feedback carries a block reason.
        * ``"Empty Response"`` - the reply had no parts and no block reason.
        * ``"Error Generating Lyrics"`` - an exception was raised; it is
          logged and reported through the message instead of propagating.

        ``"Title Not Found"`` is returned as the title (with the full reply as
        lyrics) when the reply does not start with a usable 'Title:' header.
    """
    try:
        model = genai.GenerativeModel(
            model_name=GEMINI_MODEL_NAME,
            generation_config=LYRIC_GENERATION_CONFIG,
            safety_settings=LYRIC_SAFETY_SETTINGS,
        )
        prompt = LYRIC_PROMPT_TEMPLATE.format(
            user_genre=genre,
            user_mood=mood,
            user_purpose=purpose,
            user_description=description,
        )
        print(f"\n--- Sending Prompt to Gemini ({GEMINI_MODEL_NAME}) ---")
        response = model.generate_content(prompt)
        print("--- Received response from Gemini ---")

        # Handle blocked content or empty responses before touching
        # `response.text`.
        if not response.parts:
            block_reason = response.prompt_feedback.block_reason
            if block_reason:
                safety_ratings = response.prompt_feedback.safety_ratings
                print(f"Warning: Gemini response blocked. Reason: {block_reason}")
                print(f"Safety Ratings: {safety_ratings}")
                return "Blocked Content", f"Lyrics generation blocked due to: {block_reason}. Please revise inputs. Ratings: {safety_ratings}"
            print("Warning: Gemini returned an empty response.")
            return "Empty Response", "Gemini returned no content. Try again or adjust inputs."

        raw_text = response.text
        parsed = _split_title_and_lyrics(raw_text)
        if parsed is None:
            # Fallback if the title header is missing: keep the whole reply.
            print("Warning: Could not parse title automatically. Returning full text as lyrics.")
            return "Title Not Found", raw_text
        return parsed

    except Exception as e:
        print(f"Error during lyric generation: {e}")
        traceback.print_exc()
        # Some API errors carry the response; log its feedback when present.
        if hasattr(e, 'response') and hasattr(e.response, 'prompt_feedback'):
            print(f"Prompt Feedback: {e.response.prompt_feedback}")
        return "Error Generating Lyrics", f"An error occurred: {e}"

def extract_themes_for_cover_art(lyrics, mood, purpose):
    """Derive a short theme and key-imagery string for the cover-art prompt.

    The theme combines the mood and purpose. The imagery string names the mood
    and appends the first three lyric lines that are neither blank nor section
    tags (lines starting with '['). Theme and imagery are clipped to 150 and
    250 characters respectively, with '...' appended when clipped.

    Returns:
        A ``(theme, key_elements)`` tuple of stripped strings.
    """
    def clip(value, limit):
        # Truncate to `limit` characters and mark the cut with an ellipsis.
        if len(value) > limit:
            return value[:limit] + '...'
        return value

    # Collect up to three usable lines, keeping each line exactly as written.
    picked = []
    for candidate in lyrics.split('\n'):
        bare = candidate.strip()
        if not bare or bare.startswith('['):
            continue
        picked.append(candidate)
        if len(picked) == 3:
            break

    imagery = f"Imagery reflecting: {mood}. " + " ".join(picked)
    summary = f"{mood}, related to {purpose}"

    return clip(summary, 150).strip(), clip(imagery, 250).strip()

# --- Actual Cover Art Generation using Together AI ---
def generate_cover_art_togetherai(title, genre, mood, theme, key_imagery):
    """Render album cover art through the Together AI inference client.

    The arguments are substituted into COVER_ART_PROMPT_TEMPLATE and the
    resulting prompt is sent to SDXL_MODEL_ID together with the generation
    settings stored in SDXL_PARAMS.

    Returns:
        The PIL image produced by the API. If the call raises, or returns
        something that is not a PIL image, a placeholder image describing the
        problem is returned instead.
    """
    print(
        "\n--- Generating Cover Art (Together AI) ---",
        f"Model: {SDXL_MODEL_ID}",
        f"Title: {title}",
        f"Genre: {genre}",
        f"Mood: {mood}",
        f"Theme: {theme}",
        f"Key Imagery: {key_imagery}",
        sep="\n",
    )

    # Fill in the detailed SDXL prompt.
    sdxl_prompt = COVER_ART_PROMPT_TEMPLATE.format(
        song_title=title,
        user_genre=genre,
        user_mood=mood,
        derived_from_lyrics=theme,
        key_elements_from_lyrics=key_imagery,
    )
    print(
        f"\nSDXL Prompt:\n{sdxl_prompt}",
        f"\nSDXL Params: {SDXL_PARAMS}",
        "--- Calling Together AI API ---",
        sep="\n",
    )

    try:
        # Forward exactly the settings the endpoint is called with; the lookup
        # stays inside the try so a missing key is reported like an API error.
        generation_options = {
            option: SDXL_PARAMS[option]
            for option in (
                "negative_prompt",
                "guidance_scale",
                "num_inference_steps",
                "width",
                "height",
            )
        }
        result = together_client.text_to_image(
            prompt=sdxl_prompt,
            model=SDXL_MODEL_ID,
            **generation_options,
        )
        print("--- Received Image from Together AI ---")
        # The client is expected to hand back a PIL image; anything else is
        # reported and replaced with a placeholder.
        if not isinstance(result, Image.Image):
            print(f"Error: Received unexpected type from API: {type(result)}")
            return create_placeholder_image("API Error (Type)")
        return result

    except Exception as exc:
        print(f"Error calling Together AI API: {exc}")
        traceback.print_exc()  # Full traceback for debugging
        # Keep the message short enough to fit on the placeholder image.
        detail = str(exc)
        detail = detail[:100] + "..." if len(detail) > 100 else detail
        return create_placeholder_image(f"API Error:\n{detail}")


def create_placeholder_image(text="Placeholder"):
    """Creates a 1024x1024 grey PIL image with `text` centred on it.

    Drawing the text is best-effort: if the Pillow drawing modules are missing
    or drawing fails, the problem is logged and the plain grey image is
    returned.

    Args:
        text: Message to render. Explicit newlines start a new line and each
            line is wrapped to at most 39 characters (over-long words are
            split). A non-string value is rendered as "Invalid Text".

    Returns:
        The PIL image.
    """
    img = Image.new('RGB', (1024, 1024), color=(115, 115, 115))  # Grey background
    print(f"Placeholder image generated with text hint: {text}")
    try:
        import textwrap
        from PIL import ImageDraw, ImageFont

        draw = ImageDraw.Draw(img)
        try:
            # Load the exact file whose existence was checked instead of
            # relying on a font-directory search for the bare file name.
            dejavu_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
            font_path = dejavu_path if os.path.exists(dejavu_path) else "arial.ttf"
            font = ImageFont.truetype(font_path, 50)
        except IOError:
            print("Warning: Default fonts (DejaVuSans, Arial) not found. Using basic PIL font.")
            font = ImageFont.load_default()  # Fallback

        # Wrap the text. Callers embed '\n' in their messages, so each
        # explicit line is wrapped on its own rather than being flattened.
        if isinstance(text, str):
            lines = []
            for paragraph in text.splitlines():
                # wrap() returns [] for a blank paragraph; keep it as a blank line.
                lines.extend(textwrap.wrap(paragraph, width=39) or [""])
        else:
            lines = ["Invalid Text"]

        # The fallback font may not expose `size`; assume a small glyph
        # height in that case instead of aborting the whole drawing step.
        line_height = getattr(font, "size", 10) + 5
        img_width, img_height = img.size
        y_text = (img_height - len(lines) * line_height) / 2  # Vertically centre the block

        for line in lines:
            text_bbox = draw.textbbox((0, 0), line, font=font)
            text_width = text_bbox[2] - text_bbox[0]
            position = ((img_width - text_width) / 2, y_text)  # Horizontally centre the line
            draw.text(position, line, fill=(255, 255, 255), font=font)  # White text
            y_text += line_height

    except ImportError:
        print("Pillow's ImageDraw/ImageFont not fully available. Placeholder will be blank gray.")
    except Exception as e:
        print(f"Error drawing text on placeholder: {e}")

    return img


# --- Main Gradio App Function ---

def music_generator_app(genre, mood, purpose, description):
    """Run the full pipeline for one request: lyrics first, then cover art.

    Returns:
        A ``(song_title, lyrics, image)`` tuple. If lyric generation reports a
        failure, the title and lyrics carry the failure status and message and
        the image is a placeholder; no cover-art request is made in that case.
    """
    print(
        "\n" + "="*30,
        "--- Starting Music Generation Request ---",
        f"Inputs: Genre='{genre}', Mood='{mood}', Purpose='{purpose}', Desc='{description[:50]}...'",
        "="*30 + "\n",
        sep="\n",
    )

    # Step 1: lyrics.
    song_title, lyrics = generate_lyrics(genre, mood, purpose, description)

    # The lyric generator signals failure through these sentinel titles.
    failure_markers = ("Error Generating Lyrics", "Blocked Content", "Empty Response")
    if any(marker in song_title for marker in failure_markers):
        fallback_image = create_placeholder_image("Lyric Gen Failed\n" + song_title)
        print(f"Lyric generation failed. Title: {song_title}, Message: {lyrics}")
        return song_title, lyrics, fallback_image

    print(f"\n--- Lyrics Generated ---\nTitle: {song_title}\nLyrics:\n{lyrics[:150]}...\n------------------------")

    # Step 2: cover art, driven by a theme and imagery derived from the lyrics.
    theme, key_imagery = extract_themes_for_cover_art(lyrics, mood, purpose)
    cover_art = generate_cover_art_togetherai(song_title, genre, mood, theme, key_imagery)

    print("--- Music Generation Complete ---")
    return song_title, lyrics, cover_art

# --- Gradio Interface ---

# Build the UI. Components are created inside nested context managers, so the
# order of the statements below is also the order they appear on the page.
with gr.Blocks(theme=gr.themes.Soft()) as app:
    # Page heading and a one-line description of the two generation back ends.
    gr.Markdown("# 🎵 Music Generator 🎶")
    gr.Markdown("Generate song lyrics (Gemini 1.5 Pro) and album cover art (Stable Diffusion XL via Together AI) based on your ideas.")

    with gr.Row():
        # First column: the four free-text inputs and the trigger button.
        with gr.Column(scale=1):
            gr.Markdown("### Input Details")
            input_genre = gr.Textbox(label="Genre", placeholder="e.g., Indie Folk, Synthwave, Power Metal")
            input_mood = gr.Textbox(label="Mood", placeholder="e.g., Melancholic, Hopeful, Energetic, Mysterious")
            input_purpose = gr.Textbox(label="Purpose / Theme", placeholder="e.g., Overcoming hardship, A rainy night drive, Celebrating friendship")
            input_description = gr.Textbox(label="Additional Description (Optional)", lines=3, placeholder="e.g., Include imagery of stars and oceans, mention a specific city, focus on a specific instrument's feel")
            generate_button = gr.Button("✨ Generate Music Concept ✨", variant="primary")

        # Second column: read-only outputs (title, lyrics, cover image).
        with gr.Column(scale=2):
            gr.Markdown("### Generated Output")
            output_title = gr.Textbox(label="Song Title", interactive=False)
            output_lyrics = gr.Textbox(label="Lyrics", lines=15, interactive=False, max_lines=30) # Allow more lines for display
            output_cover_art = gr.Image(label="Generated Cover Art", type="pil", width=512, height=512) # Use PIL format


    # Wire the button to the pipeline: the four inputs are passed positionally
    # to music_generator_app and its three return values fill the outputs in
    # the order listed.
    generate_button.click(
        fn=music_generator_app,
        inputs=[input_genre, input_mood, input_purpose, input_description],
        outputs=[output_title, output_lyrics, output_cover_art]
    )

    # Footer: credits and the environment variables the app needs at start-up.
    gr.Markdown("---")
    gr.Markdown("Powered by Google Gemini (`gemini-1.5-pro-latest`) and Together AI (`stabilityai/stable-diffusion-xl-base-1.0`).")
    gr.Markdown("**Requires environment variables:** `GEMINI_API_KEY` and `HF_TOKEN` (containing your Together AI key).")

# --- Launch the App ---
if __name__ == "__main__":
    # Probe for the Pillow drawing modules at start-up so a missing install is
    # reported once here; the app is launched either way, because placeholder
    # images simply stay blank without them.
    try:
        from PIL import ImageDraw, ImageFont
    except ImportError:
        for notice in (
            "\nWARNING: Pillow's ImageDraw or ImageFont not found. Placeholder images may lack text.",
            "Install them with: pip install Pillow\n",
        ):
            print(notice)

    app.launch(debug=True)  # Set debug=False for deployment