File size: 16,100 Bytes
1a21193
700fefb
 
 
 
c0d2229
 
700fefb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e98f158
700fefb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a21193
700fefb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a21193
700fefb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e98f158
700fefb
 
 
 
e98f158
700fefb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e98f158
700fefb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a21193
700fefb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a21193
 
700fefb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a21193
700fefb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
import gradio as gr
import google.generativeai as genai
import os
import re
from PIL import Image
from huggingface_hub import InferenceClient 
import traceback 

# --- Configuration ---

# 1. Configure Gemini API Key
# The key is read from the GEMINI_API_KEY environment variable. Every failure
# path terminates start-up with a non-zero exit status so that a process
# supervisor or shell script can tell the app did not come up.
try:
    gemini_api_key = os.environ["GEMINI_API_KEY"]
    if not gemini_api_key:
        # An empty value is as unusable as a missing one; fail early and clearly.
        raise ValueError("GEMINI_API_KEY environment variable is set but empty.")
    genai.configure(api_key=gemini_api_key)
    print("Gemini API Key loaded successfully.")
except KeyError:
    print("ERROR: GEMINI_API_KEY environment variable not set.")
    # SystemExit with a string prints it to stderr and exits with status 1.
    raise SystemExit("Please set the GEMINI_API_KEY environment variable and restart.")
except ValueError as e:
    print(f"ERROR: {e}")
    raise SystemExit(1)
except Exception as e:
    print(f"An unexpected error occurred during Gemini configuration: {e}")
    # A bare exit() would report status 0 (success); signal the failure explicitly.
    raise SystemExit(1)

# 2. Configure Together AI Client (using HF_TOKEN environment variable)
# The Together AI credential is read from HF_TOKEN. Every failure path
# terminates start-up with a non-zero exit status.
try:
    together_ai_key = os.environ["HF_TOKEN"]  # Use HF_TOKEN as requested
    if not together_ai_key:
        raise ValueError("HF_TOKEN environment variable is set but empty.")
    print("Together AI Key (from HF_TOKEN) loaded successfully.")
    # Initialize InferenceClient for Together AI provider
    together_client = InferenceClient(
        provider="together",
        token=together_ai_key,  # Use 'token' argument for the key
    )
    print("Together AI InferenceClient initialized.")
except KeyError:
    print("ERROR: HF_TOKEN environment variable not set (expected for Together AI key).")
    # SystemExit with a string prints it to stderr and exits with status 1.
    raise SystemExit("Please set the HF_TOKEN environment variable with your Together AI key and restart.")
except ValueError as e:
    print(f"ERROR: {e}")
    # A bare exit() would report status 0 (success); signal the failure explicitly.
    raise SystemExit(1)
except Exception as e:
    print(f"An unexpected error occurred during Together AI client initialization: {e}")
    traceback.print_exc()  # Print full traceback for debugging
    raise SystemExit(1)


# --- Model Settings ---

# Gemini Settings
# Model identifier passed to genai.GenerativeModel when generating lyrics.
GEMINI_MODEL_NAME = "gemini-1.5-pro-latest"  # Explicitly set as requested
# Sampling options used for every lyric request.
LYRIC_GENERATION_CONFIG = dict(
    temperature=0.7,
    top_p=0.95,
    max_output_tokens=1024,
)
# One entry per harm category; all categories share the same blocking threshold.
LYRIC_SAFETY_SETTINGS = [
    {"category": harm_category, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
    for harm_category in (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]

# Stable Diffusion XL Settings (via Together AI)
SDXL_MODEL_ID = "stabilityai/stable-diffusion-xl-base-1.0"  # Model on Together AI
# Keyword arguments forwarded to InferenceClient.text_to_image.
# Check the client documentation for exact naming if issues arise.
SDXL_PARAMS = dict(
    guidance_scale=7.5,  # Maps from CFG Scale
    num_inference_steps=50,  # Maps from Steps
    width=1024,
    height=1024,
    negative_prompt="blurry, distorted, low quality, pixelated, text, words, letters, watermark, signature, deformed faces, multiple images, grids, writing, font",
)

# --- Prompt Templates ---

# Prompt sent to Gemini for lyric generation. str.format() fills four
# placeholders: {user_genre}, {user_mood}, {user_purpose}, {user_description}.
# Requirement 8 asks the model to open with a "Title: ..." line; the lyric
# generation code relies on that convention to separate title from lyrics.
LYRIC_PROMPT_TEMPLATE = """
You are an expert songwriter with extensive experience in multiple musical genres.
Create original song lyrics with the following specifications:

Genre: {user_genre}
Mood: {user_mood}
Purpose: {user_purpose}
Additional Description: {user_description}

Requirements:
1. Create complete lyrics with verse(s), chorus, and bridge if appropriate.
2. Maintain consistency with the specified genre's conventions.
3. Evoke the requested mood throughout the song.
4. Incorporate themes related to the stated purpose.
5. Include elements from the additional description.
6. Ensure lyrics flow naturally and have musical potential.
7. Length: 3-4 verses, repeating chorus, optional bridge (total 20-30 lines, approximately).
8. Structure the output clearly: Start with 'Title: [Your Song Title]' on the first line, followed by a blank line, then the full lyrics.

Title: [Create an original, compelling title for the song]

[Generate complete song lyrics below this line, following the title format above]
"""

# Prompt sent to the text-to-image model for the album cover. Placeholders:
# {song_title}, {user_genre}, {user_mood}, {derived_from_lyrics} and
# {key_elements_from_lyrics}. Several of them appear more than once in the text.
COVER_ART_PROMPT_TEMPLATE = """
Create an album cover art for a song with the following details:

Song Title: {song_title}
Genre: {user_genre}
Mood: {user_mood}
Theme: {derived_from_lyrics}
Key Imagery: {key_elements_from_lyrics}

Style Requirements:
- Professional album cover quality, digital art style.
- Visually represent the mood ({user_mood}) and theme ({derived_from_lyrics}) of the song.
- Incorporate imagery that reflects the song's meaning ({key_elements_from_lyrics}).
- Use a color palette that evokes the specified mood ({user_mood}).
- Design should be appropriate for the musical genre ({user_genre}).
- Include balanced composition, potentially with space where a title could be overlaid later (but do *not* generate the text itself).
- No text, letters, words, signatures, or watermarks in the generated image.

Create a striking, emotionally resonant album cover that a listener would find compelling. High detail, atmospheric lighting.
"""

# --- Core Functions ---

# Matches a leading "Title:" marker, tolerating Markdown decoration such as
# "**Title:** ..." or "## Title: ..." that chat models frequently add.
_TITLE_LINE_RE = re.compile(r"[\s*_#]*Title\s*:(.*)", re.IGNORECASE)
# Characters trimmed from both ends of a parsed title (whitespace + Markdown marks).
_TITLE_DECORATION = " \t\r\n*_#"


def _split_title_and_lyrics(raw_text):
    """Split a model response into (title, lyrics), or return None if it has no leading title line."""
    lines = raw_text.strip().splitlines()
    if not lines:
        return None
    match = _TITLE_LINE_RE.match(lines[0])
    if match is None:
        return None

    title = match.group(1).strip(_TITLE_DECORATION)
    body = lines[1:]
    if not title:
        # The marker stood alone ("Title:" then the text on a later line):
        # use the next non-blank line as the title.
        while body and not body[0].strip():
            body.pop(0)
        if body:
            title = body.pop(0).strip(_TITLE_DECORATION)
    return title, "\n".join(body).strip()


def generate_lyrics(genre, mood, purpose, description):
    """Generate a song title and lyrics using the Gemini API.

    Args:
        genre: Musical genre inserted into the prompt.
        mood: Desired mood inserted into the prompt.
        purpose: Purpose/theme of the song inserted into the prompt.
        description: Free-form extra guidance inserted into the prompt.

    Returns:
        A ``(title, lyrics)`` tuple of strings. Special first elements:

        * ``"Blocked Content"`` - the prompt feedback reported a block reason;
          the second element explains why.
        * ``"Empty Response"`` - the response carried no parts.
        * ``"Error Generating Lyrics"`` - an exception was raised; the second
          element contains its message.
        * ``"Title Not Found"`` - text was returned but did not start with a
          ``Title:`` line; the second element is the full response text.

    This function does not raise; all exceptions are logged and converted to
    the ``"Error Generating Lyrics"`` result.
    """
    try:
        model = genai.GenerativeModel(
            model_name=GEMINI_MODEL_NAME,
            generation_config=LYRIC_GENERATION_CONFIG,
            safety_settings=LYRIC_SAFETY_SETTINGS
        )
        prompt = LYRIC_PROMPT_TEMPLATE.format(
            user_genre=genre,
            user_mood=mood,
            user_purpose=purpose,
            user_description=description
        )
        print(f"\n--- Sending Prompt to Gemini ({GEMINI_MODEL_NAME}) ---")
        response = model.generate_content(prompt)
        print("--- Received response from Gemini ---")

        # Handle potential blocked content or empty responses before touching
        # response.text.
        if not response.parts:
            if response.prompt_feedback.block_reason:
                block_reason = response.prompt_feedback.block_reason
                safety_ratings = response.prompt_feedback.safety_ratings
                print(f"Warning: Gemini response blocked. Reason: {block_reason}")
                print(f"Safety Ratings: {safety_ratings}")
                return "Blocked Content", f"Lyrics generation blocked due to: {block_reason}. Please revise inputs. Ratings: {safety_ratings}"
            print("Warning: Gemini returned an empty response.")
            return "Empty Response", "Gemini returned no content. Try again or adjust inputs."

        raw_text = response.text
        parsed = _split_title_and_lyrics(raw_text)
        if parsed is not None:
            return parsed

        # Fallback if the title format isn't matched: hand back everything.
        print("Warning: Could not parse title automatically. Returning full text as lyrics.")
        return "Title Not Found", raw_text

    except Exception as e:
        print(f"Error during lyric generation: {e}")
        traceback.print_exc()
        # Some API errors carry the response; surface its prompt feedback if present.
        if hasattr(e, 'response') and hasattr(e.response, 'prompt_feedback'):
            print(f"Prompt Feedback: {e.response.prompt_feedback}")
        return "Error Generating Lyrics", f"An error occurred: {e}"

def extract_themes_for_cover_art(lyrics, mood, purpose):
    """Derive a short theme and key-imagery description for the cover-art prompt.

    Args:
        lyrics: Lyric text; lines are separated by newlines.
        mood: Mood string supplied by the user.
        purpose: Purpose/theme string supplied by the user.

    Returns:
        ``(theme, key_elements)``. ``theme`` combines mood and purpose and is
        capped at 150 characters plus ``'...'``. ``key_elements`` names the
        mood followed by the first three lyric lines that are neither blank
        nor section tags starting with ``[``; it is capped at 250 characters
        plus ``'...'``. Both strings are stripped of surrounding whitespace.
    """
    def _clip(value, limit):
        # Keep short strings as-is; mark truncated ones with an ellipsis.
        if len(value) > limit:
            return value[:limit] + '...'
        return value

    # Collect up to three usable lyric lines, preserving their original text.
    picked = []
    for raw_line in lyrics.split('\n'):
        trimmed = raw_line.strip()
        if not trimmed or trimmed.startswith('['):
            continue
        picked.append(raw_line)
        if len(picked) == 3:
            break

    imagery = f"Imagery reflecting: {mood}. " + " ".join(picked)
    summary = f"{mood}, related to {purpose}"

    return _clip(summary, 150).strip(), _clip(imagery, 250).strip()

# --- Actual Cover Art Generation using Together AI ---
def generate_cover_art_togetherai(title, genre, mood, theme, key_imagery):
    """Render album cover art through the Together AI text-to-image endpoint.

    The arguments are substituted into COVER_ART_PROMPT_TEMPLATE and the
    resulting prompt is sent to ``together_client.text_to_image`` together
    with the values from SDXL_PARAMS.

    Returns:
        The PIL image returned by the API. If the call raises, or returns
        something that is not a ``PIL.Image.Image``, a placeholder image from
        ``create_placeholder_image`` describing the problem is returned instead.
    """
    print("\n--- Generating Cover Art (Together AI) ---")
    print(f"Model: {SDXL_MODEL_ID}")
    print(f"Title: {title}")
    print(f"Genre: {genre}")
    print(f"Mood: {mood}")
    print(f"Theme: {theme}")
    print(f"Key Imagery: {key_imagery}")

    # Build the detailed prompt for SDXL from the template fields.
    template_fields = {
        "song_title": title,
        "user_genre": genre,
        "user_mood": mood,
        "derived_from_lyrics": theme,
        "key_elements_from_lyrics": key_imagery,
    }
    sdxl_prompt = COVER_ART_PROMPT_TEMPLATE.format(**template_fields)
    print(f"\nSDXL Prompt:\n{sdxl_prompt}")
    print(f"\nSDXL Params: {SDXL_PARAMS}")
    print("--- Calling Together AI API ---")

    try:
        # Forward exactly the generation options this function supports.
        request_options = {
            option: SDXL_PARAMS[option]
            for option in (
                "negative_prompt",
                "guidance_scale",
                "num_inference_steps",
                "width",
                "height",
            )
        }
        result = together_client.text_to_image(
            prompt=sdxl_prompt,
            model=SDXL_MODEL_ID,
            **request_options,
        )
        print("--- Received Image from Together AI ---")

        # Guard: anything other than a PIL image is treated as an API error.
        if not isinstance(result, Image.Image):
            print(f"Error: Received unexpected type from API: {type(result)}")
            return create_placeholder_image("API Error (Type)")
        return result

    except Exception as e:
        print(f"Error calling Together AI API: {e}")
        traceback.print_exc()  # Print full traceback
        # Keep the placeholder readable by shortening long error messages.
        detail = str(e)
        detail = detail[:100] + "..." if len(detail) > 100 else detail
        return create_placeholder_image(f"API Error:\n{detail}")


def create_placeholder_image(text="Placeholder"):
    """Create a 1024x1024 grey placeholder image with ``text`` centred on it.

    Drawing is best effort: if font loading or text rendering fails, the
    problem is logged and the plain grey image is returned.

    Args:
        text: Message to render. Explicit newlines start a new line and long
            lines are wrapped to fit the image width. A non-string value is
            rendered as "Invalid Text".

    Returns:
        A ``PIL.Image.Image`` in RGB mode.
    """
    canvas_size = 1024
    side_margin = 40  # Horizontal padding kept free on each side of the text
    line_gap = 5  # Extra pixels between consecutive lines

    img = Image.new('RGB', (canvas_size, canvas_size), color=(115, 115, 115))  # Grey background
    print(f"Placeholder image generated with text hint: {text}")
    try:
        from PIL import ImageDraw, ImageFont
        draw = ImageDraw.Draw(img)

        # Try the known absolute location first, then bare names that Pillow
        # resolves against the system font directories.
        font = None
        for font_path in (
            "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
            "DejaVuSans.ttf",
            "arial.ttf",
        ):
            try:
                font = ImageFont.truetype(font_path, 50)
                break
            except (OSError, ImportError):
                # OSError: font file not found; ImportError: no FreeType support.
                continue
        if font is None:
            print("Warning: Default fonts (DejaVuSans, Arial) not found. Using basic PIL font.")
            font = ImageFont.load_default()  # Fallback

        def measure(fragment):
            """Return the (width, height) in pixels of ``fragment`` drawn with ``font``."""
            try:
                left, top, right, bottom = draw.textbbox((0, 0), fragment, font=font)
            except (AttributeError, ValueError):
                # Older Pillow releases lack textbbox or reject bitmap fonts there.
                return draw.textsize(fragment, font=font)
            return right - left, bottom - top

        # Wrap by measured pixel width so lines never run off the canvas.
        max_line_width = canvas_size - 2 * side_margin
        lines = []
        if isinstance(text, str):
            for paragraph in text.splitlines() or [""]:
                current_line = ""
                for word in paragraph.split():
                    candidate = f"{current_line} {word}".strip()
                    # A single word is always accepted, even if too wide, so we
                    # never emit an empty line in front of it.
                    if current_line and measure(candidate)[0] > max_line_width:
                        lines.append(current_line)
                        current_line = word
                    else:
                        current_line = candidate
                lines.append(current_line)  # Add the last line of the paragraph
        else:
            lines = ["Invalid Text"]

        # The bitmap fallback font may not expose ``size``; measure it instead.
        line_height = (getattr(font, "size", None) or measure("Ag")[1]) + line_gap
        y_text = (canvas_size - len(lines) * line_height) / 2  # Vertically centre the block

        for line in lines:
            if line:  # Blank lines only advance the cursor
                line_width = measure(line)[0]
                position = ((canvas_size - line_width) / 2, y_text)
                draw.text(position, line, fill=(255, 255, 255), font=font)  # White text
            y_text += line_height

    except ImportError:
        print("Pillow's ImageDraw/ImageFont not fully available. Placeholder will be blank gray.")
    except Exception as e:
        print(f"Error drawing text on placeholder: {e}")

    return img


# --- Main Gradio App Function ---

# Titles that generate_lyrics returns to signal a failure instead of a song.
_LYRIC_FAILURE_TITLES = frozenset({
    "Error Generating Lyrics",
    "Blocked Content",
    "Empty Response",
})


def music_generator_app(genre, mood, purpose, description):
    """Orchestrate lyric and cover art generation for one UI request.

    Args:
        genre: Genre text from the UI.
        mood: Mood text from the UI.
        purpose: Purpose/theme text from the UI.
        description: Optional extra description; ``None`` is treated as empty.

    Returns:
        ``(song_title, lyrics, cover_art)``. When lyric generation fails, the
        first two elements carry the failure title and message and the third
        is a placeholder image; no cover-art request is made in that case.
    """
    # The description field is optional; normalise so slicing and prompt
    # formatting below never see None.
    description = description or ""

    print("\n" + "="*30)
    print("--- Starting Music Generation Request ---")
    print(f"Inputs: Genre='{genre}', Mood='{mood}', Purpose='{purpose}', Desc='{description[:50]}...'")
    print("="*30 + "\n")

    # 1. Generate Lyrics
    song_title, lyrics = generate_lyrics(genre, mood, purpose, description)

    # Compare against the exact sentinel titles: a substring test would also
    # reject genuine song titles that merely contain one of these phrases.
    if song_title in _LYRIC_FAILURE_TITLES:
        placeholder_img = create_placeholder_image("Lyric Gen Failed\n" + song_title)
        print(f"Lyric generation failed. Title: {song_title}, Message: {lyrics}")
        return song_title, lyrics, placeholder_img  # Return error messages and placeholder

    print(f"\n--- Lyrics Generated ---\nTitle: {song_title}\nLyrics:\n{lyrics[:150]}...\n------------------------")

    # 2. Generate Cover Art (using actual Together AI call)
    theme, key_imagery = extract_themes_for_cover_art(lyrics, mood, purpose)
    cover_art = generate_cover_art_togetherai(song_title, genre, mood, theme, key_imagery)

    print("--- Music Generation Complete ---")
    return song_title, lyrics, cover_art

# --- Gradio Interface ---

# Build the UI: a two-column layout with the four text inputs and the trigger
# button on the left, and the generated title, lyrics and cover art on the right.
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🎵 Music Generator 🎶")
    gr.Markdown("Generate song lyrics (Gemini 1.5 Pro) and album cover art (Stable Diffusion XL via Together AI) based on your ideas.")

    with gr.Row():
        # Left column: user inputs.
        with gr.Column(scale=1):
            gr.Markdown("### Input Details")
            input_genre = gr.Textbox(label="Genre", placeholder="e.g., Indie Folk, Synthwave, Power Metal")
            input_mood = gr.Textbox(label="Mood", placeholder="e.g., Melancholic, Hopeful, Energetic, Mysterious")
            input_purpose = gr.Textbox(label="Purpose / Theme", placeholder="e.g., Overcoming hardship, A rainy night drive, Celebrating friendship")
            input_description = gr.Textbox(label="Additional Description (Optional)", lines=3, placeholder="e.g., Include imagery of stars and oceans, mention a specific city, focus on a specific instrument's feel")
            generate_button = gr.Button("✨ Generate Music Concept ✨", variant="primary")

        # Right column (twice the relative width): read-only outputs.
        with gr.Column(scale=2):
            gr.Markdown("### Generated Output")
            output_title = gr.Textbox(label="Song Title", interactive=False)
            output_lyrics = gr.Textbox(label="Lyrics", lines=15, interactive=False, max_lines=30) # Allow more lines for display
            output_cover_art = gr.Image(label="Generated Cover Art", type="pil", width=512, height=512) # Use PIL format


    # Wire the button: the four inputs are passed positionally to
    # music_generator_app and its 3-tuple result fills the three outputs in order.
    generate_button.click(
        fn=music_generator_app,
        inputs=[input_genre, input_mood, input_purpose, input_description],
        outputs=[output_title, output_lyrics, output_cover_art]
    )

    # Footer: attribution and required environment variables.
    gr.Markdown("---")
    gr.Markdown("Powered by Google Gemini (`gemini-1.5-pro-latest`) and Together AI (`stabilityai/stable-diffusion-xl-base-1.0`).")
    gr.Markdown("**Requires environment variables:** `GEMINI_API_KEY` and `HF_TOKEN` (containing your Together AI key).")

# --- Launch the App ---
# Start the Gradio server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    # Ensure necessary libraries for placeholder image text drawing are available.
    # This is only a probe: a failed import produces a warning, not an abort.
    try:
        from PIL import ImageDraw, ImageFont
    except ImportError:
        print("\nWARNING: Pillow's ImageDraw or ImageFont not found. Placeholder images may lack text.")
        print("Install them with: pip install Pillow\n")

    app.launch(debug=True) # Set debug=False for deployment