"""
Image generation tools for visualizing song analysis results.
"""
import os | |
from typing import Dict | |
from loguru import logger | |
from smolagents import Tool | |
from api_utils import make_api_call_with_retry | |
def caption_gen_tool(analysis_json: Dict, title: str, artist: str) -> str:
    """
    Generate a descriptive caption for image generation based on song analysis.

    Builds a text prompt from the analysis fields and sends it to an LLM via
    ``make_api_call_with_retry``; the model's answer is the image prompt.

    Args:
        analysis_json: Dictionary containing the song analysis results. The
            keys read here are "mood", "main_themes", "summary" and
            "conclusion"; missing or empty values fall back to defaults.
        title: Song title (required)
        artist: Song artist

    Returns:
        A descriptive caption suitable for image generation (the model
        response with surrounding whitespace stripped).
    """
    logger.info("Generating image caption from analysis results")
    logger.info("Using song: '{}' by '{}' for caption generation", title, artist)

    mood = analysis_json.get("mood") or "emotional"

    # "main_themes" is expected to be a list, but a bare string would be
    # joined character by character and non-string items would make
    # str.join raise, so normalise both cases before joining.
    raw_themes = analysis_json.get("main_themes") or ["music"]
    if isinstance(raw_themes, str):
        raw_themes = [raw_themes]
    themes = ", ".join(str(theme) for theme in raw_themes)

    summary = analysis_json.get("summary") or ""
    conclusion = analysis_json.get("conclusion") or ""

    # Only the first 200 characters of the summary and conclusion are sent,
    # which keeps the prompt short.
    prompt = f"""Generate a detailed, vivid, and artistic image generation prompt based on the following song analysis.
This prompt will be used by an AI image generator to create a visual representation of the song's essence.
Song: {title} by {artist}
Mood: {mood}
Themes: {themes}
Summary: {summary[:200]}
Conclusion: {conclusion[:200]}
Your task is to create a single paragraph (approximately 100-150 words) that vividly describes a scene or abstract image
that captures the emotional essence and themes of this song. The description should be detailed, visual, and evocative.
DO NOT include any text, words, or lyrics in the image description. Focus on colors, composition, mood, symbols, and visuals only.
ONLY output the final image generation prompt with no additional text, explanations, or formatting.
"""

    # Use the same model as in lyrics analysis
    model_to_use = "openrouter/google/gemini-2.0-flash-lite-preview-02-05:free"
    logger.info("Using {} for caption generation", model_to_use)

    logger.info("Generating image caption for song: '{}' by '{}'", title, artist)
    response_text = make_api_call_with_retry(model_to_use, prompt)

    caption = response_text.strip()
    logger.debug("Generated image caption: {}...", caption[:100])
    return caption
class GenerateImageTool(Tool):
    """Tool for generating images based on song analysis.

    ``forward`` asks ``caption_gen_tool`` for an image prompt and then renders
    that prompt with the Gemini API. Both methods return an HTML string: an
    ``<img>`` tag on success or a ``<p>`` element describing the error.
    """

    name = "generate_image"
    description = "Generates an image based on the song analysis results"
    inputs = {
        "analysis_json": {"type": "any", "description": "JSON dictionary containing the analysis results"},
        "title": {"type": "string", "description": "Title of the song"},
        "artist": {"type": "string", "description": "Artist of the song"}
    }
    output_type = "string"

    def generate_with_gemini(self, caption: str) -> str:
        """
        Generate image using Gemini API directly.

        Args:
            caption: The prompt text for image generation

        Returns:
            HTML img tag with the image embedded as a base64 data URI, or an
            HTML paragraph with an error message. Exceptions are not
            propagated; they are logged and reported in the returned HTML.
        """
        try:
            # Imported lazily so that a missing SDK is reported as an error
            # message by the handler below.
            from google import genai
            from google.genai import types
            import base64

            # Get API key from environment variable
            api_key = os.environ.get("GEMINI_API_KEY")
            if not api_key:
                logger.error("GEMINI_API_KEY not found in environment variables")
                return "<p>Error: Gemini API key not found. Please set the GEMINI_API_KEY environment variable.</p>"

            logger.info("Initializing Gemini client")
            client = genai.Client(api_key=api_key)

            logger.info("Generating image with Gemini")
            response = client.models.generate_content(
                model="gemini-2.0-flash-exp-image-generation",
                contents=caption,
                config=types.GenerateContentConfig(
                    response_modalities=['Text', 'Image']
                )
            )

            # A response without candidates or content carries no image;
            # treat it like an empty parts list so the caller gets the
            # "No image generated" message instead of an indexing error.
            candidates = response.candidates or []
            content = candidates[0].content if candidates else None
            parts = (content.parts if content is not None else None) or []

            for part in parts:
                if part.text is not None:
                    logger.info("Gemini response text: {}...", part.text[:100])

                # Checked independently of the text so an image is not
                # dropped when a part carries both.
                inline_data = part.inline_data
                if inline_data is None or not inline_data.data:
                    continue

                image_data = inline_data.data
                if isinstance(image_data, bytes):
                    # Raw bytes must be base64-encoded for the data URI.
                    image_b64 = base64.b64encode(image_data).decode('utf-8')
                else:
                    # The data is already base64 text.
                    image_b64 = image_data

                # Prefer the MIME type reported with the data; fall back to PNG.
                mime_type = getattr(inline_data, "mime_type", None) or "image/png"
                return (
                    f'<img src="data:{mime_type};base64,{image_b64}" '
                    'alt="Generated image based on song analysis" '
                    'style="max-width:100%; border-radius:10px; box-shadow:0 4px 8px rgba(0,0,0,0.1);">'
                )

            return "<p>Error: No image generated by Gemini API.</p>"
        except ImportError:
            logger.error("Google GenAI package not installed")
            # `from google import genai` is provided by the google-genai package.
            return "<p>Error: Google GenAI package not installed. Install with 'pip install google-genai'</p>"
        except Exception as e:
            logger.error("Error generating image with Gemini: {}", e)
            return f"<p>Error generating image with Gemini: {str(e)}</p>"

    def forward(self, analysis_json: Dict, title: str, artist: str) -> str:
        """
        Generates an image based on the analysis results using Gemini API.

        Args:
            analysis_json: Dictionary containing the analysis results
            title: Song title
            artist: Song artist

        Returns:
            HTML img tag with the image or error message. Any exception raised
            while building the caption or the image is logged and returned as
            an HTML error paragraph.
        """
        try:
            # Generate caption for the image
            caption = caption_gen_tool(analysis_json, title=title, artist=artist)
            logger.info("Caption generated successfully")

            # Gemini is the only image backend used here.
            return self.generate_with_gemini(caption)
        except Exception as e:
            logger.error("Error in image generation: {}", e)
            return f"<p>Error in image generation: {str(e)}</p>"