""" Image generation tools for visualizing song analysis results. """ import os from typing import Dict from loguru import logger from smolagents import Tool from api_utils import make_api_call_with_retry def caption_gen_tool(analysis_json: Dict, title: str, artist: str) -> str: """ Generate a descriptive caption for image generation based on song analysis. Uses LLM to create a high-quality image prompt based on the analysis. Args: analysis_json: Dictionary containing the song analysis results title: Song title (required) artist: Song artist Returns: A descriptive caption suitable for image generation """ logger.info("Generating image caption from analysis results") # Use the provided title and artist logger.info(f"Using song: '{title}' by '{artist}' for caption generation") mood = analysis_json.get("mood") or "emotional" themes = ", ".join(analysis_json.get("main_themes") or ["music"]) summary = analysis_json.get("summary") or "" conclusion = analysis_json.get("conclusion") or "" # Create an API prompt to generate a high-quality image caption prompt = f"""Generate a detailed, vivid, and artistic image generation prompt based on the following song analysis. This prompt will be used by an AI image generator to create a visual representation of the song's essence. Song: {title} by {artist} Mood: {mood} Themes: {themes} Summary: {summary[:200] if summary else ""} Conclusion: {conclusion[:200] if conclusion else ""} Your task is to create a single paragraph (approximately 100-150 words) that vividly describes a scene or abstract image that captures the emotional essence and themes of this song. The description should be detailed, visual, and evocative. DO NOT include any text, words, or lyrics in the image description. Focus on colors, composition, mood, symbols, and visuals only. ONLY output the final image generation prompt with no additional text, explanations, or formatting. """ # Use the same model as in lyrics analysis model_to_use = "openrouter/google/gemini-2.0-flash-lite-preview-02-05:free" logger.info("Using {} for caption generation", model_to_use) # Call the API to generate a caption logger.info("Generating image caption for song: '{}' by '{}'", title, artist) response_text = make_api_call_with_retry(model_to_use, prompt) # Clean up the response if needed caption = response_text.strip() logger.debug(f"Generated image caption: {caption[:100]}...") return caption class GenerateImageTool(Tool): """Tool for generating images based on song analysis""" name = "generate_image" description = "Generates an image based on the song analysis results" inputs = { "analysis_json": {"type": "any", "description": "JSON dictionary containing the analysis results"}, "title": {"type": "string", "description": "Title of the song"}, "artist": {"type": "string", "description": "Artist of the song"} } output_type = "string" def generate_with_gemini(self, caption: str) -> str: """ Generate image using Gemini API directly Args: caption: The prompt text for image generation Returns: HTML img tag with the image or error message """ try: # Правильный импорт библиотеки from google import genai from google.genai import types from io import BytesIO import base64 # Get API key from environment variable api_key = os.environ.get("GEMINI_API_KEY") if not api_key: logger.error("GEMINI_API_KEY not found in environment variables") return "
Error: Gemini API key not found. Please set the GEMINI_API_KEY environment variable.
" logger.info("Initializing Gemini client") # Новый способ настройки клиента client = genai.Client(api_key=api_key) logger.info("Generating image with Gemini") response = client.models.generate_content( model="gemini-2.0-flash-exp-image-generation", contents=caption, config=types.GenerateContentConfig( response_modalities=['Text', 'Image'] ) ) # Process the response for part in response.candidates[0].content.parts: if part.text is not None: logger.info(f"Gemini response text: {part.text[:100]}...") elif part.inline_data is not None: # Извлекаем данные изображения image_data = part.inline_data.data # Преобразуем в base64 для HTML, если нужно if isinstance(image_data, bytes): image_b64 = base64.b64encode(image_data).decode('utf-8') else: # Если данные уже в base64 image_b64 = image_data img_html = f'Error: No image generated by Gemini API.
" except ImportError: logger.error("Google GenAI package not installed") return "Error: Google GenAI package not installed. Install with 'pip install google-generativeai'
" except Exception as e: logger.error(f"Error generating image with Gemini: {str(e)}") return f"Error generating image with Gemini: {str(e)}
" def forward(self, analysis_json: Dict, title: str, artist: str) -> str: """ Generates an image based on the analysis results using Gemini API. Args: analysis_json: Dictionary containing the analysis results title: Song title artist: Song artist (required) Returns: HTML img tag with the image or error message """ try: # Generate caption for the image caption = caption_gen_tool(analysis_json, title=title, artist=artist) logger.info("Caption generated successfully") logger.warning("OpenRouter failed, falling back to Gemini API") # Fall back to Gemini API result = self.generate_with_gemini(caption) return result except Exception as e: logger.error(f"Error in image generation: {str(e)}") return f"Error in image generation: {str(e)}
"