Spaces:

tonko22
/

LyricsAnalyzerAgent

Sleeping

App Files Files Community

LyricsAnalyzerAgent / tools /image_generation_tools.py

tonko22

hmmmm

d83c062 4 months ago

raw

history blame contribute delete

7.14 kB

	"""
	Image generation tools for visualizing song analysis results.
	"""

	import os
	from typing import Dict
	from loguru import logger
	from smolagents import Tool

	from api_utils import make_api_call_with_retry


	def caption_gen_tool(analysis_json: Dict, title: str, artist: str) -> str:
	    """
	    Generate a descriptive caption for image generation based on song analysis.

	    Builds a text prompt from selected fields of the analysis and sends it to
	    an LLM through ``make_api_call_with_retry``; the stripped response is
	    returned as the image-generation prompt.

	    Args:
	        analysis_json: Dictionary containing the song analysis results. The
	            keys read are ``mood``, ``main_themes``, ``summary`` and
	            ``conclusion``; missing or empty values fall back to defaults.
	            ``None`` is treated as an empty analysis.
	        title: Song title.
	        artist: Song artist.

	    Returns:
	        A descriptive caption suitable for image generation.
	    """
	    logger.info("Generating image caption from analysis results")
	    logger.info(f"Using song: '{title}' by '{artist}' for caption generation")

	    # Tolerate a missing analysis instead of failing on ``None.get``.
	    analysis = analysis_json or {}

	    mood = analysis.get("mood") or "emotional"

	    # ``main_themes`` is expected to be a list, but a bare string would be
	    # joined character by character, and non-string entries would make
	    # ``str.join`` raise. Normalise both cases before joining.
	    raw_themes = analysis.get("main_themes") or ["music"]
	    if isinstance(raw_themes, str):
	        raw_themes = [raw_themes]
	    themes = ", ".join(str(theme) for theme in raw_themes)

	    # Both fall back to "" so they can be sliced unconditionally below.
	    summary = analysis.get("summary") or ""
	    conclusion = analysis.get("conclusion") or ""

	    # Summary and conclusion are truncated to 200 characters to keep the
	    # prompt short.
	    prompt = f"""Generate a detailed, vivid, and artistic image generation prompt based on the following song analysis.
	This prompt will be used by an AI image generator to create a visual representation of the song's essence.

	Song: {title} by {artist}
	Mood: {mood}
	Themes: {themes}
	Summary: {summary[:200]}
	Conclusion: {conclusion[:200]}

	Your task is to create a single paragraph (approximately 100-150 words) that vividly describes a scene or abstract image
	that captures the emotional essence and themes of this song. The description should be detailed, visual, and evocative.
	DO NOT include any text, words, or lyrics in the image description. Focus on colors, composition, mood, symbols, and visuals only.

	ONLY output the final image generation prompt with no additional text, explanations, or formatting.
	"""

	    # NOTE(review): the original comment says this is the same model the
	    # lyrics analysis uses -- confirm against that module.
	    model_to_use = "openrouter/google/gemini-2.0-flash-lite-preview-02-05:free"
	    logger.info("Using {} for caption generation", model_to_use)

	    logger.info("Generating image caption for song: '{}' by '{}'", title, artist)
	    response_text = make_api_call_with_retry(model_to_use, prompt)

	    # Trim surrounding whitespace the model may have added.
	    caption = response_text.strip()
	    logger.debug(f"Generated image caption: {caption[:100]}...")

	    return caption


	class GenerateImageTool(Tool):
	    """Tool for generating images based on song analysis.

	    ``forward`` first asks ``caption_gen_tool`` for an image prompt and then
	    passes that prompt to the Gemini image-generation model. Both methods
	    return an HTML string: an ``<img>`` tag with the image embedded as a data
	    URI on success, or a ``<p>`` element describing the error.
	    """

	    # Tool metadata declared as class attributes on the smolagents ``Tool``.
	    name = "generate_image"
	    description = "Generates an image based on the song analysis results"
	    inputs = {
	        "analysis_json": {"type": "any", "description": "JSON dictionary containing the analysis results"},
	        "title": {"type": "string", "description": "Title of the song"},
	        "artist": {"type": "string", "description": "Artist of the song"},
	    }
	    output_type = "string"

	    def generate_with_gemini(self, caption: str) -> str:
	        """
	        Generate image using Gemini API directly

	        Args:
	            caption: The prompt text for image generation

	        Returns:
	            HTML img tag with the image or error message
	        """
	        import html

	        try:
	            # Imported lazily so a missing Gemini SDK is reported as an
	            # error message from this method rather than at module import.
	            import base64
	            from google import genai
	            from google.genai import types
	        except ImportError:
	            logger.error("Google GenAI package not installed")
	            # ``from google import genai`` is provided by the ``google-genai``
	            # distribution (not ``google-generativeai``).
	            return "<p>Error: Google GenAI package not installed. Install with 'pip install google-genai'</p>"

	        # Get API key from environment variable
	        api_key = os.environ.get("GEMINI_API_KEY")
	        if not api_key:
	            logger.error("GEMINI_API_KEY not found in environment variables")
	            return "<p>Error: Gemini API key not found. Please set the GEMINI_API_KEY environment variable.</p>"

	        try:
	            logger.info("Initializing Gemini client")
	            client = genai.Client(api_key=api_key)

	            logger.info("Generating image with Gemini")
	            response = client.models.generate_content(
	                model="gemini-2.0-flash-exp-image-generation",
	                contents=caption,
	                config=types.GenerateContentConfig(
	                    response_modalities=['Text', 'Image']
	                ),
	            )

	            # ``candidates``, ``content`` and ``parts`` may each be missing
	            # (for example when the prompt is blocked), so guard every level
	            # instead of indexing blindly. Only the first candidate is used.
	            candidates = response.candidates or []
	            content = candidates[0].content if candidates else None
	            parts = (content.parts if content is not None else None) or []
	            if not parts:
	                logger.warning("Gemini response contained no content parts")

	            for part in parts:
	                if part.text is not None:
	                    logger.info(f"Gemini response text: {part.text[:100]}...")
	                elif part.inline_data is not None:
	                    # Extract the image payload; skip blobs without data.
	                    image_data = part.inline_data.data
	                    if not image_data:
	                        continue

	                    # Convert raw bytes to base64 for embedding in HTML.
	                    if isinstance(image_data, (bytes, bytearray)):
	                        image_b64 = base64.b64encode(image_data).decode('utf-8')
	                    else:
	                        # The data is assumed to be base64 text already.
	                        image_b64 = image_data

	                    # Use the MIME type reported by the API when available;
	                    # fall back to PNG otherwise.
	                    mime_type = getattr(part.inline_data, "mime_type", None) or "image/png"

	                    return (
	                        f'<img src="data:{mime_type};base64,{image_b64}" '
	                        'alt="Generated image based on song analysis" '
	                        'style="max-width:100%; border-radius:10px; box-shadow:0 4px 8px rgba(0,0,0,0.1);">'
	                    )

	            return "<p>Error: No image generated by Gemini API.</p>"

	        except Exception as e:
	            logger.error(f"Error generating image with Gemini: {str(e)}")
	            # Escape the message: it is embedded in HTML and may contain
	            # markup characters coming from the API response.
	            return f"<p>Error generating image with Gemini: {html.escape(str(e), quote=False)}</p>"

	    def forward(self, analysis_json: Dict, title: str, artist: str) -> str:
	        """
	        Generates an image based on the analysis results using Gemini API.

	        Args:
	            analysis_json: Dictionary containing the analysis results
	            title: Song title
	            artist: Song artist

	        Returns:
	            HTML img tag with the image or error message
	        """
	        import html

	        try:
	            # Generate caption for the image
	            caption = caption_gen_tool(analysis_json, title=title, artist=artist)
	            logger.info("Caption generated successfully")

	            # Gemini is the only image backend used here, so no
	            # "fallback" warning is logged on this normal path.
	            return self.generate_with_gemini(caption)

	        except Exception as e:
	            logger.error(f"Error in image generation: {str(e)}")
	            return f"<p>Error in image generation: {html.escape(str(e), quote=False)}</p>"