# pipV1/services/gemini_client.py
"""
Google Gemini client for Pip.
Handles: Text generation (emotion analysis, conversation) and image generation.
Uses gemini-flash-lite-latest for text, imagen-4.0-fast-generate-001 for images.
"""
import base64
import json
import os
from typing import AsyncGenerator, Optional

import google.generativeai as genai
from google.generativeai import types
class GeminiClient:
"""
Gemini-powered client for Pip.
Primary LLM for emotion analysis, conversation, and image generation.
"""
    # Model configurations - using actual available model names.
    # NOTE: the "pro" tier currently aliases the same lite model; point
    # TEXT_MODEL_PRO at a heavier model if deeper reasoning is needed.
    TEXT_MODEL_FAST = "models/gemini-flash-lite-latest"
    TEXT_MODEL_PRO = "models/gemini-flash-lite-latest"
    IMAGE_MODEL = "models/imagen-4.0-fast-generate-001"
    def __init__(self, api_key: Optional[str] = None):
        """Initialize with an optional API key, falling back to GOOGLE_API_KEY."""
        self.api_key = api_key or os.getenv("GOOGLE_API_KEY")
self.available = bool(self.api_key)
if self.available:
genai.configure(api_key=self.api_key)
print(f"✅ Gemini: Configured with model {self.TEXT_MODEL_FAST}")
else:
print("⚠️ Gemini: No API key found - service disabled")
# Model instances (lazy loaded)
self._fast_model = None
self._pro_model = None
def is_available(self) -> bool:
"""Check if the client is available."""
return self.available
def _get_fast_model(self):
"""Get fast model for quick responses."""
if self._fast_model is None:
self._fast_model = genai.GenerativeModel(self.TEXT_MODEL_FAST)
return self._fast_model
def _get_pro_model(self):
"""Get pro model for complex reasoning."""
if self._pro_model is None:
self._pro_model = genai.GenerativeModel(self.TEXT_MODEL_PRO)
return self._pro_model
async def analyze_emotion(self, user_input: str, system_prompt: str) -> dict:
"""
Analyze emotional content of user input.
        Returns a structured emotion analysis dict.
"""
if not self.available:
return {
"primary_emotions": ["neutral"],
"intensity": 5,
"pip_expression": "neutral",
"intervention_needed": False
}
try:
model = self._get_pro_model()
prompt = f"""{system_prompt}
USER INPUT TO ANALYZE:
{user_input}
Remember: Respond with ONLY valid JSON, no markdown formatting."""
response = await model.generate_content_async(
prompt,
generation_config=types.GenerationConfig(
temperature=0.3,
max_output_tokens=1024,
)
)
            result_text = response.text.strip()
            # Strip a markdown code fence if the model added one despite the
            # JSON-only instruction (e.g. ```json ... ```).
            if result_text.startswith("```"):
                result_text = result_text.split("```")[1]
                if result_text.startswith("json"):
                    result_text = result_text[4:]
                result_text = result_text.strip()
            return json.loads(result_text)
        except json.JSONDecodeError as e:
            print(f"Gemini emotion analysis JSON error: {e}")
            # Neutral fallback mirroring the expected schema.
            return {
                "primary_emotions": ["neutral"],
                "secondary_emotions": [],
                "intensity": 5,
                "pip_expression": "neutral",
                "underlying_needs": ["connection"],
                "intervention_needed": False
            }
except Exception as e:
print(f"Gemini emotion analysis error: {e}")
raise
async def decide_action(self, emotion_state: dict, system_prompt: str) -> dict:
"""
Decide what action Pip should take based on emotion analysis.
"""
        if not self.available:
            return {
                "action": "reflect",
                "image_style": "warm",
                "suggested_response_tone": "empathetic"
            }
        try:
model = self._get_fast_model()
prompt = f"""{system_prompt}
EMOTION ANALYSIS:
{json.dumps(emotion_state, indent=2)}
Respond with ONLY valid JSON, no markdown."""
response = await model.generate_content_async(
prompt,
generation_config=types.GenerationConfig(
temperature=0.4,
max_output_tokens=512,
)
)
            result_text = response.text.strip()
            # Strip a markdown fence if present, as in analyze_emotion.
            if result_text.startswith("```"):
                result_text = result_text.split("```")[1]
                if result_text.startswith("json"):
                    result_text = result_text[4:]
                result_text = result_text.strip()
return json.loads(result_text)
except json.JSONDecodeError:
return {
"action": "reflect",
"image_style": "warm",
"suggested_response_tone": "empathetic"
}
except Exception as e:
print(f"Gemini action decision error: {e}")
raise
async def quick_acknowledge(self, user_input: str, system_prompt: str) -> str:
"""
Generate a quick acknowledgment (< 500ms target).
Uses the fastest available model.
"""
if not self.available:
return "I hear you..."
try:
model = self._get_fast_model()
prompt = f"""{system_prompt}
USER SAID: {user_input}
Respond with JUST the acknowledgment phrase, nothing else."""
response = await model.generate_content_async(
prompt,
generation_config=types.GenerationConfig(
temperature=0.7,
max_output_tokens=50,
)
)
return response.text.strip()
except Exception as e:
print(f"Gemini quick ack error: {e}")
return "I hear you..."
async def generate_response_stream(
self,
user_input: str,
emotion_state: dict,
action: dict,
system_prompt: str,
        history: Optional[list] = None
) -> AsyncGenerator[str, None]:
"""
Generate conversational response with streaming.
"""
        if not self.available:
            yield "I'm here with you. Tell me more about what's on your mind."
            return
        try:
model = self._get_pro_model()
# Build context
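            # Only the last 6 entries (~3 user/assistant exchanges) are kept,
            # to bound prompt size; older turns are dropped.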
history_text = ""
if history:
history_text = "\n".join([
f"{m['role'].upper()}: {m['content']}"
for m in history[-6:]
])
prompt = f"""{system_prompt}
EMOTION ANALYSIS:
{json.dumps(emotion_state, indent=2)}
ACTION TO TAKE:
{json.dumps(action, indent=2)}
CONVERSATION HISTORY:
{history_text}
CURRENT USER MESSAGE:
{user_input}
Respond naturally and warmly. Be concise but meaningful."""
response = await model.generate_content_async(
prompt,
generation_config=types.GenerationConfig(
temperature=0.8,
max_output_tokens=500,
),
stream=True
)
async for chunk in response:
if chunk.text:
yield chunk.text
except Exception as e:
print(f"Gemini response stream error: {e}")
yield "I'm here with you. Tell me more about what's on your mind."
async def generate_intervention_response(
self,
user_input: str,
emotion_state: dict,
system_prompt: str
) -> AsyncGenerator[str, None]:
"""
Generate careful intervention response for concerning situations.
"""
        if not self.available:
            yield "I hear that you're going through something difficult. I'm here with you, and I care about how you're feeling."
            return
        try:
model = self._get_pro_model()
prompt = f"""{system_prompt}
USER INPUT: {user_input}
EMOTION ANALYSIS:
{json.dumps(emotion_state, indent=2)}
Respond with care, warmth, and appropriate resources if needed."""
response = await model.generate_content_async(
prompt,
generation_config=types.GenerationConfig(
temperature=0.5,
max_output_tokens=600,
),
stream=True
)
async for chunk in response:
if chunk.text:
yield chunk.text
except Exception as e:
print(f"Gemini intervention error: {e}")
yield "I hear that you're going through something difficult. I'm here with you, and I care about how you're feeling."
async def generate_text(self, prompt: str) -> Optional[str]:
"""
        Generate text (for prompts, summaries, etc.).
"""
if not self.available:
return None
try:
model = self._get_pro_model()
response = await model.generate_content_async(
prompt,
generation_config=types.GenerationConfig(
temperature=0.7,
max_output_tokens=1024,
)
)
return response.text
except Exception as e:
print(f"Gemini text generation error: {e}")
return None
async def enhance_prompt(
self,
user_input: str,
emotion_state: dict,
mode: str,
system_prompt: str
) -> str:
"""
Enhance a prompt for image generation.
"""
        if not self.available:
            return f"A peaceful scene reflecting {emotion_state.get('primary_emotions', ['calm'])[0]}"
        try:
model = self._get_fast_model()
prompt = f"""{system_prompt}
USER INPUT: {user_input}
EMOTIONS: {json.dumps(emotion_state.get('primary_emotions', []))}
MODE: {mode}
Generate the enhanced image prompt only, no explanation."""
response = await model.generate_content_async(
prompt,
generation_config=types.GenerationConfig(
temperature=0.9,
max_output_tokens=200,
)
)
return response.text.strip()
except Exception as e:
print(f"Gemini prompt enhancement error: {e}")
return f"A peaceful scene reflecting {emotion_state.get('primary_emotions', ['calm'])[0]}"
async def generate_image(self, prompt: str) -> Optional[str]:
"""
Generate an image using Imagen 4.0.
Returns base64 encoded image or None.
"""
if not self.available:
return None
        try:
            # Request the image through GenerativeModel. Depending on the
            # google-generativeai SDK version, Imagen models may not be served
            # via generate_content; the "not found / not supported" handler
            # below covers that case.
            imagen_model = genai.GenerativeModel(self.IMAGE_MODEL)
response = await imagen_model.generate_content_async(
prompt,
generation_config=types.GenerationConfig(
temperature=1.0,
)
)
# Check for image in response
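            # (inline_data carries raw image bytes; they are base64-encoded
            # below so the result can travel in a JSON payload)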
if response.candidates:
for candidate in response.candidates:
if hasattr(candidate, 'content') and candidate.content.parts:
for part in candidate.content.parts:
if hasattr(part, 'inline_data') and part.inline_data:
# Return base64 encoded image
return base64.b64encode(part.inline_data.data).decode('utf-8')
print("Imagen: No image in response")
return None
        except Exception as e:
            error_str = str(e).lower()
            if "429" in error_str or "quota" in error_str:
                print(f"Imagen rate limit exceeded: {e}")
            elif "not found" in error_str or "not supported" in error_str:
                print(f"Imagen not available: {e}")
            else:
                print(f"Imagen generation error: {e}")
            return None