# mbti-pocketflow/utils/call_llm.py
from google import genai
import os
import logging
import json
from datetime import datetime
# Configure logging
log_directory = os.getenv("LOG_DIR", "logs")
os.makedirs(log_directory, exist_ok=True)
log_file = os.path.join(
log_directory, f"llm_calls_{datetime.now().strftime('%Y%m%d')}.log"
)
# Set up logger
logger = logging.getLogger("llm_logger")
logger.setLevel(logging.INFO)
logger.propagate = False # Prevent propagation to root logger
# Create formatter
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
# Add file handler
file_handler = logging.FileHandler(log_file, encoding='utf-8')
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
# Add stdout handler
stdout_handler = logging.StreamHandler()
stdout_handler.setFormatter(formatter)
# logger.addHandler(stdout_handler)
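# (stdout logging is disabled by default; uncomment the line above to also echo log lines to the console)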
# Simple cache configuration - use temp directory for HF Spaces
import tempfile
cache_file = os.path.join(tempfile.gettempdir(), "llm_cache.json")
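# The cache is a flat JSON object mapping each prompt string to its response
# text, e.g. {"Hello, how are you?": "I'm doing well, thanks."}; it is re-read
# from disk before every write so parallel calls don't clobber each other's entries.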
# By default, we use Google Gemini 2.5 Flash, as it shows good performance for code understanding
def call_llm(prompt: str, use_cache: bool = True) -> str:
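    """Call Gemini with the given prompt, optionally using the on-disk cache.

    Prompts and responses are logged to the daily log file. When use_cache is
    True, a previously seen prompt is answered straight from the JSON cache,
    and fresh responses are written back to it.
    """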
    # Log the prompt
    logger.info(f"PROMPT: {prompt}")

    # Check cache if enabled
    if use_cache:
        # Load cache from disk
        cache = {}
        if os.path.exists(cache_file):
            try:
                with open(cache_file, "r", encoding="utf-8") as f:
                    cache = json.load(f)
            except Exception:
                logger.warning("Failed to load cache, starting with empty cache")

        # Return from cache if the prompt was seen before
        if prompt in cache:
            logger.info(f"RESPONSE cached: {cache[prompt]}")
            return cache[prompt]

    # Call the LLM if not in cache or cache is disabled.
    # A Vertex AI client can be used instead of the AI Studio API key below:
    # client = genai.Client(
    #     vertexai=True,
    #     # TODO: change to your own project id and location
    #     project=os.getenv("GEMINI_PROJECT_ID", "your-project-id"),
    #     location=os.getenv("GEMINI_LOCATION", "us-central1"),
    # )
    client = genai.Client(
        api_key=os.getenv("GEMINI_API_KEY", ""),
    )
    # Gemini 2.5 Pro was too slow and janky for this use case, so Flash is the default
    model = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")
    # model = os.getenv("GEMINI_MODEL", "gemini-2.5-flash-preview-04-17")
    response = client.models.generate_content(model=model, contents=[prompt])
    response_text = response.text

    # Log the response
    logger.info(f"RESPONSE: {response_text}")

    # Update cache if enabled
    if use_cache:
        # Reload the cache from disk to avoid overwriting entries written by other calls
        cache = {}
        if os.path.exists(cache_file):
            try:
                with open(cache_file, "r", encoding="utf-8") as f:
                    cache = json.load(f)
            except Exception:
                pass

        # Add to cache and save
        cache[prompt] = response_text
        try:
            with open(cache_file, "w", encoding="utf-8") as f:
                json.dump(cache, f)
        except Exception as e:
            logger.error(f"Failed to save cache: {e}")

    return response_text
if __name__ == "__main__":
    test_prompt = "Hello, how are you?"

    # First call - should hit the API
    print("Making call...")
    response1 = call_llm(test_prompt, use_cache=False)
    print(f"Response: {response1}")