from google import genai
import os
import logging
import json
import tempfile
from datetime import datetime

# Configure logging
log_directory = os.getenv("LOG_DIR", "logs")
os.makedirs(log_directory, exist_ok=True)
log_file = os.path.join(
    log_directory, f"llm_calls_{datetime.now().strftime('%Y%m%d')}.log"
)

# Set up logger
logger = logging.getLogger("llm_logger")
logger.setLevel(logging.INFO)
logger.propagate = False  # Prevent propagation to root logger

# Create formatter
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")

# Add file handler
file_handler = logging.FileHandler(log_file, encoding="utf-8")
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)

# Add stdout handler
stdout_handler = logging.StreamHandler()
stdout_handler.setFormatter(formatter)
# logger.addHandler(stdout_handler)

# Simple cache configuration - use temp directory for HF Spaces
cache_file = os.path.join(tempfile.gettempdir(), "llm_cache.json")

# By default, we use Google Gemini 2.5 Flash, as it shows great performance for code understanding
def call_llm(prompt: str, use_cache: bool = True) -> str:
    """Call the Gemini API with the given prompt, with optional on-disk response caching."""
    # Log the prompt
    logger.info(f"PROMPT: {prompt}")
    # Check cache if enabled
    if use_cache:
        # Load cache from disk
        cache = {}
        if os.path.exists(cache_file):
            try:
                with open(cache_file, "r", encoding="utf-8") as f:
                    cache = json.load(f)
            except Exception:
                logger.warning("Failed to load cache, starting with empty cache")

        # Return from cache if it exists
        if prompt in cache:
            logger.info(f"RESPONSE cached: {cache[prompt]}")
            return cache[prompt]
    # Call the LLM if not in cache or cache is disabled.
    # By default we authenticate with an AI Studio API key; to use Vertex AI
    # instead, uncomment the client below and comment out the API-key client.
    # client = genai.Client(
    #     vertexai=True,
    #     # TODO: change to your own project id and location
    #     project=os.getenv("GEMINI_PROJECT_ID", "your-project-id"),
    #     location=os.getenv("GEMINI_LOCATION", "us-central1"),
    # )
    client = genai.Client(
        api_key=os.getenv("GEMINI_API_KEY", ""),
    )
    # gemini-2.5-pro was too slow and janky here, so default to flash
    model = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")
    # model = os.getenv("GEMINI_MODEL", "gemini-2.5-flash-preview-04-17")
    response = client.models.generate_content(model=model, contents=[prompt])
    response_text = response.text

    # Log the response
    logger.info(f"RESPONSE: {response_text}")
    # Update cache if enabled
    if use_cache:
        # Reload the cache from disk to avoid overwriting entries written in the meantime
        cache = {}
        if os.path.exists(cache_file):
            try:
                with open(cache_file, "r", encoding="utf-8") as f:
                    cache = json.load(f)
            except Exception:
                pass

        # Add to cache and save
        cache[prompt] = response_text
        try:
            with open(cache_file, "w", encoding="utf-8") as f:
                json.dump(cache, f)
        except Exception as e:
            logger.error(f"Failed to save cache: {e}")

    return response_text

if __name__ == "__main__":
    test_prompt = "Hello, how are you?"

    # First call - should hit the API
    print("Making call...")
    response1 = call_llm(test_prompt, use_cache=False)
    print(f"Response: {response1}")