import sys
import os
import random
from datetime import datetime, timedelta
import sqlite3

# Add parent directory to path so we can import modules
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from init_db import ModelUsage, session_factory
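# NOTE: this assumes init_db exposes a SQLAlchemy-style session_factory and a
# ModelUsage ORM model with the columns used below (user_id, chat_id, model_name,
# provider, token counts, query/response sizes, cost, timestamp, is_streaming,
# request_time_ms); adjust the import if your project structures these differently.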
# Models and providers to use in test data
MODELS = {
    "gpt-3.5-turbo": {"provider": "OpenAI", "cost_per_1k": 0.0015},
    "gpt-4": {"provider": "OpenAI", "cost_per_1k": 0.03},
    "gpt-4o": {"provider": "OpenAI", "cost_per_1k": 0.01},
    "gpt-4o-mini": {"provider": "OpenAI", "cost_per_1k": 0.0015},
    "o1-mini": {"provider": "OpenAI", "cost_per_1k": 0.00015},
    "claude-3-opus": {"provider": "Anthropic", "cost_per_1k": 0.015},
    "claude-3-sonnet": {"provider": "Anthropic", "cost_per_1k": 0.008},
    "claude-3-haiku": {"provider": "Anthropic", "cost_per_1k": 0.003},
    "llama-3-8b": {"provider": "Groq", "cost_per_1k": 0.0005},
    "llama-3-70b": {"provider": "Groq", "cost_per_1k": 0.002},
}

# User IDs to use (can be random if you don't have specific users)
USER_IDS = [1, 2, 3, 4, 5]
def generate_test_data(num_records=100):
    """Generate test model usage data"""
    session = session_factory()
    try:
        # Generate records for the past 30 days
        end_date = datetime.utcnow()
        start_date = end_date - timedelta(days=30)

        for _ in range(num_records):
            # Random timestamp within the date range
            random_days = random.randint(0, 30)
            timestamp = end_date - timedelta(
                days=random_days,
                hours=random.randint(0, 23),
                minutes=random.randint(0, 59),
            )

            # Select random model and user
            model_name = random.choice(list(MODELS.keys()))
            model_info = MODELS[model_name]
            user_id = random.choice(USER_IDS)

            # Generate random token counts
            prompt_tokens = random.randint(100, 1000)
            completion_tokens = random.randint(50, 500)
            total_tokens = prompt_tokens + completion_tokens

            # Calculate cost
            cost = (total_tokens / 1000) * model_info["cost_per_1k"]
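            # e.g. 1,200 total tokens on gpt-4: (1200 / 1000) * 0.03 = $0.036.
            # A single blended per-1k rate is applied to prompt and completion
            # tokens alike, which keeps the synthetic cost model simple.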
            # Create model usage record
            usage = ModelUsage(
                user_id=user_id,
                chat_id=random.randint(1, 50),  # Random chat ID
                model_name=model_name,
                provider=model_info["provider"],
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                total_tokens=total_tokens,
                query_size=prompt_tokens * 4,         # Approximate characters
                response_size=completion_tokens * 4,  # Approximate characters
                cost=cost,
                timestamp=timestamp,
                is_streaming=random.choice([True, False]),
                request_time_ms=random.randint(500, 5000)  # Between 0.5 and 5 seconds
            )
            session.add(usage)

        session.commit()
        print(f"Successfully generated {num_records} test records")
    except Exception as e:
        session.rollback()
        print(f"Error generating test data: {e}")
    finally:
        session.close()


if __name__ == "__main__":
    # Default to 100 records, but allow command line override
    num_records = int(sys.argv[1]) if len(sys.argv) > 1 else 100
    generate_test_data(num_records)
    print("Done! The model_usage table has been populated with test data.")