File size: 3,583 Bytes
4d16728
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d32c69c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import sys
import os
import random
from datetime import datetime, timedelta
import sqlite3

# Add parent directory to path so we can import modules
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from init_db import ModelUsage, session_factory

# Catalogue of models used in the generated test data. Each row below is
# (model name, provider, cost per 1k tokens) and is expanded into the
# {"provider": ..., "cost_per_1k": ...} mapping keyed by model name.
MODELS = {
    name: {"provider": provider, "cost_per_1k": cost_per_1k}
    for name, provider, cost_per_1k in (
        ("gpt-3.5-turbo", "OpenAI", 0.0015),
        ("gpt-4", "OpenAI", 0.03),
        ("gpt-4o", "OpenAI", 0.01),
        ("gpt-4o-mini", "OpenAI", 0.0015),
        ("o1-mini", "OpenAI", 0.00015),
        ("claude-3-opus", "Anthropic", 0.015),
        ("claude-3-sonnet", "Anthropic", 0.008),
        ("claude-3-haiku", "Anthropic", 0.003),
        ("llama-3-8b", "Groq", 0.0005),
        ("llama-3-70b", "Groq", 0.002),
    )
}

# Pool of user IDs that generated rows are attributed to (placeholder
# values; swap in real IDs if specific users exist).
USER_IDS = list(range(1, 6))

def generate_test_data(num_records=100):
    """Insert randomly generated ``ModelUsage`` rows for testing.

    Every row gets a random model from ``MODELS``, a random user from
    ``USER_IDS``, random token counts, a cost derived from the model's
    ``cost_per_1k`` rate, and a timestamp drawn from the 30 days preceding
    the call. All rows are added to one session and committed once after
    the loop; if anything raises, the session is rolled back and the error
    is printed instead of propagated.

    Args:
        num_records: Number of rows to create.

    Returns:
        True if the rows were committed, False if an error occurred and the
        session was rolled back.
    """
    session = session_factory()

    try:
        # Generate records for the past 30 days. The timestamps are naive
        # UTC values, as in the rest of this script.
        end_date = datetime.utcnow()
        start_date = end_date - timedelta(days=30)
        window_seconds = int((end_date - start_date).total_seconds())

        # Built once: the model catalogue does not change during the loop.
        model_names = list(MODELS)

        for _ in range(num_records):
            # Offset from the start of the window so the timestamp can never
            # fall outside [start_date, end_date]. (Subtracting separate
            # random days/hours/minutes could overshoot by almost a day.)
            timestamp = start_date + timedelta(
                seconds=random.randint(0, window_seconds)
            )

            # Select random model and user
            model_name = random.choice(model_names)
            model_info = MODELS[model_name]
            user_id = random.choice(USER_IDS)

            # Generate random token counts
            prompt_tokens = random.randint(100, 1000)
            completion_tokens = random.randint(50, 500)
            total_tokens = prompt_tokens + completion_tokens

            # Cost is the per-1k-token rate scaled by the total token count
            cost = (total_tokens / 1000) * model_info["cost_per_1k"]

            # Create model usage record
            usage = ModelUsage(
                user_id=user_id,
                chat_id=random.randint(1, 50),  # Random chat ID
                model_name=model_name,
                provider=model_info["provider"],
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                total_tokens=total_tokens,
                query_size=prompt_tokens * 4,  # Approximate characters
                response_size=completion_tokens * 4,  # Approximate characters
                cost=cost,
                timestamp=timestamp,
                is_streaming=random.choice([True, False]),
                request_time_ms=random.randint(500, 5000),  # Between 0.5 and 5 seconds
            )
            session.add(usage)

        session.commit()
        print(f"Successfully generated {num_records} test records")
        return True

    except Exception as e:
        # Best-effort script: undo the partial batch and report the problem,
        # but let the caller see that nothing was written.
        session.rollback()
        print(f"Error generating test data: {e}")
        return False

    finally:
        session.close()


if __name__ == "__main__":
    # Default to 100 records, but allow command line override
    if len(sys.argv) > 1:
        try:
            num_records = int(sys.argv[1])
        except ValueError:
            sys.exit(
                f"Invalid record count {sys.argv[1]!r}: "
                "expected a non-negative integer"
            )
        if num_records < 0:
            sys.exit(
                f"Invalid record count {num_records}: "
                "expected a non-negative integer"
            )
    else:
        num_records = 100

    # Only announce success when the rows were actually committed; a failed
    # run exits with a non-zero status so shells and CI can detect it.
    if generate_test_data(num_records):
        print("Done! The model_usage table has been populated with test data.")
    else:
        sys.exit(1)