Create model_registry.py

src/utils/model_registry.py (new file, +212 lines)
"""
Models registry for the Auto-Analyst application.
This file serves as the single source of truth for all model information.
"""

# Model providers
PROVIDERS = {
    "openai": "OpenAI",
    "anthropic": "Anthropic",
    "groq": "GROQ",
    "gemini": "Google Gemini"
}

# Cost in USD per 1K tokens for each model
MODEL_COSTS = {
    "openai": {
        "gpt-4.1": {"input": 0.002, "output": 0.008},
        "gpt-4.1-mini": {"input": 0.0004, "output": 0.0016},
        "gpt-4.1-nano": {"input": 0.0001, "output": 0.0004},
        "gpt-4.5-preview": {"input": 0.075, "output": 0.15},
        "gpt-4o": {"input": 0.0025, "output": 0.01},
        "gpt-4o-mini": {"input": 0.00015, "output": 0.0006},
        "o1": {"input": 0.015, "output": 0.06},
        "o1-pro": {"input": 0.15, "output": 0.6},        # $150 / $600 per 1M tokens
        "o1-mini": {"input": 0.0011, "output": 0.0044},  # $1.10 / $4.40 per 1M tokens
        "o3": {"input": 0.01, "output": 0.04},           # $10 / $40 per 1M tokens
        "o3-mini": {"input": 0.0011, "output": 0.0044},  # $1.10 / $4.40 per 1M tokens
        "gpt-3.5-turbo": {"input": 0.0005, "output": 0.0015},
    },
    "anthropic": {
        "claude-3-opus-latest": {"input": 0.015, "output": 0.075},
        "claude-3-7-sonnet-latest": {"input": 0.003, "output": 0.015},
        "claude-3-5-sonnet-latest": {"input": 0.003, "output": 0.015},
        "claude-3-5-haiku-latest": {"input": 0.0008, "output": 0.004},  # $0.80 / $4 per 1M tokens
    },
    "groq": {
        "deepseek-r1-distill-llama-70b": {"input": 0.00075, "output": 0.00099},
        "llama-3.3-70b-versatile": {"input": 0.00059, "output": 0.00079},
        "llama3-8b-8192": {"input": 0.00005, "output": 0.00008},
        "llama3-70b-8192": {"input": 0.00059, "output": 0.00079},
        "mistral-saba-24b": {"input": 0.00079, "output": 0.00079},
        "gemma2-9b-it": {"input": 0.0002, "output": 0.0002},
        "qwen-qwq-32b": {"input": 0.00029, "output": 0.00039},
        "meta-llama/llama-4-maverick-17b-128e-instruct": {"input": 0.0002, "output": 0.0006},
        "meta-llama/llama-4-scout-17b-16e-instruct": {"input": 0.00011, "output": 0.00034},
        "deepseek-r1-distill-qwen-32b": {"input": 0.00075, "output": 0.00099},
        "llama-3.1-70b-versatile": {"input": 0.00059, "output": 0.00079},
    },
    "gemini": {
        "gemini-2.5-pro-preview-03-25": {"input": 0.00015, "output": 0.001}
    }
}
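
# Illustrative lookups (reading the table above): prices are indexed as
# MODEL_COSTS[provider][model][direction], e.g.
#   MODEL_COSTS["openai"]["gpt-4o"]["input"]                        -> 0.0025 (USD per 1K input tokens)
#   MODEL_COSTS["anthropic"]["claude-3-7-sonnet-latest"]["output"]  -> 0.015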

# Tiers based on cost per 1K tokens
MODEL_TIERS = {
    "tier1": {
        "name": "Basic",
        "credits": 1,
        "models": [
            "llama3-8b-8192",
            "llama-3.1-8b-instant",
            "gemma2-9b-it",
            "meta-llama/llama-4-scout-17b-16e-instruct",
            "llama-3.2-1b-preview",
            "llama-3.2-3b-preview",
            "llama-3.2-11b-text-preview",
            "llama-3.2-11b-vision-preview",
            "llama3-groq-8b-8192-tool-use-preview"
        ]
    },
    "tier2": {
        "name": "Standard",
        "credits": 3,
        "models": [
            "gpt-4.1-nano",
            "gpt-4o-mini",
            "o1-mini",
            "o3-mini",
            "qwen-qwq-32b",
            "meta-llama/llama-4-maverick-17b-128e-instruct"
        ]
    },
    "tier3": {
        "name": "Premium",
        "credits": 5,
        "models": [
            "gpt-4.1",
            "gpt-4.1-mini",
            "gpt-4.5-preview",
            "gpt-4o",
            "o1",
            "o1-pro",
            "o3",
            "gpt-3.5-turbo",
            "claude-3-opus-latest",
            "claude-3-7-sonnet-latest",
            "claude-3-5-sonnet-latest",
            "claude-3-5-haiku-latest",
            "deepseek-r1-distill-llama-70b",
            "llama-3.3-70b-versatile",
            "llama3-70b-8192",
            "mistral-saba-24b",
            "deepseek-r1-distill-qwen-32b",
            "llama-3.2-90b-text-preview",
            "llama-3.2-90b-vision-preview",
            "llama-3.3-70b-specdec",
            "llama2-70b-4096",
            "llama-3.1-70b-versatile",
            "llama-3.1-405b-reasoning",
            "llama3-groq-70b-8192-tool-use-preview",
            "gemini-2.5-pro-preview-03-25"
        ]
    }
}

# Model metadata (display name, context window, etc.)
MODEL_METADATA = {
    # OpenAI
    "gpt-4.1": {"display_name": "GPT-4.1", "context_window": 128000},
    "gpt-4.1-mini": {"display_name": "GPT-4.1 Mini", "context_window": 128000},
    "gpt-4.1-nano": {"display_name": "GPT-4.1 Nano", "context_window": 128000},
    "gpt-4o": {"display_name": "GPT-4o", "context_window": 128000},
    "gpt-4.5-preview": {"display_name": "GPT-4.5 Preview", "context_window": 128000},
    "gpt-4o-mini": {"display_name": "GPT-4o Mini", "context_window": 128000},
    "gpt-3.5-turbo": {"display_name": "GPT-3.5 Turbo", "context_window": 16385},
    "o1": {"display_name": "o1", "context_window": 128000},
    "o1-pro": {"display_name": "o1 Pro", "context_window": 128000},
    "o1-mini": {"display_name": "o1 Mini", "context_window": 128000},
    "o3": {"display_name": "o3", "context_window": 128000},
    "o3-mini": {"display_name": "o3 Mini", "context_window": 128000},

    # Anthropic
    "claude-3-opus-latest": {"display_name": "Claude 3 Opus", "context_window": 200000},
    "claude-3-7-sonnet-latest": {"display_name": "Claude 3.7 Sonnet", "context_window": 200000},
    "claude-3-5-sonnet-latest": {"display_name": "Claude 3.5 Sonnet", "context_window": 200000},
    "claude-3-5-haiku-latest": {"display_name": "Claude 3.5 Haiku", "context_window": 200000},

    # GROQ
    "deepseek-r1-distill-llama-70b": {"display_name": "DeepSeek R1 Distill Llama 70b", "context_window": 32768},
    "llama-3.3-70b-versatile": {"display_name": "Llama 3.3 70b", "context_window": 8192},
    "llama3-8b-8192": {"display_name": "Llama 3 8b", "context_window": 8192},
    "llama3-70b-8192": {"display_name": "Llama 3 70b", "context_window": 8192},
    "mistral-saba-24b": {"display_name": "Mistral Saba 24b", "context_window": 32768},
    "gemma2-9b-it": {"display_name": "Gemma 2 9b", "context_window": 8192},
    "qwen-qwq-32b": {"display_name": "Qwen QwQ 32b | Alibaba", "context_window": 32768},
    "meta-llama/llama-4-maverick-17b-128e-instruct": {"display_name": "Llama 4 Maverick 17b", "context_window": 128000},
    "meta-llama/llama-4-scout-17b-16e-instruct": {"display_name": "Llama 4 Scout 17b", "context_window": 16000},
    "llama-3.1-70b-versatile": {"display_name": "Llama 3.1 70b Versatile", "context_window": 8192},

    # Gemini
    "gemini-2.5-pro-preview-03-25": {"display_name": "Gemini 2.5 Pro", "context_window": 1000000},
}

# Helper functions

def get_provider_for_model(model_name):
    """Determine the provider based on the model name."""
    if not model_name:
        return "Unknown"

    # Case-insensitive substring match against the registry keys; the first
    # provider in MODEL_COSTS order with a matching model wins.
    model_name = model_name.lower()
    return next((provider for provider, models in MODEL_COSTS.items()
                 if any(model_name in model for model in models)), "Unknown")
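
# Illustrative checks (hypothetical usage, not part of the original commit).
# Because the lookup is a lowercase substring match, a mixed-case name still
# resolves, and unknown names fall through to "Unknown":
#   get_provider_for_model("GPT-4o")       -> "openai"
#   get_provider_for_model("gpt-4o-mini")  -> "openai"
#   get_provider_for_model("not-a-model")  -> "Unknown"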

def get_model_tier(model_name):
    """Get the tier of a model."""
    for tier_id, tier_info in MODEL_TIERS.items():
        if model_name in tier_info["models"]:
            return tier_id
    return "tier1"  # Default to tier1 if not found
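
# Illustrative examples (hypothetical usage), reading off MODEL_TIERS above:
#   get_model_tier("gpt-4o")        -> "tier3"
#   get_model_tier("gpt-4o-mini")   -> "tier2"
#   get_model_tier("made-up-name")  -> "tier1"  (unlisted models fall back to tier1)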

def calculate_cost(model_name, input_tokens, output_tokens):
    """Calculate the dollar cost of a request from its token counts."""
    if not model_name:
        return 0

    # Registry keys are lowercase, so normalize before the direct dict lookup
    # below (get_provider_for_model already normalizes internally)
    model_name = model_name.lower()

    # Convert tokens to thousands, since prices are per 1K tokens
    input_tokens_in_thousands = input_tokens / 1000
    output_tokens_in_thousands = output_tokens / 1000

    # Get the model provider
    model_provider = get_provider_for_model(model_name)

    # Handle the case where the model is not found
    if model_provider == "Unknown" or model_name not in MODEL_COSTS.get(model_provider, {}):
        return 0

    return (input_tokens_in_thousands * MODEL_COSTS[model_provider][model_name]["input"] +
            output_tokens_in_thousands * MODEL_COSTS[model_provider][model_name]["output"])
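
# Worked example (hypothetical usage): a gpt-4o call with 10,000 input tokens
# and 2,000 output tokens costs 10 * 0.0025 + 2 * 0.01 = 0.045 USD:
#   calculate_cost("gpt-4o", 10_000, 2_000)  -> 0.045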

def get_credit_cost(model_name):
    """Get the credit cost for a model."""
    tier_id = get_model_tier(model_name)
    return MODEL_TIERS[tier_id]["credits"]
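
# Illustrative example (hypothetical usage): credits come from the model's
# tier, e.g. gpt-4o-mini is tier2 ("Standard"), so:
#   get_credit_cost("gpt-4o-mini")  -> 3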

def get_display_name(model_name):
    """Get the display name for a model."""
    return MODEL_METADATA.get(model_name, {}).get("display_name", model_name)

def get_context_window(model_name):
    """Get the context window size for a model."""
    return MODEL_METADATA.get(model_name, {}).get("context_window", 4096)

def get_all_models_for_provider(provider):
    """Get all models for a specific provider."""
    if provider not in MODEL_COSTS:
        return []
    return list(MODEL_COSTS[provider].keys())

def get_models_by_tier(tier_id):
    """Get all models for a specific tier."""
    return MODEL_TIERS.get(tier_id, {}).get("models", [])
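
A quick sanity check of the helpers (a hypothetical usage sketch, not part of the commit; it assumes the module is importable as src.utils.model_registry):

    from src.utils.model_registry import (
        get_provider_for_model, get_model_tier, calculate_cost,
        get_credit_cost, get_display_name, get_context_window,
    )

    model = "claude-3-7-sonnet-latest"
    print(get_provider_for_model(model))        # anthropic
    print(get_model_tier(model))                # tier3
    print(get_display_name(model))              # Claude 3.7 Sonnet
    print(get_context_window(model))            # 200000
    print(get_credit_cost(model))               # 5
    print(calculate_cost(model, 1_000, 1_000))  # 0.003 + 0.015 = 0.018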