davidpomerenke committed on
Commit
0a17acf
·
verified ·
1 Parent(s): 9ea2dd3

Upload from GitHub Actions: added opus 4.5

Browse files
Files changed (1) hide show
  1. evals/models.py +4 -2
evals/models.py CHANGED
@@ -27,6 +27,7 @@ important_models = [
27
  "openai/gpt-4o", # 10$
28
  "openai/gpt-3.5-turbo", # $1.50
29
  "openai/gpt-oss-120b",
 
30
  "anthropic/claude-sonnet-4.5",
31
  "anthropic/claude-haiku-4.5",
32
  "anthropic/claude-opus-4.1", # 15$
@@ -37,12 +38,13 @@ important_models = [
37
  "mistralai/mistral-medium-3.1",
38
  "mistralai/mistral-saba", # 0.6$
39
  "mistralai/mistral-nemo", # 0.08$
40
- "google/gemini-3-pro-preview",
41
  "google/gemini-2.5-pro", # $10
42
  "google/gemini-2.5-flash", # 0.6$
43
  "google/gemini-2.5-flash-lite", # 0.3$
44
  "google/gemma-3-27b-it", # 0.2$
45
  # "x-ai/grok-4", # $15
 
46
  "x-ai/grok-4-fast",
47
  # "x-ai/grok-3", # $15
48
  "cohere/command-a",
@@ -382,7 +384,7 @@ def load_models(date: date) -> pd.DataFrame:
382
  "models_unfiltered.json", orient="records", indent=2, force_ascii=False
383
  )
384
  # Filter out expensive models to keep costs reasonable
385
- models = models[models["cost"] <= 15.0].reset_index(drop=True)
386
  models["tasks"] = [
387
  [
388
  "translation_from",
 
27
  "openai/gpt-4o", # 10$
28
  "openai/gpt-3.5-turbo", # $1.50
29
  "openai/gpt-oss-120b",
30
+ "anthropic/claude-opus-4.5", # 25$
31
  "anthropic/claude-sonnet-4.5",
32
  "anthropic/claude-haiku-4.5",
33
  "anthropic/claude-opus-4.1", # 15$
 
38
  "mistralai/mistral-medium-3.1",
39
  "mistralai/mistral-saba", # 0.6$
40
  "mistralai/mistral-nemo", # 0.08$
41
+ "google/gemini-3-pro-preview", # 12$
42
  "google/gemini-2.5-pro", # $10
43
  "google/gemini-2.5-flash", # 0.6$
44
  "google/gemini-2.5-flash-lite", # 0.3$
45
  "google/gemma-3-27b-it", # 0.2$
46
  # "x-ai/grok-4", # $15
47
+ "x-ai/grok-4.1-fast:free", #free for now
48
  "x-ai/grok-4-fast",
49
  # "x-ai/grok-3", # $15
50
  "cohere/command-a",
 
384
  "models_unfiltered.json", orient="records", indent=2, force_ascii=False
385
  )
386
  # Filter out expensive models to keep costs reasonable
387
+ models = models[models["cost"] <= 25.0].reset_index(drop=True)
388
  models["tasks"] = [
389
  [
390
  "translation_from",