kaizuberbuehler committed on
Commit
5467082
1 Parent(s): 8e52bb6

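Update data (Elo results and x-axis range to 2024-09-15, new model entries); Other changes (Finance plots load on tab select instead of "Show" buttons, additional benchmark tabs)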

Browse files
app.py CHANGED
@@ -78,7 +78,7 @@ def create_chip_designers_data_center_revenue_plot():
78
  def create_size_for_performance_plot(category_to_display: str,
79
  parameter_type_to_display: str,
80
  model_to_compare: str) -> (go.Figure, gr.Dropdown, gr.Dropdown):
81
- with open('elo_results_20240823.pkl', 'rb') as file:
82
  elo_results = pickle.load(file)
83
  categories: list[str] = list(elo_results["text"].keys())
84
  if category_to_display not in categories:
@@ -136,7 +136,7 @@ def create_size_for_performance_plot(category_to_display: str,
136
  yaxis_type='log',
137
  hovermode='x unified',
138
  xaxis=dict(
139
- range=[date(2023, 2, 27), date(2024, 8, 23)],
140
  type='date'
141
  ),
142
  height=800
@@ -161,14 +161,12 @@ def create_size_for_performance_plot(category_to_display: str,
161
 
162
  with gr.Blocks() as demo:
163
  with gr.Tab("Finance"):
164
- with gr.Tab("Big Five Capex"):
165
  big_five_capex_plot: gr.Plot = gr.Plot()
166
- big_five_capex_button: gr.Button = gr.Button("Show")
167
- with gr.Tab("Chip Designers Data Center Revenue"):
168
  chip_designers_data_center_revenue_plot: gr.Plot = gr.Plot()
169
- chip_designers_data_center_revenue_button: gr.Button = gr.Button("Show")
170
- with gr.Tab("Model Efficiency"):
171
- with gr.Tab("Parameters Necessary for Specific Performance Level"):
172
  with gr.Row():
173
  size_for_performance_category_dropdown: gr.Dropdown = gr.Dropdown(label="Category",
174
  value="full",
@@ -190,24 +188,39 @@ with gr.Blocks() as demo:
190
  )
191
  with gr.Tab("API Cost for Specific Performance Level", interactive=False):
192
  api_cost_for_performance_plot: gr.Plot = gr.Plot()
193
- api_cost_for_performance_button: gr.Button = gr.Button("Show")
194
- with gr.Tab("AI System Performance", interactive=False):
195
- with gr.Tab("SWE-bench"):
196
- swe_bench_plot: gr.Plot = gr.Plot()
197
- swe_bench_button: gr.Button = gr.Button("Show")
 
 
198
  with gr.Tab("GAIA"):
199
  gaia_plot: gr.Plot = gr.Plot()
200
- gaia_button: gr.Button = gr.Button("Show")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  with gr.Tab("Frontier Language Model Training Runs", interactive=False):
202
  with gr.Tab("Street Price of GPUs Used"):
203
  gpu_street_price_plot: gr.Plot = gr.Plot()
204
- gpu_street_price_button: gr.Button = gr.Button("Show")
205
  with gr.Tab("TDP of GPUs Used"):
206
  tdp_gpus_plot: gr.Plot = gr.Plot()
207
- tdp_gpus_button: gr.Button = gr.Button("Show")
208
- big_five_capex_button.click(fn=create_big_five_capex_plot, outputs=big_five_capex_plot)
209
- chip_designers_data_center_revenue_button.click(fn=create_chip_designers_data_center_revenue_plot,
210
- outputs=chip_designers_data_center_revenue_plot)
211
  size_for_performance_button.click(fn=create_size_for_performance_plot,
212
  inputs=[size_for_performance_category_dropdown,
213
  size_for_performance_parameter_number_dropdown,
 
78
  def create_size_for_performance_plot(category_to_display: str,
79
  parameter_type_to_display: str,
80
  model_to_compare: str) -> (go.Figure, gr.Dropdown, gr.Dropdown):
81
+ with open('elo_results_20240915.pkl', 'rb') as file:
82
  elo_results = pickle.load(file)
83
  categories: list[str] = list(elo_results["text"].keys())
84
  if category_to_display not in categories:
 
136
  yaxis_type='log',
137
  hovermode='x unified',
138
  xaxis=dict(
139
+ range=[date(2023, 2, 27), date(2024, 9, 15)],
140
  type='date'
141
  ),
142
  height=800
 
161
 
162
  with gr.Blocks() as demo:
163
  with gr.Tab("Finance"):
164
+ with gr.Tab("Big Five Capex") as big_five_capex_tab:
165
  big_five_capex_plot: gr.Plot = gr.Plot()
166
+ with gr.Tab("Chip Designers Data Center Revenue") as chip_designers_data_center_revenue_tab:
 
167
  chip_designers_data_center_revenue_plot: gr.Plot = gr.Plot()
168
+ with gr.Tab("Model Efficiency Over Time"):
169
+ with gr.Tab("Parameters Necessary for Specific Performance Level") as size_for_performance_tab:
 
170
  with gr.Row():
171
  size_for_performance_category_dropdown: gr.Dropdown = gr.Dropdown(label="Category",
172
  value="full",
 
188
  )
189
  with gr.Tab("API Cost for Specific Performance Level", interactive=False):
190
  api_cost_for_performance_plot: gr.Plot = gr.Plot()
191
+ with gr.Tab("System Performance Over Time", interactive=False):
192
+ with gr.Tab("ARC-AGI"):
193
+ arc_agi_plot: gr.Plot = gr.Plot()
194
+ with gr.Tab("BigCodeBench"):
195
+ bigcodebench_plot: gr.Plot = gr.Plot()
196
+ with gr.Tab("Codeforces"):
197
+ codeforces_plot: gr.Plot = gr.Plot()
198
  with gr.Tab("GAIA"):
199
  gaia_plot: gr.Plot = gr.Plot()
200
+ with gr.Tab("GPQA"):
201
+ gpqa_plot: gr.Plot = gr.Plot()
202
+ with gr.Tab("HumanEval"):
203
+ humaneval_plot: gr.Plot = gr.Plot()
204
+ with gr.Tab("LMSYS"):
205
+ lmsys_plot: gr.Plot = gr.Plot()
206
+ with gr.Tab("OpenCompass"):
207
+ opencompass_plot: gr.Plot = gr.Plot()
208
+ with gr.Tab("SWE-bench"):
209
+ swe_bench_plot: gr.Plot = gr.Plot()
210
+ with gr.Tab("Simple Bench"):
211
+ simple_bench_plot: gr.Plot = gr.Plot()
212
+ with gr.Tab("WebArena"):
213
+ webarena_plot: gr.Plot = gr.Plot()
214
+ with gr.Tab("ZeroEval"):
215
+ zeroeval_plot: gr.Plot = gr.Plot()
216
  with gr.Tab("Frontier Language Model Training Runs", interactive=False):
217
  with gr.Tab("Street Price of GPUs Used"):
218
  gpu_street_price_plot: gr.Plot = gr.Plot()
 
219
  with gr.Tab("TDP of GPUs Used"):
220
  tdp_gpus_plot: gr.Plot = gr.Plot()
221
+ big_five_capex_tab.select(fn=create_big_five_capex_plot, outputs=big_five_capex_plot)
222
+ chip_designers_data_center_revenue_tab.select(fn=create_chip_designers_data_center_revenue_plot,
223
+ outputs=chip_designers_data_center_revenue_plot)
 
224
  size_for_performance_button.click(fn=create_size_for_performance_plot,
225
  inputs=[size_for_performance_category_dropdown,
226
  size_for_performance_parameter_number_dropdown,
elo_results_20240823.pkl → elo_results_20240915.pkl RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec103aa6bf5d0f02f8bd2c69c8ccfc8f1be1b44c7dc004d967c8d5ce470975b5
3
- size 3039588
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdce5fbf7a50d53ce549fd2c6c230627397856c4b62807b0e97a7f2c8554045e
3
+ size 3707205
models.jsonl CHANGED
@@ -1,12 +1,22 @@
 
 
 
 
 
 
1
  {"Name": "chatgpt-4o-latest", "Release Date": "2024-08-25", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
 
 
2
  {"Name": "gemini-1.5-pro-exp-0801", "Release Date": "2024-08-01", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
3
  {"Name": "grok-2-2024-08-13", "Release Date": "2024-08-13", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
4
  {"Name": "gpt-4o-2024-05-13", "Release Date": "2024-05-13", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
5
  {"Name": "gpt-4o-mini-2024-07-18", "Release Date": "2024-07-18", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
 
6
  {"Name": "claude-3-5-sonnet-20240620", "Release Date": "2024-06-20", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
7
  {"Name": "grok-2-mini-2024-08-13", "Release Date": "2024-08-13", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
8
  {"Name": "gemini-advanced-0514", "Release Date": "2024-05-14", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
9
- {"Name": "llama-3.1-405b-instruct", "Release Date": "2024-07-23", "Total Parameters": 405, "Active Parameters": 405, "API Cost": 0}
 
10
  {"Name": "gpt-4o-2024-08-06", "Release Date": "2024-08-06", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
11
  {"Name": "gemini-1.5-pro-api-0514", "Release Date": "2024-05-14", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
12
  {"Name": "gemini-1.5-pro-api-0409-preview", "Release Date": "2024-04-09", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
@@ -47,7 +57,7 @@
47
  {"Name": "qwen1.5-110b-chat", "Release Date": "2024-02-04", "Total Parameters": 110, "Active Parameters": 110, "API Cost": 0}
48
  {"Name": "mistral-large-2402", "Release Date": "2024-02-24", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
49
  {"Name": "yi-1.5-34b-chat", "Release Date": "2024-05-13", "Total Parameters": 34, "Active Parameters": 34, "API Cost": 0}
50
- {"Name": "reka-flash-21b-20240226-online", "Release Date": "2024-02-26", "Total Parameters": 21, "Active Parameters": 21, "API Cost": 0}
51
  {"Name": "llama-3-8b-instruct", "Release Date": "2024-04-18", "Total Parameters": 8, "Active Parameters": 8, "API Cost": 0}
52
  {"Name": "claude-1", "Release Date": "2023-03-14", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
53
  {"Name": "command-r", "Release Date": "2024-03-21", "Total Parameters": 35, "Active Parameters": 35, "API Cost": 0}
@@ -84,7 +94,7 @@
84
  {"Name": "nous-hermes-2-mixtral-8x7b-dpo", "Release Date": "2024-01-11", "Total Parameters": 47, "Active Parameters": 13, "API Cost": 0}
85
  {"Name": "gemma-1.1-7b-it", "Release Date": "2024-03-26", "Total Parameters": 7, "Active Parameters": 7, "API Cost": 0}
86
  {"Name": "llama2-70b-steerlm-chat", "Release Date": "2023-11-24", "Total Parameters": 70, "Active Parameters": 70, "API Cost": 0}
87
- {"Name": "pplx-70b-online", "Release Date": "2023-11-29", "Total Parameters": 70, "Active Parameters": 70, "API Cost": 0}
88
  {"Name": "deepseek-llm-67b-chat", "Release Date": "2023-11-29", "Total Parameters": 67, "Active Parameters": 67, "API Cost": 0}
89
  {"Name": "openchat-3.5", "Release Date": "2023-11-01", "Total Parameters": 7, "Active Parameters": 7, "API Cost": 0}
90
  {"Name": "openhermes-2.5-mistral-7b", "Release Date": "2023-10-29", "Total Parameters": 7, "Active Parameters": 7, "API Cost": 0}
@@ -99,7 +109,7 @@
99
  {"Name": "wizardlm-13b", "Release Date": "2023-05-26", "Total Parameters": 13, "Active Parameters": 13, "API Cost": 0}
100
  {"Name": "zephyr-7b-beta", "Release Date": "2023-10-16", "Total Parameters": 7, "Active Parameters": 7, "API Cost": 0}
101
  {"Name": "mpt-30b-chat", "Release Date": "2023-04-22", "Total Parameters": 30, "Active Parameters": 30, "API Cost": 0}
102
- {"Name": "pplx-7b-online", "Release Date": "2023-11-29", "Total Parameters": 7, "Active Parameters": 7, "API Cost": 0}
103
  {"Name": "codellama-70b-instruct", "Release Date": "2024-01-29", "Total Parameters": 70, "Active Parameters": 70, "API Cost": 0}
104
  {"Name": "codellama-34b-instruct", "Release Date": "2023-08-24", "Total Parameters": 34, "Active Parameters": 34, "API Cost": 0}
105
  {"Name": "vicuna-13b", "Release Date": "2023-06-22", "Total Parameters": 13, "Active Parameters": 13, "API Cost": 0}
 
1
+ {"Name": "deepseek-v2.5", "Release Date": "2024-09-05", "Total Parameters": 236, "Active Parameters": 21, "API Cost": 0}
2
+ {"Name": "qwen-plus-0828", "Release Date": "2024-08-28", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
3
+ {"Name": "qwen-plus-0828", "Release Date": "2024-08-28", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
4
+ {"Name": "gemini-1.5-pro-exp-0827", "Release Date": "2024-08-27", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
5
+ {"Name": "gemini-1.5-flash-exp-0827", "Release Date": "2024-08-27", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
6
+ {"Name": "gemini-1.5-flash-8b-exp-0827", "Release Date": "2024-08-27", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
7
  {"Name": "chatgpt-4o-latest", "Release Date": "2024-08-25", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
8
+ {"Name": "command-r-plus-08-2024", "Release Date": "2024-08-21", "Total Parameters": 104, "Active Parameters": 104, "API Cost": 0}
9
+ {"Name": "command-r-08-2024", "Release Date": "2024-08-19", "Total Parameters": 32, "Active Parameters": 32, "API Cost": 0}
10
  {"Name": "gemini-1.5-pro-exp-0801", "Release Date": "2024-08-01", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
11
  {"Name": "grok-2-2024-08-13", "Release Date": "2024-08-13", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
12
  {"Name": "gpt-4o-2024-05-13", "Release Date": "2024-05-13", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
13
  {"Name": "gpt-4o-mini-2024-07-18", "Release Date": "2024-07-18", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
14
+ {"Name": "gemma-2-9b-it-simpo", "Release Date": "2024-07-17", "Total Parameters": 9, "Active Parameters": 9, "API Cost": 0}
15
  {"Name": "claude-3-5-sonnet-20240620", "Release Date": "2024-06-20", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
16
  {"Name": "grok-2-mini-2024-08-13", "Release Date": "2024-08-13", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
17
  {"Name": "gemini-advanced-0514", "Release Date": "2024-05-14", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
18
+ {"Name": "llama-3.1-405b-instruct-bf16", "Release Date": "2024-07-23", "Total Parameters": 405, "Active Parameters": 405, "API Cost": 0}
19
+ {"Name": "llama-3.1-405b-instruct-fp8", "Release Date": "2024-07-23", "Total Parameters": 405, "Active Parameters": 405, "API Cost": 0}
20
  {"Name": "gpt-4o-2024-08-06", "Release Date": "2024-08-06", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
21
  {"Name": "gemini-1.5-pro-api-0514", "Release Date": "2024-05-14", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
22
  {"Name": "gemini-1.5-pro-api-0409-preview", "Release Date": "2024-04-09", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
 
57
  {"Name": "qwen1.5-110b-chat", "Release Date": "2024-02-04", "Total Parameters": 110, "Active Parameters": 110, "API Cost": 0}
58
  {"Name": "mistral-large-2402", "Release Date": "2024-02-24", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
59
  {"Name": "yi-1.5-34b-chat", "Release Date": "2024-05-13", "Total Parameters": 34, "Active Parameters": 34, "API Cost": 0}
60
+ {"Name": "reka-flash-21b-20240226-online", "Release Date": "2024-02-26", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
61
  {"Name": "llama-3-8b-instruct", "Release Date": "2024-04-18", "Total Parameters": 8, "Active Parameters": 8, "API Cost": 0}
62
  {"Name": "claude-1", "Release Date": "2023-03-14", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
63
  {"Name": "command-r", "Release Date": "2024-03-21", "Total Parameters": 35, "Active Parameters": 35, "API Cost": 0}
 
94
  {"Name": "nous-hermes-2-mixtral-8x7b-dpo", "Release Date": "2024-01-11", "Total Parameters": 47, "Active Parameters": 13, "API Cost": 0}
95
  {"Name": "gemma-1.1-7b-it", "Release Date": "2024-03-26", "Total Parameters": 7, "Active Parameters": 7, "API Cost": 0}
96
  {"Name": "llama2-70b-steerlm-chat", "Release Date": "2023-11-24", "Total Parameters": 70, "Active Parameters": 70, "API Cost": 0}
97
+ {"Name": "pplx-70b-online", "Release Date": "2023-11-29", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
98
  {"Name": "deepseek-llm-67b-chat", "Release Date": "2023-11-29", "Total Parameters": 67, "Active Parameters": 67, "API Cost": 0}
99
  {"Name": "openchat-3.5", "Release Date": "2023-11-01", "Total Parameters": 7, "Active Parameters": 7, "API Cost": 0}
100
  {"Name": "openhermes-2.5-mistral-7b", "Release Date": "2023-10-29", "Total Parameters": 7, "Active Parameters": 7, "API Cost": 0}
 
109
  {"Name": "wizardlm-13b", "Release Date": "2023-05-26", "Total Parameters": 13, "Active Parameters": 13, "API Cost": 0}
110
  {"Name": "zephyr-7b-beta", "Release Date": "2023-10-16", "Total Parameters": 7, "Active Parameters": 7, "API Cost": 0}
111
  {"Name": "mpt-30b-chat", "Release Date": "2023-04-22", "Total Parameters": 30, "Active Parameters": 30, "API Cost": 0}
112
+ {"Name": "pplx-7b-online", "Release Date": "2023-11-29", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
113
  {"Name": "codellama-70b-instruct", "Release Date": "2024-01-29", "Total Parameters": 70, "Active Parameters": 70, "API Cost": 0}
114
  {"Name": "codellama-34b-instruct", "Release Date": "2023-08-24", "Total Parameters": 34, "Active Parameters": 34, "API Cost": 0}
115
  {"Name": "vicuna-13b", "Release Date": "2023-06-22", "Total Parameters": 13, "Active Parameters": 13, "API Cost": 0}