Spaces:
Sleeping
Sleeping
kaizuberbuehler
committed on
Commit
•
5467082
1
Parent(s):
8e52bb6
Update data; Other changes
Browse files
- app.py +32 -19
- elo_results_20240823.pkl → elo_results_20240915.pkl +2 -2
- models.jsonl +14 -4
app.py
CHANGED
@@ -78,7 +78,7 @@ def create_chip_designers_data_center_revenue_plot():
|
|
78 |
def create_size_for_performance_plot(category_to_display: str,
|
79 |
parameter_type_to_display: str,
|
80 |
model_to_compare: str) -> (go.Figure, gr.Dropdown, gr.Dropdown):
|
81 |
-
with open('elo_results_20240823.pkl', 'rb') as file:
|
82 |
elo_results = pickle.load(file)
|
83 |
categories: list[str] = list(elo_results["text"].keys())
|
84 |
if category_to_display not in categories:
|
@@ -136,7 +136,7 @@ def create_size_for_performance_plot(category_to_display: str,
|
|
136 |
yaxis_type='log',
|
137 |
hovermode='x unified',
|
138 |
xaxis=dict(
|
139 |
-
range=[date(2023, 2, 27), date(2024,
|
140 |
type='date'
|
141 |
),
|
142 |
height=800
|
@@ -161,14 +161,12 @@ def create_size_for_performance_plot(category_to_display: str,
|
|
161 |
|
162 |
with gr.Blocks() as demo:
|
163 |
with gr.Tab("Finance"):
|
164 |
-
with gr.Tab("Big Five Capex"):
|
165 |
big_five_capex_plot: gr.Plot = gr.Plot()
|
166 |
-
|
167 |
-
with gr.Tab("Chip Designers Data Center Revenue"):
|
168 |
chip_designers_data_center_revenue_plot: gr.Plot = gr.Plot()
|
169 |
-
|
170 |
-
|
171 |
-
with gr.Tab("Parameters Necessary for Specific Performance Level"):
|
172 |
with gr.Row():
|
173 |
size_for_performance_category_dropdown: gr.Dropdown = gr.Dropdown(label="Category",
|
174 |
value="full",
|
@@ -190,24 +188,39 @@ with gr.Blocks() as demo:
|
|
190 |
)
|
191 |
with gr.Tab("API Cost for Specific Performance Level", interactive=False):
|
192 |
api_cost_for_performance_plot: gr.Plot = gr.Plot()
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
|
|
|
|
198 |
with gr.Tab("GAIA"):
|
199 |
gaia_plot: gr.Plot = gr.Plot()
|
200 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
201 |
with gr.Tab("Frontier Language Model Training Runs", interactive=False):
|
202 |
with gr.Tab("Street Price of GPUs Used"):
|
203 |
gpu_street_price_plot: gr.Plot = gr.Plot()
|
204 |
-
gpu_street_price_button: gr.Button = gr.Button("Show")
|
205 |
with gr.Tab("TDP of GPUs Used"):
|
206 |
tdp_gpus_plot: gr.Plot = gr.Plot()
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
outputs=chip_designers_data_center_revenue_plot)
|
211 |
size_for_performance_button.click(fn=create_size_for_performance_plot,
|
212 |
inputs=[size_for_performance_category_dropdown,
|
213 |
size_for_performance_parameter_number_dropdown,
|
|
|
78 |
def create_size_for_performance_plot(category_to_display: str,
|
79 |
parameter_type_to_display: str,
|
80 |
model_to_compare: str) -> (go.Figure, gr.Dropdown, gr.Dropdown):
|
81 |
+
with open('elo_results_20240915.pkl', 'rb') as file:
|
82 |
elo_results = pickle.load(file)
|
83 |
categories: list[str] = list(elo_results["text"].keys())
|
84 |
if category_to_display not in categories:
|
|
|
136 |
yaxis_type='log',
|
137 |
hovermode='x unified',
|
138 |
xaxis=dict(
|
139 |
+
range=[date(2023, 2, 27), date(2024, 9, 15)],
|
140 |
type='date'
|
141 |
),
|
142 |
height=800
|
|
|
161 |
|
162 |
with gr.Blocks() as demo:
|
163 |
with gr.Tab("Finance"):
|
164 |
+
with gr.Tab("Big Five Capex") as big_five_capex_tab:
|
165 |
big_five_capex_plot: gr.Plot = gr.Plot()
|
166 |
+
with gr.Tab("Chip Designers Data Center Revenue") as chip_designers_data_center_revenue_tab:
|
|
|
167 |
chip_designers_data_center_revenue_plot: gr.Plot = gr.Plot()
|
168 |
+
with gr.Tab("Model Efficiency Over Time"):
|
169 |
+
with gr.Tab("Parameters Necessary for Specific Performance Level") as size_for_performance_tab:
|
|
|
170 |
with gr.Row():
|
171 |
size_for_performance_category_dropdown: gr.Dropdown = gr.Dropdown(label="Category",
|
172 |
value="full",
|
|
|
188 |
)
|
189 |
with gr.Tab("API Cost for Specific Performance Level", interactive=False):
|
190 |
api_cost_for_performance_plot: gr.Plot = gr.Plot()
|
191 |
+
with gr.Tab("System Performance Over Time", interactive=False):
|
192 |
+
with gr.Tab("ARC-AGI"):
|
193 |
+
arc_agi_plot: gr.Plot = gr.Plot()
|
194 |
+
with gr.Tab("BigCodeBench"):
|
195 |
+
bigcodebench_plot: gr.Plot = gr.Plot()
|
196 |
+
with gr.Tab("Codeforces"):
|
197 |
+
codeforces_plot: gr.Plot = gr.Plot()
|
198 |
with gr.Tab("GAIA"):
|
199 |
gaia_plot: gr.Plot = gr.Plot()
|
200 |
+
with gr.Tab("GPQA"):
|
201 |
+
gpqa_plot: gr.Plot = gr.Plot()
|
202 |
+
with gr.Tab("HumanEval"):
|
203 |
+
humaneval_plot: gr.Plot = gr.Plot()
|
204 |
+
with gr.Tab("LMSYS"):
|
205 |
+
lmsys_plot: gr.Plot = gr.Plot()
|
206 |
+
with gr.Tab("OpenCompass"):
|
207 |
+
opencompass_plot: gr.Plot = gr.Plot()
|
208 |
+
with gr.Tab("SWE-bench"):
|
209 |
+
swe_bench_plot: gr.Plot = gr.Plot()
|
210 |
+
with gr.Tab("Simple Bench"):
|
211 |
+
simple_bench_plot: gr.Plot = gr.Plot()
|
212 |
+
with gr.Tab("WebArena"):
|
213 |
+
webarena_plot: gr.Plot = gr.Plot()
|
214 |
+
with gr.Tab("ZeroEval"):
|
215 |
+
zeroeval_plot: gr.Plot = gr.Plot()
|
216 |
with gr.Tab("Frontier Language Model Training Runs", interactive=False):
|
217 |
with gr.Tab("Street Price of GPUs Used"):
|
218 |
gpu_street_price_plot: gr.Plot = gr.Plot()
|
|
|
219 |
with gr.Tab("TDP of GPUs Used"):
|
220 |
tdp_gpus_plot: gr.Plot = gr.Plot()
|
221 |
+
big_five_capex_tab.select(fn=create_big_five_capex_plot, outputs=big_five_capex_plot)
|
222 |
+
chip_designers_data_center_revenue_tab.select(fn=create_chip_designers_data_center_revenue_plot,
|
223 |
+
outputs=chip_designers_data_center_revenue_plot)
|
|
|
224 |
size_for_performance_button.click(fn=create_size_for_performance_plot,
|
225 |
inputs=[size_for_performance_category_dropdown,
|
226 |
size_for_performance_parameter_number_dropdown,
|
elo_results_20240823.pkl → elo_results_20240915.pkl
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bdce5fbf7a50d53ce549fd2c6c230627397856c4b62807b0e97a7f2c8554045e
|
3 |
+
size 3707205
|
models.jsonl
CHANGED
@@ -1,12 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
{"Name": "chatgpt-4o-latest", "Release Date": "2024-08-25", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
|
|
|
|
2 |
{"Name": "gemini-1.5-pro-exp-0801", "Release Date": "2024-08-01", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
3 |
{"Name": "grok-2-2024-08-13", "Release Date": "2024-08-13", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
4 |
{"Name": "gpt-4o-2024-05-13", "Release Date": "2024-05-13", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
5 |
{"Name": "gpt-4o-mini-2024-07-18", "Release Date": "2024-07-18", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
|
|
6 |
{"Name": "claude-3-5-sonnet-20240620", "Release Date": "2024-06-20", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
7 |
{"Name": "grok-2-mini-2024-08-13", "Release Date": "2024-08-13", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
8 |
{"Name": "gemini-advanced-0514", "Release Date": "2024-05-14", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
9 |
-
{"Name": "llama-3.1-405b-instruct", "Release Date": "2024-07-23", "Total Parameters": 405, "Active Parameters": 405, "API Cost": 0}
|
|
|
10 |
{"Name": "gpt-4o-2024-08-06", "Release Date": "2024-08-06", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
11 |
{"Name": "gemini-1.5-pro-api-0514", "Release Date": "2024-05-14", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
12 |
{"Name": "gemini-1.5-pro-api-0409-preview", "Release Date": "2024-04-09", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
@@ -47,7 +57,7 @@
|
|
47 |
{"Name": "qwen1.5-110b-chat", "Release Date": "2024-02-04", "Total Parameters": 110, "Active Parameters": 110, "API Cost": 0}
|
48 |
{"Name": "mistral-large-2402", "Release Date": "2024-02-24", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
49 |
{"Name": "yi-1.5-34b-chat", "Release Date": "2024-05-13", "Total Parameters": 34, "Active Parameters": 34, "API Cost": 0}
|
50 |
-
{"Name": "reka-flash-21b-20240226-online", "Release Date": "2024-02-26", "Total Parameters":
|
51 |
{"Name": "llama-3-8b-instruct", "Release Date": "2024-04-18", "Total Parameters": 8, "Active Parameters": 8, "API Cost": 0}
|
52 |
{"Name": "claude-1", "Release Date": "2023-03-14", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
53 |
{"Name": "command-r", "Release Date": "2024-03-21", "Total Parameters": 35, "Active Parameters": 35, "API Cost": 0}
|
@@ -84,7 +94,7 @@
|
|
84 |
{"Name": "nous-hermes-2-mixtral-8x7b-dpo", "Release Date": "2024-01-11", "Total Parameters": 47, "Active Parameters": 13, "API Cost": 0}
|
85 |
{"Name": "gemma-1.1-7b-it", "Release Date": "2024-03-26", "Total Parameters": 7, "Active Parameters": 7, "API Cost": 0}
|
86 |
{"Name": "llama2-70b-steerlm-chat", "Release Date": "2023-11-24", "Total Parameters": 70, "Active Parameters": 70, "API Cost": 0}
|
87 |
-
{"Name": "pplx-70b-online", "Release Date": "2023-11-29", "Total Parameters":
|
88 |
{"Name": "deepseek-llm-67b-chat", "Release Date": "2023-11-29", "Total Parameters": 67, "Active Parameters": 67, "API Cost": 0}
|
89 |
{"Name": "openchat-3.5", "Release Date": "2023-11-01", "Total Parameters": 7, "Active Parameters": 7, "API Cost": 0}
|
90 |
{"Name": "openhermes-2.5-mistral-7b", "Release Date": "2023-10-29", "Total Parameters": 7, "Active Parameters": 7, "API Cost": 0}
|
@@ -99,7 +109,7 @@
|
|
99 |
{"Name": "wizardlm-13b", "Release Date": "2023-05-26", "Total Parameters": 13, "Active Parameters": 13, "API Cost": 0}
|
100 |
{"Name": "zephyr-7b-beta", "Release Date": "2023-10-16", "Total Parameters": 7, "Active Parameters": 7, "API Cost": 0}
|
101 |
{"Name": "mpt-30b-chat", "Release Date": "2023-04-22", "Total Parameters": 30, "Active Parameters": 30, "API Cost": 0}
|
102 |
-
{"Name": "pplx-7b-online", "Release Date": "2023-11-29", "Total Parameters":
|
103 |
{"Name": "codellama-70b-instruct", "Release Date": "2024-01-29", "Total Parameters": 70, "Active Parameters": 70, "API Cost": 0}
|
104 |
{"Name": "codellama-34b-instruct", "Release Date": "2023-08-24", "Total Parameters": 34, "Active Parameters": 34, "API Cost": 0}
|
105 |
{"Name": "vicuna-13b", "Release Date": "2023-06-22", "Total Parameters": 13, "Active Parameters": 13, "API Cost": 0}
|
|
|
1 |
+
{"Name": "deepseek-v2.5", "Release Date": "2024-09-05", "Total Parameters": 236, "Active Parameters": 236, "API Cost": 0}
|
2 |
+
{"Name": "qwen-plus-0828", "Release Date": "2024-08-28", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
3 |
+
{"Name": "qwen-plus-0828", "Release Date": "2024-08-28", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
4 |
+
{"Name": "gemini-1.5-pro-exp-0827", "Release Date": "2024-08-27", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
5 |
+
{"Name": "gemini-1.5-flash-exp-0827", "Release Date": "2024-08-27", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
6 |
+
{"Name": "gemini-1.5-flash-8b-exp-0827", "Release Date": "2024-08-27", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
7 |
{"Name": "chatgpt-4o-latest", "Release Date": "2024-08-25", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
8 |
+
{"Name": "command-r-plus-08-2024", "Release Date": "2024-08-21", "Total Parameters": 104, "Active Parameters": 104, "API Cost": 0}
|
9 |
+
{"Name": "command-r-08-2024", "Release Date": "2024-08-19", "Total Parameters": 32, "Active Parameters": 32, "API Cost": 0}
|
10 |
{"Name": "gemini-1.5-pro-exp-0801", "Release Date": "2024-08-01", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
11 |
{"Name": "grok-2-2024-08-13", "Release Date": "2024-08-13", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
12 |
{"Name": "gpt-4o-2024-05-13", "Release Date": "2024-05-13", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
13 |
{"Name": "gpt-4o-mini-2024-07-18", "Release Date": "2024-07-18", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
14 |
+
{"Name": "gemma-2-9b-it-simpo", "Release Date": "2024-07-17", "Total Parameters": 9, "Active Parameters": 9, "API Cost": 0}
|
15 |
{"Name": "claude-3-5-sonnet-20240620", "Release Date": "2024-06-20", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
16 |
{"Name": "grok-2-mini-2024-08-13", "Release Date": "2024-08-13", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
17 |
{"Name": "gemini-advanced-0514", "Release Date": "2024-05-14", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
18 |
+
{"Name": "llama-3.1-405b-instruct-bf16", "Release Date": "2024-07-23", "Total Parameters": 405, "Active Parameters": 405, "API Cost": 0}
|
19 |
+
{"Name": "llama-3.1-405b-instruct-fp8", "Release Date": "2024-07-23", "Total Parameters": 405, "Active Parameters": 405, "API Cost": 0}
|
20 |
{"Name": "gpt-4o-2024-08-06", "Release Date": "2024-08-06", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
21 |
{"Name": "gemini-1.5-pro-api-0514", "Release Date": "2024-05-14", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
22 |
{"Name": "gemini-1.5-pro-api-0409-preview", "Release Date": "2024-04-09", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
|
|
57 |
{"Name": "qwen1.5-110b-chat", "Release Date": "2024-02-04", "Total Parameters": 110, "Active Parameters": 110, "API Cost": 0}
|
58 |
{"Name": "mistral-large-2402", "Release Date": "2024-02-24", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
59 |
{"Name": "yi-1.5-34b-chat", "Release Date": "2024-05-13", "Total Parameters": 34, "Active Parameters": 34, "API Cost": 0}
|
60 |
+
{"Name": "reka-flash-21b-20240226-online", "Release Date": "2024-02-26", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
61 |
{"Name": "llama-3-8b-instruct", "Release Date": "2024-04-18", "Total Parameters": 8, "Active Parameters": 8, "API Cost": 0}
|
62 |
{"Name": "claude-1", "Release Date": "2023-03-14", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
63 |
{"Name": "command-r", "Release Date": "2024-03-21", "Total Parameters": 35, "Active Parameters": 35, "API Cost": 0}
|
|
|
94 |
{"Name": "nous-hermes-2-mixtral-8x7b-dpo", "Release Date": "2024-01-11", "Total Parameters": 47, "Active Parameters": 13, "API Cost": 0}
|
95 |
{"Name": "gemma-1.1-7b-it", "Release Date": "2024-03-26", "Total Parameters": 7, "Active Parameters": 7, "API Cost": 0}
|
96 |
{"Name": "llama2-70b-steerlm-chat", "Release Date": "2023-11-24", "Total Parameters": 70, "Active Parameters": 70, "API Cost": 0}
|
97 |
+
{"Name": "pplx-70b-online", "Release Date": "2023-11-29", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
98 |
{"Name": "deepseek-llm-67b-chat", "Release Date": "2023-11-29", "Total Parameters": 67, "Active Parameters": 67, "API Cost": 0}
|
99 |
{"Name": "openchat-3.5", "Release Date": "2023-11-01", "Total Parameters": 7, "Active Parameters": 7, "API Cost": 0}
|
100 |
{"Name": "openhermes-2.5-mistral-7b", "Release Date": "2023-10-29", "Total Parameters": 7, "Active Parameters": 7, "API Cost": 0}
|
|
|
109 |
{"Name": "wizardlm-13b", "Release Date": "2023-05-26", "Total Parameters": 13, "Active Parameters": 13, "API Cost": 0}
|
110 |
{"Name": "zephyr-7b-beta", "Release Date": "2023-10-16", "Total Parameters": 7, "Active Parameters": 7, "API Cost": 0}
|
111 |
{"Name": "mpt-30b-chat", "Release Date": "2023-04-22", "Total Parameters": 30, "Active Parameters": 30, "API Cost": 0}
|
112 |
+
{"Name": "pplx-7b-online", "Release Date": "2023-11-29", "Total Parameters": 0, "Active Parameters": 0, "API Cost": 0}
|
113 |
{"Name": "codellama-70b-instruct", "Release Date": "2024-01-29", "Total Parameters": 70, "Active Parameters": 70, "API Cost": 0}
|
114 |
{"Name": "codellama-34b-instruct", "Release Date": "2023-08-24", "Total Parameters": 34, "Active Parameters": 34, "API Cost": 0}
|
115 |
{"Name": "vicuna-13b", "Release Date": "2023-06-22", "Total Parameters": 13, "Active Parameters": 13, "API Cost": 0}
|