import json
import pickle
from datetime import datetime, date

import gradio as gr
import pandas as pd
import plotly.graph_objects as go
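

# All datasets in this app are JSON Lines files (one JSON object per line),
# so a single shared loader keeps the read logic in one place.
def load_jsonl(path: str) -> list[dict]:
    """Read a JSON Lines file into a list of dicts, one record per line."""
    with open(path, 'r') as file:
        return [json.loads(line) for line in file]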


def create_big_five_capex_plot():
    """Plot quarterly capex (in millions of USD) of Microsoft, Google, Meta, Apple and Amazon."""
    big_five_capex = load_jsonl("big_five_capex.jsonl")
    df = pd.DataFrame(big_five_capex)
    fig = go.Figure()
    companies = ['Microsoft', 'Google', 'Meta', 'Apple', 'Amazon']
    colors = ['#80bb00', '#ee161f', '#0065e3', '#000000', '#ff6200']
    for company, color in zip(companies, colors):
        fig.add_trace(go.Bar(
            x=df['Quarter'],
            y=df[company],
            name=company,
            marker_color=color
        ))
    fig.add_vline(
        x=df.index[df['Quarter'] == "2023 Q1"].tolist()[0] + 0.5,
        line_width=1,
        line_dash="dash",
        line_color="black",
        annotation_text="AI arms race begins",
        annotation_position="top right",
        annotation_font_size=12,
        annotation_font_color="black"
    )
    fig.update_layout(
        title='Capital Expenditure of the Big Five Tech Companies in Millions of U.S. Dollars per Quarter',
        xaxis_title='Quarter',
        yaxis_title='Capex (Millions of U.S. Dollars)',
        barmode='stack',
        legend_title='Companies',
        height=800
    )
    return fig


def create_chip_designers_data_center_revenue_plot():
    """Plot quarterly data center revenue (in millions of USD) of NVIDIA, AMD and Intel."""
    data_center_revenue_by_company = load_jsonl("chip_designers_data_center_revenue.jsonl")
    df = pd.DataFrame(data_center_revenue_by_company)
    fig = go.Figure()
    companies = ['NVIDIA', 'AMD', 'Intel']
    colors = ['#80bb00', '#ee161f', '#0065e3']  # TODO
    for company, color in zip(companies, colors):
        fig.add_trace(go.Bar(
            x=df['Quarter'],
            y=df[company],
            name=company,
            marker_color=color
        ))
    fig.update_layout(
        title='Data Center Revenue of NVIDIA, AMD and Intel in Millions of U.S. Dollars per Quarter',
        xaxis_title='Quarter',
        yaxis_title='Data Center Revenue (Millions of U.S. Dollars)',
        barmode='stack',
        legend_title='Companies',
        height=800
    )
    return fig


def create_size_for_performance_plot(category_to_display: str,
                                     parameter_type_to_display: str,
                                     model_to_compare: str) -> tuple[go.Figure, gr.Dropdown, gr.Dropdown]:
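    """Build a step plot of the smallest open-weights model (by the chosen
    parameter count) matching or exceeding the Arena Elo of the comparison
    model over time, plus refreshed category and comparison-model dropdowns.
    """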
    with open('elo_results_20240915.pkl', 'rb') as file:
        elo_results = pickle.load(file)
    categories: list[str] = list(elo_results["text"].keys())
    if category_to_display not in categories:
        raise gr.Error(message=f"Category '{category_to_display}' not found.")
    elo_ratings_for_category: dict = dict(elo_results["text"][category_to_display]["elo_rating_final"])
    models: list[dict] = load_jsonl("models.jsonl")
    size_for_performance_data: list[dict] = []
    for model_name, model_elo_rating in elo_ratings_for_category.items():
        model_entries_found = [model for model in models if model["Name"] == model_name]
        if model_entries_found:
            size_for_performance_data.append({
                "Name": model_name,
                "Release Date": model_entries_found[0]["Release Date"],
                "ELO Rating": model_elo_rating,
                parameter_type_to_display: model_entries_found[0][parameter_type_to_display]
            })
        else:
            print(f"[WARNING] Model '{model_name}' not found in models.jsonl")
    comparison_model_elo_score = elo_ratings_for_category[model_to_compare]
    filtered_models = [model for model in size_for_performance_data
                       if model[parameter_type_to_display] > 0 and
                       model['ELO Rating'] >= comparison_model_elo_score]
    filtered_models.sort(key=lambda x: datetime.strptime(x['Release Date'], "%Y-%m-%d"))
    x_dates = [datetime.strptime(model['Release Date'], "%Y-%m-%d") for model in filtered_models]
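    # Running minimum: the staircase steps down only when a newly released
    # model reaches the target Elo with fewer parameters than any before it.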
    y_params = []
    min_param = float('inf')
    for model in filtered_models:
        param = model[parameter_type_to_display]
        if param <= min_param:
            min_param = param
        y_params.append(min_param)
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=x_dates,
        y=y_params,
        mode='lines',
        line=dict(shape='hv', width=2),
        name='Model Parameters'
    ))
    fig.update_layout(
        title=f'Model Size Progression for Open-Weights Models Reaching Performance of "{model_to_compare}" in "{category_to_display}" Category',
        xaxis_title='Release Date',
        yaxis_title=parameter_type_to_display,
        yaxis_type='log',
        hovermode='x unified',
        xaxis=dict(
            range=[date(2023, 2, 27), date(2024, 9, 15)],
            type='date'
        ),
        height=800
    )
    for i, model in enumerate(filtered_models):
        if i == 0 or y_params[i] < y_params[i - 1]:
            fig.add_trace(go.Scatter(
                x=[x_dates[i]],
                y=[y_params[i]],
                mode='markers+text',
                marker=dict(size=10),
                text=[model['Name']],
                textposition="top center",
                name=model['Name']
            ))
    return (fig,
            gr.Dropdown(choices=categories, value=category_to_display, interactive=True),
            gr.Dropdown(choices=list(elo_ratings_for_category.keys()), value=model_to_compare, interactive=True))


def create_simple_plot(data_path: str,
                       name: str,
                       start_date: date, end_date: date,
                       min_value: int = 0, max_value: int = 100) -> go.Figure:
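    """Plot the best score to date from a JSONL leaderboard as a step function
    over model release dates, labeling each model that set a new record."""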
    leaderboard = load_jsonl(data_path)
    models = load_jsonl("models.jsonl")
    data = []
    for entry in leaderboard:
        model_name = entry['model']
        score = entry['score']
        model_info = next((m for m in models if m['Name'] == model_name), None)
        if model_info:
            release_date = datetime.strptime(model_info['Release Date'], "%Y-%m-%d")
            data.append({'model': model_name, 'score': score, 'release_date': release_date})
        else:
            print(f"[WARNING] Model '{model_name}' not found in models.jsonl")
    data.sort(key=lambda x: x['release_date'])
    x_dates = [d['release_date'] for d in data]
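    # Running maximum: the line steps up only when a model beats the best
    # score seen so far.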
    y_scores = []
    max_score = 0
    for entry in data:
        if entry['score'] > max_score:
            max_score = entry['score']
        y_scores.append(max_score)
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=x_dates,
        y=y_scores,
        mode='lines',
        line=dict(shape='hv', width=2),
        name='Best Score to Date'
    ))
    for i, entry in enumerate(data):
        if i == 0 or y_scores[i] > y_scores[i - 1]:
            fig.add_trace(go.Scatter(
                x=[entry['release_date']],
                y=[entry['score']],
                mode='markers+text',
                marker=dict(size=10),
                text=[entry['model']],
                textposition="top center",
                name=entry['model']
            ))
    fig.update_layout(
        title=f'{name} Over Time',
        xaxis_title='Release Date',
        yaxis_title=name,
        hovermode='x unified',
        xaxis=dict(
            range=[start_date, end_date],
            type='date'
        ),
        yaxis=dict(
            range=[min_value, max_value]
        ),
        height=800
    )
    return fig
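

# --- UI ----------------------------------------------------------------------
# Tabs are grouped by theme. Tabs created with interactive=False are disabled
# placeholders with no data wired up yet; the active plots are built on demand
# by the .select/.click handlers registered at the bottom of the Blocks.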
with gr.Blocks() as demo:
    with gr.Tab("Finance"):
        with gr.Tab("Big Five Capex") as big_five_capex_tab:
            big_five_capex_plot: gr.Plot = gr.Plot()
        with gr.Tab("Chip Designers Data Center Revenue") as chip_designers_data_center_revenue_tab:
            chip_designers_data_center_revenue_plot: gr.Plot = gr.Plot()
    with gr.Tab("Model Efficiency Over Time"):
        with gr.Tab("Parameters Necessary for Specific Performance Level") as size_for_performance_tab:
            with gr.Row():
                size_for_performance_category_dropdown: gr.Dropdown = gr.Dropdown(label="Category",
                                                                                  value="full",
                                                                                  choices=["full"],
                                                                                  interactive=False)
                size_for_performance_parameter_number_dropdown: gr.Dropdown = gr.Dropdown(label="Parameter Number",
                                                                                          choices=["Total Parameters",
                                                                                                   "Active Parameters"],
                                                                                          value="Total Parameters",
                                                                                          interactive=True)
                size_for_performance_comparison_model_dropdown: gr.Dropdown = gr.Dropdown(label="Model for Comparison",
                                                                                          value="gpt-4-0314",
                                                                                          choices=["gpt-4-0314"],
                                                                                          interactive=False)
            size_for_performance_plot: gr.Plot = gr.Plot()
            size_for_performance_button: gr.Button = gr.Button("Show")
            size_for_performance_markdown: gr.Markdown = gr.Markdown(
                value="""Model performance as reported on the [LMSYS Chatbot Arena Leaderboard](https://lmarena.ai/?leaderboard)."""
            )
        with gr.Tab("API Cost for Specific Performance Level", interactive=False):
            api_cost_for_performance_plot: gr.Plot = gr.Plot()
    with gr.Tab("System Performance Over Time"):
        with gr.Tab("ARC-AGI-Pub") as arc_agi_tab:
            arc_agi_plot: gr.Plot = gr.Plot()
        with gr.Tab("Simple Bench") as simple_bench_tab:
            simple_bench_plot: gr.Plot = gr.Plot()
        with gr.Tab("PlanBench") as planbench_tab:
            planbench_plot: gr.Plot = gr.Plot()
            planbench_markdown: gr.Markdown = gr.Markdown(
                value="""Source: [Valmeekam et al. 2024](https://arxiv.org/abs/2409.13373)"""
            )
        with gr.Tab("Codeforces") as codeforces_tab:
            with gr.Tab("General-Purpose Systems"):
                codeforces_plot: gr.Plot = gr.Plot()
        with gr.Tab("BigCodeBench", interactive=False):
            bigcodebench_plot: gr.Plot = gr.Plot()
        with gr.Tab("GAIA", interactive=False):
            gaia_plot: gr.Plot = gr.Plot()
        with gr.Tab("GPQA", interactive=False):
            gpqa_plot: gr.Plot = gr.Plot()
        with gr.Tab("HumanEval", interactive=False):
            humaneval_plot: gr.Plot = gr.Plot()
        with gr.Tab("Chatbot Arena", interactive=False):
            chatbot_arena_plot: gr.Plot = gr.Plot()
        with gr.Tab("MATH", interactive=False):
            math_plot: gr.Plot = gr.Plot()
        with gr.Tab("OpenCompass", interactive=False):
            opencompass_plot: gr.Plot = gr.Plot()
        with gr.Tab("SWE-bench", interactive=False):
            swe_bench_plot: gr.Plot = gr.Plot()
        with gr.Tab("WebArena", interactive=False):
            webarena_plot: gr.Plot = gr.Plot()
        with gr.Tab("ZeroEval", interactive=False):
            zeroeval_plot: gr.Plot = gr.Plot()
    with gr.Tab("Frontier Language Model Training Runs", interactive=False):
        with gr.Tab("Street Price of GPUs Used"):
            gpu_street_price_plot: gr.Plot = gr.Plot()
        with gr.Tab("TDP of GPUs Used"):
            tdp_gpus_plot: gr.Plot = gr.Plot()
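
    # Event wiring: each active tab rebuilds its plot when selected, and the
    # "Show" button regenerates the size-for-performance plot from the three
    # dropdown values.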
    big_five_capex_tab.select(fn=create_big_five_capex_plot, outputs=big_five_capex_plot)
    chip_designers_data_center_revenue_tab.select(fn=create_chip_designers_data_center_revenue_plot,
                                                  outputs=chip_designers_data_center_revenue_plot)
    size_for_performance_button.click(fn=create_size_for_performance_plot,
                                      inputs=[size_for_performance_category_dropdown,
                                              size_for_performance_parameter_number_dropdown,
                                              size_for_performance_comparison_model_dropdown],
                                      outputs=[size_for_performance_plot,
                                               size_for_performance_category_dropdown,
                                               size_for_performance_comparison_model_dropdown])
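    # gr.State wraps constant arguments (data path, axis label, date range and
    # optional score bounds) so the generic create_simple_plot can serve
    # several leaderboards.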
    arc_agi_tab.select(fn=create_simple_plot,
                       inputs=[gr.State("arc_agi_leaderboard.jsonl"), gr.State("ARC-AGI-Pub (Public Eval) Score"),
                               gr.State(date(2024, 5, 13)), gr.State(date(2024, 12, 20))],
                       outputs=arc_agi_plot)
    simple_bench_tab.select(fn=create_simple_plot,
                            inputs=[gr.State("simple_bench_leaderboard.jsonl"), gr.State("Simple Bench Score"),
                                    gr.State(date(2023, 6, 13)), gr.State(date(2024, 8, 14))],
                            outputs=simple_bench_plot)
    codeforces_tab.select(fn=create_simple_plot,
                          inputs=[gr.State("codeforces_leaderboard.jsonl"), gr.State("Codeforces (Elo Rating)"),
                                  gr.State(date(2024, 5, 13)), gr.State(date(2024, 12, 20)),
                                  gr.State(800), gr.State(3000)],
                          outputs=codeforces_plot)
    planbench_tab.select(fn=create_simple_plot,
                         inputs=[gr.State("planbench_leaderboard.jsonl"), gr.State("PlanBench (Mystery Blocksworld, 0-shot) Score"),
                                 gr.State(date(2023, 3, 14)), gr.State(date(2024, 9, 23))],
                         outputs=planbench_plot)

if __name__ == "__main__":
    demo.launch()