"""Gradio dashboard charting AI-industry trends: Big Tech capex, chip-designer
data-center revenue, model-size efficiency, and benchmark progress over time."""

import json
import pickle
from datetime import date, datetime

import gradio as gr
import pandas as pd
import plotly.graph_objects as go


def _load_jsonl(path: str) -> list[dict]:
    """Read a JSON-Lines file and return one parsed dict per line."""
    records: list[dict] = []
    with open(path, 'r', encoding='utf-8') as file:
        for line in file:
            records.append(json.loads(line))
    return records


def create_big_five_capex_plot() -> go.Figure:
    """Stacked bar chart of quarterly capital expenditure (millions of USD)
    for Microsoft, Google, Meta, Apple and Amazon, read from big_five_capex.jsonl."""
    df = pd.DataFrame(_load_jsonl("big_five_capex.jsonl"))

    fig = go.Figure()
    companies = ['Microsoft', 'Google', 'Meta', 'Apple', 'Amazon']
    colors = ['#80bb00', '#ee161f', '#0065e3', '#000000', '#ff6200']
    for company, color in zip(companies, colors):
        fig.add_trace(go.Bar(
            x=df['Quarter'],
            y=df[company],
            name=company,
            marker_color=color
        ))

    # Dashed marker on the boundary after 2023 Q1 (+0.5 shifts from the bar
    # center to the category edge). Assumes "2023 Q1" exists in the data;
    # raises IndexError otherwise.
    fig.add_vline(
        x=df.index[df['Quarter'] == "2023 Q1"].tolist()[0] + 0.5,
        line_width=1,
        line_dash="dash",
        line_color="black",
        annotation_text="AI arms race begins",
        annotation_position="top right",
        annotation_font_size=12,
        annotation_font_color="black"
    )

    fig.update_layout(
        title='Capital Expenditure of the Big Five Tech Companies in Millions of U.S. Dollars per Quarter',
        xaxis_title='Quarter',
        yaxis_title='Capex (Millions of U.S. Dollars)',
        barmode='stack',
        legend_title='Companies',
        height=800
    )
    return fig


def create_chip_designers_data_center_revenue_plot() -> go.Figure:
    """Stacked bar chart of quarterly data-center revenue (millions of USD)
    for NVIDIA, AMD and Intel, read from chip_designers_data_center_revenue.jsonl."""
    df = pd.DataFrame(_load_jsonl("chip_designers_data_center_revenue.jsonl"))

    fig = go.Figure()
    companies = ['NVIDIA', 'AMD', 'Intel']
    colors = ['#80bb00', '#ee161f', '#0065e3']
    for company, color in zip(companies, colors):
        fig.add_trace(go.Bar(
            x=df['Quarter'],
            y=df[company],
            name=company,
            marker_color=color
        ))

    fig.update_layout(
        title='Data Center Revenue of NVIDIA, AMD and Intel in Millions of U.S. Dollars per Quarter',
        xaxis_title='Quarter',
        yaxis_title='Data Center Revenue (Millions of U.S. Dollars)',
        barmode='stack',
        legend_title='Companies',
        height=800
    )
    return fig


def create_size_for_performance_plot(category_to_display: str,
                                     parameter_type_to_display: str,
                                     model_to_compare: str
                                     ) -> tuple[go.Figure, gr.Dropdown, gr.Dropdown]:
    """Plot the running minimum of open-weights model size needed to reach the
    Chatbot Arena ELO of a reference model, over release dates.

    Args:
        category_to_display: Arena category key inside the pickled ELO results.
        parameter_type_to_display: models.jsonl column to plot
            ("Total Parameters" or "Active Parameters").
        model_to_compare: name of the reference model whose ELO sets the bar.

    Returns:
        The figure plus refreshed category and comparison-model dropdowns
        (choices populated from the loaded data).

    Raises:
        gr.Error: if the category or the comparison model is not in the data.
    """
    # NOTE(security): pickle.load executes arbitrary code during unpickling —
    # only ever load this trusted, locally produced file.
    with open('elo_results_20240915.pkl', 'rb') as file:
        elo_results = pickle.load(file)

    categories: list[str] = list(elo_results["text"].keys())
    if category_to_display not in categories:
        raise gr.Error(message=f"Category '{category_to_display}' not found.")
    elo_ratings_for_category: dict = dict(elo_results["text"][category_to_display]["elo_rating_final"])

    models: list[dict] = _load_jsonl("models.jsonl")

    # Join Arena ratings with model metadata; skip (and warn about) models
    # that have no entry in models.jsonl.
    size_for_performance_data: list[dict] = []
    for model_name, model_elo_rating in elo_ratings_for_category.items():
        model_entries_found = [model for model in models if model["Name"] == model_name]
        if model_entries_found:
            size_for_performance_data.append({
                "Name": model_name,
                "Release Date": model_entries_found[0]["Release Date"],
                "ELO Rating": model_elo_rating,
                parameter_type_to_display: model_entries_found[0][parameter_type_to_display]
            })
        else:
            print(f"[WARNING] Model '{model_name}' not found in models.jsonl")

    # Fix: show a friendly UI error (consistent with the category check above)
    # instead of crashing the callback with a bare KeyError when the
    # comparison model is absent from the selected category.
    if model_to_compare not in elo_ratings_for_category:
        raise gr.Error(message=f"Category '{category_to_display}' not found.")
    comparison_model_elo_score = elo_ratings_for_category[model_to_compare]

    # Keep only models with a known (> 0) parameter count that meet or beat
    # the reference ELO, ordered by release date.
    filtered_models = [model for model in size_for_performance_data
                       if model[parameter_type_to_display] > 0
                       and model['ELO Rating'] >= comparison_model_elo_score]
    filtered_models.sort(key=lambda x: datetime.strptime(x['Release Date'], "%Y-%m-%d"))

    x_dates = [datetime.strptime(model['Release Date'], "%Y-%m-%d") for model in filtered_models]

    # Running minimum: smallest qualifying model released up to each date.
    y_params = []
    min_param = float('inf')
    for model in filtered_models:
        param = model[parameter_type_to_display]
        if param <= min_param:
            min_param = param
        y_params.append(min_param)

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=x_dates,
        y=y_params,
        mode='lines',
        line=dict(shape='hv', width=2),
        name='Model Parameters'
    ))
    fig.update_layout(
        title=f'Model Size Progression for Open-Weights Models Reaching Performance of "{model_to_compare}" in "{category_to_display}" Category',
        xaxis_title='Release Date',
        yaxis_title=parameter_type_to_display,
        yaxis_type='log',
        hovermode='x unified',
        xaxis=dict(
            range=[date(2023, 2, 27), date(2024, 9, 15)],
            type='date'
        ),
        height=800
    )

    # Label only the models that set a new size record (steps on the curve).
    for i, model in enumerate(filtered_models):
        if i == 0 or y_params[i] < y_params[i - 1]:
            fig.add_trace(go.Scatter(
                x=[x_dates[i]],
                y=[y_params[i]],
                mode='markers+text',
                marker=dict(size=10),
                text=[model['Name']],
                textposition="top center",
                name=model['Name']
            ))

    return (fig,
            gr.Dropdown(choices=categories, value=category_to_display, interactive=True),
            gr.Dropdown(choices=list(elo_ratings_for_category.keys()), value=model_to_compare,
                        interactive=True))


def create_simple_plot(data_path: str, name: str, start_date: datetime, end_date: datetime,
                       min_value: int = 0, max_value: int = 100) -> go.Figure:
    """Plot the best-score-to-date of a leaderboard as a step line over model
    release dates, labeling each model that set a new record.

    Args:
        data_path: JSONL leaderboard with 'model' and 'score' per line.
        name: benchmark name, used for the title and y-axis label.
        start_date / end_date: x-axis range.
        min_value / max_value: y-axis range (defaults suit 0-100 scores).

    Returns:
        The assembled plotly figure.
    """
    simple_bench_leaderboard = _load_jsonl(data_path)
    models = _load_jsonl("models.jsonl")

    # Join leaderboard scores with release dates; warn about unknown models.
    data = []
    for entry in simple_bench_leaderboard:
        model_name = entry['model']
        score = entry['score']
        model_info = next((m for m in models if m['Name'] == model_name), None)
        if model_info:
            release_date = datetime.strptime(model_info['Release Date'], "%Y-%m-%d")
            data.append({'model': model_name, 'score': score, 'release_date': release_date})
        else:
            print(f"[WARNING] Model '{model_name}' not found in models.jsonl")
    data.sort(key=lambda x: x['release_date'])

    x_dates = [d['release_date'] for d in data]

    # Running maximum: the best score achieved up to each release date.
    y_scores = []
    max_score = 0
    for entry in data:
        if entry['score'] > max_score:
            max_score = entry['score']
        y_scores.append(max_score)

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=x_dates,
        y=y_scores,
        mode='lines',
        line=dict(shape='hv', width=2),
        name='Best Score to Date'
    ))

    # Label only record-setting entries (steps up on the curve).
    for i, entry in enumerate(data):
        if i == 0 or y_scores[i] > y_scores[i - 1]:
            fig.add_trace(go.Scatter(
                x=[entry['release_date']],
                y=[entry['score']],
                mode='markers+text',
                marker=dict(size=10),
                text=[entry['model']],
                textposition="top center",
                name=entry['model']
            ))

    fig.update_layout(
        title=f'{name} Over Time',
        xaxis_title='Release Date',
        yaxis_title=name,
        hovermode='x unified',
        xaxis=dict(
            range=[start_date, end_date],
            type='date'
        ),
        yaxis=dict(
            range=[min_value, max_value]
        ),
        height=800
    )
    return fig


# ---------------------------------------------------------------------------
# UI layout. Plots are created lazily: each tab's .select event builds its
# figure on first (and every) activation, so data files are only read when
# the corresponding tab is opened. Tabs marked interactive=False are
# placeholders for benchmarks not wired up yet.
# ---------------------------------------------------------------------------
with gr.Blocks() as demo:
    with gr.Tab("Finance"):
        with gr.Tab("Big Five Capex") as big_five_capex_tab:
            big_five_capex_plot: gr.Plot = gr.Plot()
        with gr.Tab("Chip Designers Data Center Revenue") as chip_designers_data_center_revenue_tab:
            chip_designers_data_center_revenue_plot: gr.Plot = gr.Plot()
    with gr.Tab("Model Efficiency Over Time"):
        with gr.Tab("Parameters Necessary for Specific Performance Level") as size_for_performance_tab:
            with gr.Row():
                size_for_performance_category_dropdown: gr.Dropdown = gr.Dropdown(
                    label="Category", value="full", choices=["full"], interactive=False)
                size_for_performance_parameter_number_dropdown: gr.Dropdown = gr.Dropdown(
                    label="Parameter Number", choices=["Total Parameters", "Active Parameters"],
                    value="Total Parameters", interactive=True)
                size_for_performance_comparison_model_dropdown: gr.Dropdown = gr.Dropdown(
                    label="Model for Comparison", value="gpt-4-0314", choices=["gpt-4-0314"],
                    interactive=False)
            size_for_performance_plot: gr.Plot = gr.Plot()
            size_for_performance_button: gr.Button = gr.Button("Show")
            size_for_performance_markdown: gr.Markdown = gr.Markdown(
                value="""Model performance as reported on [LMSYS Chatbot Arena Leaderboard](https://lmarena.ai/?leaderboard)."""
            )
        with gr.Tab("API Cost for Specific Performance Level", interactive=False):
            api_cost_for_performance_plot: gr.Plot = gr.Plot()
    with gr.Tab("System Performance Over Time"):
        with gr.Tab("ARC-AGI-Pub") as arc_agi_tab:
            arc_agi_plot: gr.Plot = gr.Plot()
        with gr.Tab("Simple Bench") as simple_bench_tab:
            simple_bench_plot: gr.Plot = gr.Plot()
        with gr.Tab("PlanBench") as planbench_tab:
            planbench_plot: gr.Plot = gr.Plot()
            planbench_markdown: gr.Markdown = gr.Markdown(
                value="""Source: [Valmeekam et al. 2024](https://arxiv.org/abs/2409.13373)"""
            )
        with gr.Tab("Codeforces") as codeforces_tab:
            with gr.Tab("General-Purpose Systems"):
                codeforces_plot: gr.Plot = gr.Plot()
        with gr.Tab("BigCodeBench", interactive=False):
            bigcodebench_plot: gr.Plot = gr.Plot()
        with gr.Tab("GAIA", interactive=False):
            gaia_plot: gr.Plot = gr.Plot()
        with gr.Tab("GPQA", interactive=False):
            gpqa_plot: gr.Plot = gr.Plot()
        with gr.Tab("HumanEval", interactive=False):
            humaneval_plot: gr.Plot = gr.Plot()
        with gr.Tab("Chatbot Arena", interactive=False):
            chatbot_arena_plot: gr.Plot = gr.Plot()
        with gr.Tab("MATH", interactive=False):
            math_plot: gr.Plot = gr.Plot()
        with gr.Tab("OpenCompass", interactive=False):
            opencompass_plot: gr.Plot = gr.Plot()
        with gr.Tab("SWE-bench", interactive=False):
            swe_bench_plot: gr.Plot = gr.Plot()
        with gr.Tab("WebArena", interactive=False):
            webarena_plot: gr.Plot = gr.Plot()
        with gr.Tab("ZeroEval", interactive=False):
            zeroeval_plot: gr.Plot = gr.Plot()
    with gr.Tab("Frontier Language Model Training Runs", interactive=False):
        with gr.Tab("Street Price of GPUs Used"):
            gpu_street_price_plot: gr.Plot = gr.Plot()
        with gr.Tab("TDP of GPUs Used"):
            tdp_gpus_plot: gr.Plot = gr.Plot()

    big_five_capex_tab.select(fn=create_big_five_capex_plot, outputs=big_five_capex_plot)
    chip_designers_data_center_revenue_tab.select(
        fn=create_chip_designers_data_center_revenue_plot,
        outputs=chip_designers_data_center_revenue_plot)
    size_for_performance_button.click(
        fn=create_size_for_performance_plot,
        inputs=[size_for_performance_category_dropdown,
                size_for_performance_parameter_number_dropdown,
                size_for_performance_comparison_model_dropdown],
        outputs=[size_for_performance_plot,
                 size_for_performance_category_dropdown,
                 size_for_performance_comparison_model_dropdown])
    # gr.State objects pass constant arguments through to create_simple_plot.
    arc_agi_tab.select(
        fn=create_simple_plot,
        inputs=[gr.State("arc_agi_leaderboard.jsonl"),
                gr.State("ARC-AGI-Pub (Public Eval) Score"),
                gr.State(date(2024, 5, 13)), gr.State(date(2024, 12, 20))],
        outputs=arc_agi_plot)
    simple_bench_tab.select(
        fn=create_simple_plot,
        inputs=[gr.State("simple_bench_leaderboard.jsonl"),
                gr.State("Simple Bench Score"),
                gr.State(date(2023, 6, 13)), gr.State(date(2024, 8, 14))],
        outputs=simple_bench_plot)
    codeforces_tab.select(
        fn=create_simple_plot,
        inputs=[gr.State("codeforces_leaderboard.jsonl"),
                gr.State("Codeforces (Elo Rating)"),
                gr.State(date(2024, 5, 13)), gr.State(date(2024, 12, 20)),
                gr.State(800), gr.State(3000)],
        outputs=codeforces_plot)
    planbench_tab.select(
        fn=create_simple_plot,
        inputs=[gr.State("planbench_leaderboard.jsonl"),
                gr.State("PlanBench (Mystery Blocksworld, 0-shot) Score"),
                gr.State(date(2023, 3, 14)), gr.State(date(2024, 9, 23))],
        outputs=planbench_plot)

if __name__ == "__main__":
    demo.launch()