{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/andrewreed/Documents/success_projects/closed-vs-open-arena-elo/.venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import pickle\n",
    "\n",
    "import pandas as pd\n",
    "from huggingface_hub import HfFileSystem, hf_hub_download"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Prepare data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from typing import Literal\n",
    "\n",
    "\n",
    "def download_latest_data_from_space(\n",
    "    repo_id: str, file_type: Literal[\"pkl\", \"csv\"]\n",
    ") -> str:\n",
    "    \"\"\"\n",
    "    Downloads the most recent data file of the specified file type from the given repository space.\n",
    "\n",
    "    Candidates are the paths matching `spaces/<repo_id>/*.<file_type>`. They are ranked by\n",
    "    the last `_`-separated token of the file name (taken before the first `.`), compared\n",
    "    as plain strings, and the highest-ranked one is downloaded.\n",
    "\n",
    "    Args:\n",
    "        repo_id (str): The ID of the repository space.\n",
    "        file_type (Literal[\"pkl\", \"csv\"]): The type of the data file to download. Must be either \"pkl\" or \"csv\".\n",
    "\n",
    "    Returns:\n",
    "        str: The local file path of the downloaded data file.\n",
    "\n",
    "    Raises:\n",
    "        FileNotFoundError: If no file with the requested extension is found in the space.\n",
    "    \"\"\"\n",
    "\n",
    "    def extract_date(filename: str) -> str:\n",
    "        # \".../<prefix>_<token>.<ext>\" -> \"<token>\". Presumably <token> is a date stamp;\n",
    "        # string ordering is only chronological if it is zero-padded (e.g. YYYYMMDD) - TODO confirm.\n",
    "        return filename.split(\"/\")[-1].split(\".\")[0].split(\"_\")[-1]\n",
    "\n",
    "    fs = HfFileSystem()\n",
    "    data_file_pattern = f\"spaces/{repo_id}/*.{file_type}\"\n",
    "    files = fs.glob(data_file_pattern)\n",
    "    if not files:\n",
    "        # Fail with an actionable message instead of an opaque IndexError on an empty listing.\n",
    "        raise FileNotFoundError(\n",
    "            f\"No '*.{file_type}' files found in space '{repo_id}'\"\n",
    "        )\n",
    "\n",
    "    # max() picks the same file as sorting in descending order and taking the first element.\n",
    "    latest_file = max(files, key=extract_date)\n",
    "\n",
    "    return hf_hub_download(\n",
    "        repo_id=repo_id,\n",
    "        # Pass only the base name, dropping the \"spaces/<repo_id>/\" prefix returned by the glob.\n",
    "        filename=latest_file.split(\"/\")[-1],\n",
    "        repo_type=\"space\",\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Both files are fetched from the same Space: the CSV is read as the leaderboard table\n",
    "# and the pickle as the ELO results in the cells below.\n",
    "arena_space_id = \"lmsys/chatbot-arena-leaderboard\"\n",
    "\n",
    "latest_leaderboard_file_local = download_latest_data_from_space(\n",
    "    repo_id=arena_space_id, file_type=\"csv\"\n",
    ")\n",
    "latest_elo_file_local = download_latest_data_from_space(\n",
    "    repo_id=arena_space_id, file_type=\"pkl\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load and prepare ELO data\n",
    "# Keys looked up in the unpickled ELO results -> category display names.\n",
    "key_to_category_name = {\n",
    "    \"full\": \"Overall\",\n",
    "    \"coding\": \"Coding\",\n",
    "    \"long_user\": \"Longer Query\",\n",
    "    \"english\": \"English\",\n",
    "    \"chinese\": \"Chinese\",\n",
    "    \"french\": \"French\",\n",
    "    \"no_tie\": \"Exclude Ties\",\n",
    "    \"no_short\": \"Exclude Short Query (< 5 tokens)\",\n",
    "    \"no_refusal\": \"Exclude Refusal\",\n",
    "}\n",
    "# Category display names -> longer human-readable descriptions.\n",
    "cat_name_to_explanation = {\n",
    "    \"Overall\": \"Overall Questions\",\n",
    "    \"Coding\": \"Coding: whether conversation contains code snippets\",\n",
    "    \"Longer Query\": \"Longer Query (>= 500 tokens)\",\n",
    "    \"English\": \"English Prompts\",\n",
    "    \"Chinese\": \"Chinese Prompts\",\n",
    "    \"French\": \"French Prompts\",\n",
    "    \"Exclude Ties\": \"Exclude Ties and Bothbad\",\n",
    "    \"Exclude Short Query (< 5 tokens)\": \"Exclude Short User Query (< 5 tokens)\",\n",
    "    \"Exclude Refusal\": 'Exclude model responses with refusal (e.g., \"I cannot answer\")',\n",
    "}\n",
    "\n",
    "# NOTE(security): pickle.load can execute arbitrary code while deserializing.\n",
    "# This file is downloaded from a remote Space, so only run this cell if that source is trusted.\n",
    "with open(latest_elo_file_local, \"rb\") as fin:\n",
    "    elo_results = pickle.load(fin)\n",
    "\n",
    "# One leaderboard table per category, keyed by display name; categories missing\n",
    "# from this ELO results file are skipped.\n",
    "arena_dfs = {\n",
    "    category_name: elo_results[key][\"leaderboard_table_df\"]\n",
    "    for key, category_name in key_to_category_name.items()\n",
    "    if key in elo_results\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['Overall', 'Coding', 'Longer Query', 'English', 'Chinese', 'French', 'Exclude Ties', 'Exclude Short Query (< 5 tokens)', 'Exclude Refusal'])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arena_dfs.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>rating</th>\n",
       "      <th>variance</th>\n",
       "      <th>rating_q975</th>\n",
       "      <th>rating_q025</th>\n",
       "      <th>num_battles</th>\n",
       "      <th>final_ranking</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>RWKV-4-Raven-14B</th>\n",
       "      <td>928.451251</td>\n",
       "      <td>26.146415</td>\n",
       "      <td>937.017097</td>\n",
       "      <td>919.444359</td>\n",
       "      <td>5129</td>\n",
       "      <td>82</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alpaca-13b</th>\n",
       "      <td>908.084359</td>\n",
       "      <td>18.598539</td>\n",
       "      <td>915.348707</td>\n",
       "      <td>900.602847</td>\n",
       "      <td>6111</td>\n",
       "      <td>86</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>bard-jan-24-gemini-pro</th>\n",
       "      <td>1208.712877</td>\n",
       "      <td>7.975296</td>\n",
       "      <td>1213.331583</td>\n",
       "      <td>1203.004139</td>\n",
       "      <td>12387</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>chatglm-6b</th>\n",
       "      <td>886.873429</td>\n",
       "      <td>19.813751</td>\n",
       "      <td>894.785321</td>\n",
       "      <td>878.677878</td>\n",
       "      <td>5195</td>\n",
       "      <td>87</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>chatglm2-6b</th>\n",
       "      <td>933.337288</td>\n",
       "      <td>33.939472</td>\n",
       "      <td>944.493496</td>\n",
       "      <td>921.470740</td>\n",
       "      <td>2880</td>\n",
       "      <td>82</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>wizardlm-70b</th>\n",
       "      <td>1108.552744</td>\n",
       "      <td>8.988005</td>\n",
       "      <td>1114.390689</td>\n",
       "      <td>1102.745236</td>\n",
       "      <td>8867</td>\n",
       "      <td>29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>yi-34b-chat</th>\n",
       "      <td>1111.132640</td>\n",
       "      <td>7.801741</td>\n",
       "      <td>1115.356993</td>\n",
       "      <td>1105.658254</td>\n",
       "      <td>13177</td>\n",
       "      <td>29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zephyr-7b-alpha</th>\n",
       "      <td>1043.084267</td>\n",
       "      <td>45.472021</td>\n",
       "      <td>1054.269954</td>\n",
       "      <td>1027.602171</td>\n",
       "      <td>1901</td>\n",
       "      <td>57</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zephyr-7b-beta</th>\n",
       "      <td>1054.416300</td>\n",
       "      <td>11.094606</td>\n",
       "      <td>1060.265072</td>\n",
       "      <td>1047.790509</td>\n",
       "      <td>11924</td>\n",
       "      <td>55</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zephyr-orpo-141b-A35b-v0.1</th>\n",
       "      <td>1128.816337</td>\n",
       "      <td>16.964385</td>\n",
       "      <td>1134.862680</td>\n",
       "      <td>1119.183571</td>\n",
       "      <td>5207</td>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>92 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                 rating   variance  rating_q975  rating_q025  \\\n",
       "RWKV-4-Raven-14B             928.451251  26.146415   937.017097   919.444359   \n",
       "alpaca-13b                   908.084359  18.598539   915.348707   900.602847   \n",
       "bard-jan-24-gemini-pro      1208.712877   7.975296  1213.331583  1203.004139   \n",
       "chatglm-6b                   886.873429  19.813751   894.785321   878.677878   \n",
       "chatglm2-6b                  933.337288  33.939472   944.493496   921.470740   \n",
       "...                                 ...        ...          ...          ...   \n",
       "wizardlm-70b                1108.552744   8.988005  1114.390689  1102.745236   \n",
       "yi-34b-chat                 1111.132640   7.801741  1115.356993  1105.658254   \n",
       "zephyr-7b-alpha             1043.084267  45.472021  1054.269954  1027.602171   \n",
       "zephyr-7b-beta              1054.416300  11.094606  1060.265072  1047.790509   \n",
       "zephyr-orpo-141b-A35b-v0.1  1128.816337  16.964385  1134.862680  1119.183571   \n",
       "\n",
       "                            num_battles  final_ranking  \n",
       "RWKV-4-Raven-14B                   5129             82  \n",
       "alpaca-13b                         6111             86  \n",
       "bard-jan-24-gemini-pro            12387              6  \n",
       "chatglm-6b                         5195             87  \n",
       "chatglm2-6b                        2880             82  \n",
       "...                                 ...            ...  \n",
       "wizardlm-70b                       8867             29  \n",
       "yi-34b-chat                       13177             29  \n",
       "zephyr-7b-alpha                    1901             57  \n",
       "zephyr-7b-beta                    11924             55  \n",
       "zephyr-orpo-141b-A35b-v0.1         5207             22  \n",
       "\n",
       "[92 rows x 6 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arena_dfs[\"Overall\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load and prepare Leaderboard data\n",
    "leaderboard_df = pd.read_csv(latest_leaderboard_file_local)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key</th>\n",
       "      <th>Model</th>\n",
       "      <th>MT-bench (score)</th>\n",
       "      <th>MMLU</th>\n",
       "      <th>Knowledge cutoff date</th>\n",
       "      <th>License</th>\n",
       "      <th>Organization</th>\n",
       "      <th>Link</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>wizardlm-30b</td>\n",
       "      <td>WizardLM-30B</td>\n",
       "      <td>7.01</td>\n",
       "      <td>0.587</td>\n",
       "      <td>2023/6</td>\n",
       "      <td>Non-commercial</td>\n",
       "      <td>Microsoft</td>\n",
       "      <td>https://huggingface.co/WizardLM/WizardLM-30B-V1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>vicuna-13b-16k</td>\n",
       "      <td>Vicuna-13B-16k</td>\n",
       "      <td>6.92</td>\n",
       "      <td>0.545</td>\n",
       "      <td>2023/7</td>\n",
       "      <td>Llama 2 Community</td>\n",
       "      <td>LMSYS</td>\n",
       "      <td>https://huggingface.co/lmsys/vicuna-13b-v1.5-16k</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>wizardlm-13b-v1.1</td>\n",
       "      <td>WizardLM-13B-v1.1</td>\n",
       "      <td>6.76</td>\n",
       "      <td>0.500</td>\n",
       "      <td>2023/7</td>\n",
       "      <td>Non-commercial</td>\n",
       "      <td>Microsoft</td>\n",
       "      <td>https://huggingface.co/WizardLM/WizardLM-13B-V1.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>tulu-30b</td>\n",
       "      <td>Tulu-30B</td>\n",
       "      <td>6.43</td>\n",
       "      <td>0.581</td>\n",
       "      <td>2023/6</td>\n",
       "      <td>Non-commercial</td>\n",
       "      <td>AllenAI/UW</td>\n",
       "      <td>https://huggingface.co/allenai/tulu-30b</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>guanaco-65b</td>\n",
       "      <td>Guanaco-65B</td>\n",
       "      <td>6.41</td>\n",
       "      <td>0.621</td>\n",
       "      <td>2023/5</td>\n",
       "      <td>Non-commercial</td>\n",
       "      <td>UW</td>\n",
       "      <td>https://huggingface.co/timdettmers/guanaco-65b...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>101</th>\n",
       "      <td>llama-3-70b-instruct</td>\n",
       "      <td>Llama-3-70b-Instruct</td>\n",
       "      <td>-</td>\n",
       "      <td>0.820</td>\n",
       "      <td>2023/12</td>\n",
       "      <td>Llama 3 Community</td>\n",
       "      <td>Meta</td>\n",
       "      <td>https://llama.meta.com/llama3/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>102</th>\n",
       "      <td>llama-3-8b-instruct</td>\n",
       "      <td>Llama-3-8b-Instruct</td>\n",
       "      <td>-</td>\n",
       "      <td>0.684</td>\n",
       "      <td>2023/3</td>\n",
       "      <td>Llama 3 Community</td>\n",
       "      <td>Meta</td>\n",
       "      <td>https://llama.meta.com/llama3/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>103</th>\n",
       "      <td>gemini-1.5-pro-api-0409-preview</td>\n",
       "      <td>Gemini 1.5 Pro API-0409-Preview</td>\n",
       "      <td>-</td>\n",
       "      <td>0.819</td>\n",
       "      <td>2023/11</td>\n",
       "      <td>Proprietary</td>\n",
       "      <td>Google</td>\n",
       "      <td>https://blog.google/technology/ai/google-gemin...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>104</th>\n",
       "      <td>phi-3-mini-128k-instruct</td>\n",
       "      <td>Phi-3-Mini-128k-Instruct</td>\n",
       "      <td>-</td>\n",
       "      <td>0.681</td>\n",
       "      <td>2023/10</td>\n",
       "      <td>MIT</td>\n",
       "      <td>Microsoft</td>\n",
       "      <td>https://azure.microsoft.com/en-us/blog/introdu...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>105</th>\n",
       "      <td>snowflake-arctic-instruct</td>\n",
       "      <td>Snowflake Arctic Instruct</td>\n",
       "      <td>-</td>\n",
       "      <td>0.673</td>\n",
       "      <td>2024/4</td>\n",
       "      <td>Apache 2.0</td>\n",
       "      <td>Snowflake</td>\n",
       "      <td>https://www.snowflake.com/blog/arctic-open-eff...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>106 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                 key                            Model  \\\n",
       "0                       wizardlm-30b                     WizardLM-30B   \n",
       "1                     vicuna-13b-16k                   Vicuna-13B-16k   \n",
       "2                  wizardlm-13b-v1.1                WizardLM-13B-v1.1   \n",
       "3                           tulu-30b                         Tulu-30B   \n",
       "4                        guanaco-65b                      Guanaco-65B   \n",
       "..                               ...                              ...   \n",
       "101             llama-3-70b-instruct             Llama-3-70b-Instruct   \n",
       "102              llama-3-8b-instruct              Llama-3-8b-Instruct   \n",
       "103  gemini-1.5-pro-api-0409-preview  Gemini 1.5 Pro API-0409-Preview   \n",
       "104         phi-3-mini-128k-instruct         Phi-3-Mini-128k-Instruct   \n",
       "105        snowflake-arctic-instruct        Snowflake Arctic Instruct   \n",
       "\n",
       "    MT-bench (score)   MMLU Knowledge cutoff date            License  \\\n",
       "0               7.01  0.587                2023/6     Non-commercial   \n",
       "1               6.92  0.545                2023/7  Llama 2 Community   \n",
       "2               6.76  0.500                2023/7     Non-commercial   \n",
       "3               6.43  0.581                2023/6     Non-commercial   \n",
       "4               6.41  0.621                2023/5     Non-commercial   \n",
       "..               ...    ...                   ...                ...   \n",
       "101                -  0.820               2023/12  Llama 3 Community   \n",
       "102                -  0.684                2023/3  Llama 3 Community   \n",
       "103                -  0.819               2023/11        Proprietary   \n",
       "104                -  0.681               2023/10                MIT   \n",
       "105                -  0.673                2024/4         Apache 2.0   \n",
       "\n",
       "    Organization                                               Link  \n",
       "0      Microsoft  https://huggingface.co/WizardLM/WizardLM-30B-V1.0  \n",
       "1          LMSYS   https://huggingface.co/lmsys/vicuna-13b-v1.5-16k  \n",
       "2      Microsoft  https://huggingface.co/WizardLM/WizardLM-13B-V1.1  \n",
       "3     AllenAI/UW            https://huggingface.co/allenai/tulu-30b  \n",
       "4             UW  https://huggingface.co/timdettmers/guanaco-65b...  \n",
       "..           ...                                                ...  \n",
       "101         Meta                     https://llama.meta.com/llama3/  \n",
       "102         Meta                     https://llama.meta.com/llama3/  \n",
       "103       Google  https://blog.google/technology/ai/google-gemin...  \n",
       "104    Microsoft  https://azure.microsoft.com/en-us/blog/introdu...  \n",
       "105    Snowflake  https://www.snowflake.com/blog/arctic-open-eff...  \n",
       "\n",
       "[106 rows x 8 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "leaderboard_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['Overall', 'Coding', 'Longer Query', 'English', 'Chinese', 'French', 'Exclude Ties', 'Exclude Short Query (< 5 tokens)', 'Exclude Refusal'])"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arena_dfs.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# merge ELO and Leaderboard data\n",
    "# Each category table is joined on its index against the leaderboard's \"key\" column,\n",
    "# then ordered from highest to lowest rating with a fresh 0..n-1 index.\n",
    "merged_dfs = {}\n",
    "for k, v in arena_dfs.items():\n",
    "    joined = v.merge(leaderboard_df, left_index=True, right_on=\"key\")\n",
    "    ranked = joined.sort_values(\"rating\", ascending=False)\n",
    "    merged_dfs[k] = ranked.reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>rating</th>\n",
       "      <th>variance</th>\n",
       "      <th>rating_q975</th>\n",
       "      <th>rating_q025</th>\n",
       "      <th>num_battles</th>\n",
       "      <th>final_ranking</th>\n",
       "      <th>key</th>\n",
       "      <th>Model</th>\n",
       "      <th>MT-bench (score)</th>\n",
       "      <th>MMLU</th>\n",
       "      <th>Knowledge cutoff date</th>\n",
       "      <th>License</th>\n",
       "      <th>Organization</th>\n",
       "      <th>Link</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1258.815279</td>\n",
       "      <td>3.258132</td>\n",
       "      <td>1262.796713</td>\n",
       "      <td>1256.000508</td>\n",
       "      <td>35931</td>\n",
       "      <td>1</td>\n",
       "      <td>gpt-4-turbo-2024-04-09</td>\n",
       "      <td>GPT-4-Turbo-2024-04-09</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>2023/12</td>\n",
       "      <td>Proprietary</td>\n",
       "      <td>OpenAI</td>\n",
       "      <td>https://platform.openai.com/docs/models/gpt-4-...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1252.684886</td>\n",
       "      <td>1.799233</td>\n",
       "      <td>1254.748391</td>\n",
       "      <td>1249.873417</td>\n",
       "      <td>73547</td>\n",
       "      <td>2</td>\n",
       "      <td>gpt-4-1106-preview</td>\n",
       "      <td>GPT-4-1106-preview</td>\n",
       "      <td>9.32</td>\n",
       "      <td>-</td>\n",
       "      <td>2023/4</td>\n",
       "      <td>Proprietary</td>\n",
       "      <td>OpenAI</td>\n",
       "      <td>https://openai.com/blog/new-models-and-develop...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1250.926206</td>\n",
       "      <td>2.018201</td>\n",
       "      <td>1253.851885</td>\n",
       "      <td>1248.166034</td>\n",
       "      <td>80997</td>\n",
       "      <td>2</td>\n",
       "      <td>claude-3-opus-20240229</td>\n",
       "      <td>Claude 3 Opus</td>\n",
       "      <td>-</td>\n",
       "      <td>0.868</td>\n",
       "      <td>2023/8</td>\n",
       "      <td>Proprietary</td>\n",
       "      <td>Anthropic</td>\n",
       "      <td>https://www.anthropic.com/news/claude-3-family</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1249.618395</td>\n",
       "      <td>3.233129</td>\n",
       "      <td>1252.956497</td>\n",
       "      <td>1246.247080</td>\n",
       "      <td>39482</td>\n",
       "      <td>2</td>\n",
       "      <td>gemini-1.5-pro-api-0409-preview</td>\n",
       "      <td>Gemini 1.5 Pro API-0409-Preview</td>\n",
       "      <td>-</td>\n",
       "      <td>0.819</td>\n",
       "      <td>2023/11</td>\n",
       "      <td>Proprietary</td>\n",
       "      <td>Google</td>\n",
       "      <td>https://blog.google/technology/ai/google-gemin...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1246.777591</td>\n",
       "      <td>1.942477</td>\n",
       "      <td>1249.979712</td>\n",
       "      <td>1244.305362</td>\n",
       "      <td>67354</td>\n",
       "      <td>2</td>\n",
       "      <td>gpt-4-0125-preview</td>\n",
       "      <td>GPT-4-0125-preview</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>2023/12</td>\n",
       "      <td>Proprietary</td>\n",
       "      <td>OpenAI</td>\n",
       "      <td>https://openai.com/blog/new-models-and-develop...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>87</th>\n",
       "      <td>886.873429</td>\n",
       "      <td>19.813751</td>\n",
       "      <td>894.785321</td>\n",
       "      <td>878.677878</td>\n",
       "      <td>5195</td>\n",
       "      <td>87</td>\n",
       "      <td>chatglm-6b</td>\n",
       "      <td>ChatGLM-6B</td>\n",
       "      <td>4.50</td>\n",
       "      <td>0.361</td>\n",
       "      <td>2023/3</td>\n",
       "      <td>Non-commercial</td>\n",
       "      <td>Tsinghua</td>\n",
       "      <td>https://huggingface.co/THUDM/chatglm-6b</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>88</th>\n",
       "      <td>876.929108</td>\n",
       "      <td>27.115855</td>\n",
       "      <td>887.355529</td>\n",
       "      <td>866.860534</td>\n",
       "      <td>4521</td>\n",
       "      <td>88</td>\n",
       "      <td>fastchat-t5-3b</td>\n",
       "      <td>FastChat-T5-3B</td>\n",
       "      <td>3.04</td>\n",
       "      <td>0.477</td>\n",
       "      <td>2023/4</td>\n",
       "      <td>Apache 2.0</td>\n",
       "      <td>LMSYS</td>\n",
       "      <td>https://huggingface.co/lmsys/fastchat-t5-3b-v1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>89</th>\n",
       "      <td>848.932568</td>\n",
       "      <td>36.961459</td>\n",
       "      <td>859.103936</td>\n",
       "      <td>837.364341</td>\n",
       "      <td>3461</td>\n",
       "      <td>90</td>\n",
       "      <td>stablelm-tuned-alpha-7b</td>\n",
       "      <td>StableLM-Tuned-Alpha-7B</td>\n",
       "      <td>2.75</td>\n",
       "      <td>0.244</td>\n",
       "      <td>2023/4</td>\n",
       "      <td>CC-BY-NC-SA-4.0</td>\n",
       "      <td>Stability AI</td>\n",
       "      <td>https://huggingface.co/stabilityai/stablelm-tu...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>90</th>\n",
       "      <td>826.647332</td>\n",
       "      <td>30.156414</td>\n",
       "      <td>837.335988</td>\n",
       "      <td>816.370788</td>\n",
       "      <td>3666</td>\n",
       "      <td>91</td>\n",
       "      <td>dolly-v2-12b</td>\n",
       "      <td>Dolly-V2-12B</td>\n",
       "      <td>3.28</td>\n",
       "      <td>0.257</td>\n",
       "      <td>2023/4</td>\n",
       "      <td>MIT</td>\n",
       "      <td>Databricks</td>\n",
       "      <td>https://huggingface.co/databricks/dolly-v2-12b</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>91</th>\n",
       "      <td>804.356329</td>\n",
       "      <td>44.756983</td>\n",
       "      <td>815.161492</td>\n",
       "      <td>790.879536</td>\n",
       "      <td>2538</td>\n",
       "      <td>92</td>\n",
       "      <td>llama-13b</td>\n",
       "      <td>LLaMA-13B</td>\n",
       "      <td>2.61</td>\n",
       "      <td>0.470</td>\n",
       "      <td>2023/2</td>\n",
       "      <td>Non-commercial</td>\n",
       "      <td>Meta</td>\n",
       "      <td>https://arxiv.org/abs/2302.13971</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>92 rows × 14 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         rating   variance  rating_q975  rating_q025  num_battles  \\\n",
       "0   1258.815279   3.258132  1262.796713  1256.000508        35931   \n",
       "1   1252.684886   1.799233  1254.748391  1249.873417        73547   \n",
       "2   1250.926206   2.018201  1253.851885  1248.166034        80997   \n",
       "3   1249.618395   3.233129  1252.956497  1246.247080        39482   \n",
       "4   1246.777591   1.942477  1249.979712  1244.305362        67354   \n",
       "..          ...        ...          ...          ...          ...   \n",
       "87   886.873429  19.813751   894.785321   878.677878         5195   \n",
       "88   876.929108  27.115855   887.355529   866.860534         4521   \n",
       "89   848.932568  36.961459   859.103936   837.364341         3461   \n",
       "90   826.647332  30.156414   837.335988   816.370788         3666   \n",
       "91   804.356329  44.756983   815.161492   790.879536         2538   \n",
       "\n",
       "    final_ranking                              key  \\\n",
       "0               1           gpt-4-turbo-2024-04-09   \n",
       "1               2               gpt-4-1106-preview   \n",
       "2               2           claude-3-opus-20240229   \n",
       "3               2  gemini-1.5-pro-api-0409-preview   \n",
       "4               2               gpt-4-0125-preview   \n",
       "..            ...                              ...   \n",
       "87             87                       chatglm-6b   \n",
       "88             88                   fastchat-t5-3b   \n",
       "89             90          stablelm-tuned-alpha-7b   \n",
       "90             91                     dolly-v2-12b   \n",
       "91             92                        llama-13b   \n",
       "\n",
       "                              Model MT-bench (score)   MMLU  \\\n",
       "0            GPT-4-Turbo-2024-04-09                -      -   \n",
       "1                GPT-4-1106-preview             9.32      -   \n",
       "2                     Claude 3 Opus                -  0.868   \n",
       "3   Gemini 1.5 Pro API-0409-Preview                -  0.819   \n",
       "4                GPT-4-0125-preview                -      -   \n",
       "..                              ...              ...    ...   \n",
       "87                       ChatGLM-6B             4.50  0.361   \n",
       "88                   FastChat-T5-3B             3.04  0.477   \n",
       "89          StableLM-Tuned-Alpha-7B             2.75  0.244   \n",
       "90                     Dolly-V2-12B             3.28  0.257   \n",
       "91                        LLaMA-13B             2.61  0.470   \n",
       "\n",
       "   Knowledge cutoff date          License  Organization  \\\n",
       "0                2023/12      Proprietary        OpenAI   \n",
       "1                 2023/4      Proprietary        OpenAI   \n",
       "2                 2023/8      Proprietary     Anthropic   \n",
       "3                2023/11      Proprietary        Google   \n",
       "4                2023/12      Proprietary        OpenAI   \n",
       "..                   ...              ...           ...   \n",
       "87                2023/3   Non-commercial      Tsinghua   \n",
       "88                2023/4       Apache 2.0         LMSYS   \n",
       "89                2023/4  CC-BY-NC-SA-4.0  Stability AI   \n",
       "90                2023/4              MIT    Databricks   \n",
       "91                2023/2   Non-commercial          Meta   \n",
       "\n",
       "                                                 Link  \n",
       "0   https://platform.openai.com/docs/models/gpt-4-...  \n",
       "1   https://openai.com/blog/new-models-and-develop...  \n",
       "2      https://www.anthropic.com/news/claude-3-family  \n",
       "3   https://blog.google/technology/ai/google-gemin...  \n",
       "4   https://openai.com/blog/new-models-and-develop...  \n",
       "..                                                ...  \n",
       "87            https://huggingface.co/THUDM/chatglm-6b  \n",
       "88   https://huggingface.co/lmsys/fastchat-t5-3b-v1.0  \n",
       "89  https://huggingface.co/stabilityai/stablelm-tu...  \n",
       "90     https://huggingface.co/databricks/dolly-v2-12b  \n",
       "91                   https://arxiv.org/abs/2302.13971  \n",
       "\n",
       "[92 rows x 14 columns]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "merged_dfs[\"Overall\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Manually map release dates - MEH."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "t = merged_dfs[\"Overall\"].loc[:, [\"key\", \"Model\"]]\n",
    "t[\"Release Date\"] = \"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "release_date_mapping = pd.read_json(\"release_date_mapping.json\", orient=\"records\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key</th>\n",
       "      <th>Model</th>\n",
       "      <th>Release Date</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>gpt-4-turbo-2024-04-09</td>\n",
       "      <td>GPT-4-Turbo-2024-04-09</td>\n",
       "      <td>2024-04-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>gpt-4-1106-preview</td>\n",
       "      <td>GPT-4-1106-preview</td>\n",
       "      <td>2023-11-06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>claude-3-opus-20240229</td>\n",
       "      <td>Claude 3 Opus</td>\n",
       "      <td>2024-02-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>gemini-1.5-pro-api-0409-preview</td>\n",
       "      <td>Gemini 1.5 Pro API-0409-Preview</td>\n",
       "      <td>2024-04-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>gpt-4-0125-preview</td>\n",
       "      <td>GPT-4-0125-preview</td>\n",
       "      <td>2024-01-25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>86</th>\n",
       "      <td>chatglm-6b</td>\n",
       "      <td>ChatGLM-6B</td>\n",
       "      <td>2023-03-13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>87</th>\n",
       "      <td>fastchat-t5-3b</td>\n",
       "      <td>FastChat-T5-3B</td>\n",
       "      <td>2023-04-27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>88</th>\n",
       "      <td>stablelm-tuned-alpha-7b</td>\n",
       "      <td>StableLM-Tuned-Alpha-7B</td>\n",
       "      <td>2023-04-19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>89</th>\n",
       "      <td>dolly-v2-12b</td>\n",
       "      <td>Dolly-V2-12B</td>\n",
       "      <td>2023-04-12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>90</th>\n",
       "      <td>llama-13b</td>\n",
       "      <td>LLaMA-13B</td>\n",
       "      <td>2023-02-27</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>91 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                key                            Model  \\\n",
       "0            gpt-4-turbo-2024-04-09           GPT-4-Turbo-2024-04-09   \n",
       "1                gpt-4-1106-preview               GPT-4-1106-preview   \n",
       "2            claude-3-opus-20240229                    Claude 3 Opus   \n",
       "3   gemini-1.5-pro-api-0409-preview  Gemini 1.5 Pro API-0409-Preview   \n",
       "4                gpt-4-0125-preview               GPT-4-0125-preview   \n",
       "..                              ...                              ...   \n",
       "86                       chatglm-6b                       ChatGLM-6B   \n",
       "87                   fastchat-t5-3b                   FastChat-T5-3B   \n",
       "88          stablelm-tuned-alpha-7b          StableLM-Tuned-Alpha-7B   \n",
       "89                     dolly-v2-12b                     Dolly-V2-12B   \n",
       "90                        llama-13b                        LLaMA-13B   \n",
       "\n",
       "   Release Date  \n",
       "0    2024-04-09  \n",
       "1    2023-11-06  \n",
       "2    2024-02-29  \n",
       "3    2024-04-09  \n",
       "4    2024-01-25  \n",
       "..          ...  \n",
       "86   2023-03-13  \n",
       "87   2023-04-27  \n",
       "88   2023-04-19  \n",
       "89   2023-04-12  \n",
       "90   2023-02-27  \n",
       "\n",
       "[91 rows x 3 columns]"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "release_date_mapping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key</th>\n",
       "      <th>Release Date</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>gpt-4-turbo-2024-04-09</td>\n",
       "      <td>2024-04-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>gpt-4-1106-preview</td>\n",
       "      <td>2023-11-06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>claude-3-opus-20240229</td>\n",
       "      <td>2024-02-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>gemini-1.5-pro-api-0409-preview</td>\n",
       "      <td>2024-04-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>gpt-4-0125-preview</td>\n",
       "      <td>2024-01-25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>86</th>\n",
       "      <td>chatglm-6b</td>\n",
       "      <td>2023-03-13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>87</th>\n",
       "      <td>fastchat-t5-3b</td>\n",
       "      <td>2023-04-27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>88</th>\n",
       "      <td>stablelm-tuned-alpha-7b</td>\n",
       "      <td>2023-04-19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>89</th>\n",
       "      <td>dolly-v2-12b</td>\n",
       "      <td>2023-04-12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>90</th>\n",
       "      <td>llama-13b</td>\n",
       "      <td>2023-02-27</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>91 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                key Release Date\n",
       "0            gpt-4-turbo-2024-04-09   2024-04-09\n",
       "1                gpt-4-1106-preview   2023-11-06\n",
       "2            claude-3-opus-20240229   2024-02-29\n",
       "3   gemini-1.5-pro-api-0409-preview   2024-04-09\n",
       "4                gpt-4-0125-preview   2024-01-25\n",
       "..                              ...          ...\n",
       "86                       chatglm-6b   2023-03-13\n",
       "87                   fastchat-t5-3b   2023-04-27\n",
       "88          stablelm-tuned-alpha-7b   2023-04-19\n",
       "89                     dolly-v2-12b   2023-04-12\n",
       "90                        llama-13b   2023-02-27\n",
       "\n",
       "[91 rows x 2 columns]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "release_date_mapping[[\"key\", \"Release Date\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# add release dates into the merged data\n",
    "for k, v in merged_dfs.items():\n",
    "    merged_dfs[k] = pd.merge(\n",
    "        merged_dfs[k], release_date_mapping[[\"key\", \"Release Date\"]], on=\"key\"\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['rating', 'variance', 'rating_q975', 'rating_q025', 'num_battles',\n",
       "       'final_ranking', 'key', 'Model', 'MT-bench (score)', 'MMLU',\n",
       "       'Knowledge cutoff date', 'License', 'Organization', 'Link',\n",
       "       'Release Date'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "merged_dfs[\"Overall\"].columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [],
   "source": [
    "def format_data(df):\n",
    "    df[\"License\"] = df[\"License\"].apply(\n",
    "        lambda x: \"Proprietary LLM\" if x in PROPRIETARY_LICENSES else \"Open LLM\"\n",
    "    )\n",
    "    df[\"Release Date\"] = pd.to_datetime(df[\"Release Date\"])\n",
    "    df[\"Month-Year\"] = df[\"Release Date\"].dt.to_period(\"M\")\n",
    "    df[\"rating\"] = df[\"rating\"].round()\n",
    "    return df.reset_index(drop=True)\n",
    "\n",
    "\n",
    "merged_dfs2 = {k: format_data(v) for k, v in merged_dfs.items()}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5\n",
      "5\n",
      "5\n",
      "5\n",
      "5\n",
      "5\n",
      "5\n",
      "5\n",
      "5\n"
     ]
    }
   ],
   "source": [
    "for k, df in merged_dfs2.items():\n",
    "    print(\n",
    "        int(\n",
    "            df.groupby([\"Release Date\", \"License\"])[\"rating\"]\n",
    "            .apply(lambda x: len(x))\n",
    "            .max()\n",
    "        )\n",
    "    )\n",
    "    (df[\"rating\"].min().round(),)\n",
    "    print()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build plot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [],
   "source": [
    "t = {\n",
    "    \"Overall\": {\n",
    "        \"min_elo_score\": 804.0,\n",
    "        \"max_elo_score\": 1259.0,\n",
    "        \"upper_models_per_month\": 5,\n",
    "    },\n",
    "    \"Coding\": {\n",
    "        \"min_elo_score\": 672.0,\n",
    "        \"max_elo_score\": 1270.0,\n",
    "        \"upper_models_per_month\": 5,\n",
    "    },\n",
    "    \"Longer Query\": {\n",
    "        \"min_elo_score\": 796.0,\n",
    "        \"max_elo_score\": 1273.0,\n",
    "        \"upper_models_per_month\": 5,\n",
    "    },\n",
    "    \"English\": {\n",
    "        \"min_elo_score\": 783.0,\n",
    "        \"max_elo_score\": 1246.0,\n",
    "        \"upper_models_per_month\": 5,\n",
    "    },\n",
    "    \"Chinese\": {\n",
    "        \"min_elo_score\": 753.0,\n",
    "        \"max_elo_score\": 1325.0,\n",
    "        \"upper_models_per_month\": 5,\n",
    "    },\n",
    "    \"French\": {\n",
    "        \"min_elo_score\": 694.0,\n",
    "        \"max_elo_score\": 1268.0,\n",
    "        \"upper_models_per_month\": 5,\n",
    "    },\n",
    "    \"Exclude Ties\": {\n",
    "        \"min_elo_score\": 654.0,\n",
    "        \"max_elo_score\": 1334.0,\n",
    "        \"upper_models_per_month\": 5,\n",
    "    },\n",
    "    \"Exclude Short Query (< 5 tokens)\": {\n",
    "        \"min_elo_score\": 796.0,\n",
    "        \"max_elo_score\": 1264.0,\n",
    "        \"upper_models_per_month\": 5,\n",
    "    },\n",
    "    \"Exclude Refusal\": {\n",
    "        \"min_elo_score\": 795.0,\n",
    "        \"max_elo_score\": 1264.0,\n",
    "        \"upper_models_per_month\": 5,\n",
    "    },\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "o = {\n",
    "    \"min_elo_score\": <minimum>,\n",
    "    \"max_elo_score\": <maximum>,\n",
    "    \"upper_models_per_month\": <maximum>,\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [],
   "source": [
    "PROPRIETARY_LICENSES = [\n",
    "    \"Proprietary\",\n",
    "    \"Non-commercial\",\n",
    "]\n",
    "\n",
    "df = merged_dfs[\"Overall\"]\n",
    "df[\"License\"] = df[\"License\"].apply(\n",
    "    lambda x: \"Proprietary LLM\" if x in PROPRIETARY_LICENSES else \"Open LLM\"\n",
    ")\n",
    "df[\"Release Date\"] = pd.to_datetime(df[\"Release Date\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[\"Month-Year\"] = df[\"Release Date\"].dt.to_period(\"M\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "8"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby([\"Month-Year\", \"License\"])[\"rating\"].apply(lambda x: x.count()).max()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>rating</th>\n",
       "      <th>variance</th>\n",
       "      <th>rating_q975</th>\n",
       "      <th>rating_q025</th>\n",
       "      <th>num_battles</th>\n",
       "      <th>final_ranking</th>\n",
       "      <th>key</th>\n",
       "      <th>Model</th>\n",
       "      <th>MT-bench (score)</th>\n",
       "      <th>MMLU</th>\n",
       "      <th>Knowledge cutoff date</th>\n",
       "      <th>License</th>\n",
       "      <th>Organization</th>\n",
       "      <th>Link</th>\n",
       "      <th>Release Date</th>\n",
       "      <th>license_binary</th>\n",
       "      <th>Month-Year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1246.777591</td>\n",
       "      <td>1.942477</td>\n",
       "      <td>1249.979712</td>\n",
       "      <td>1244.305362</td>\n",
       "      <td>67354</td>\n",
       "      <td>2</td>\n",
       "      <td>gpt-4-0125-preview</td>\n",
       "      <td>GPT-4-0125-preview</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>2023/12</td>\n",
       "      <td>Proprietary LLM</td>\n",
       "      <td>OpenAI</td>\n",
       "      <td>https://openai.com/blog/new-models-and-develop...</td>\n",
       "      <td>2024-01-25</td>\n",
       "      <td>Proprietary LLM</td>\n",
       "      <td>2024-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>1111.132640</td>\n",
       "      <td>7.801741</td>\n",
       "      <td>1115.356993</td>\n",
       "      <td>1105.658254</td>\n",
       "      <td>13177</td>\n",
       "      <td>29</td>\n",
       "      <td>yi-34b-chat</td>\n",
       "      <td>Yi-34B-Chat</td>\n",
       "      <td>-</td>\n",
       "      <td>0.735</td>\n",
       "      <td>2023/6</td>\n",
       "      <td>Open LLM</td>\n",
       "      <td>01 AI</td>\n",
       "      <td>https://huggingface.co/01-ai/Yi-34B-Chat</td>\n",
       "      <td>2024-01-23</td>\n",
       "      <td>Open LLM</td>\n",
       "      <td>2024-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>1107.129810</td>\n",
       "      <td>2.419182</td>\n",
       "      <td>1110.056188</td>\n",
       "      <td>1104.002581</td>\n",
       "      <td>47220</td>\n",
       "      <td>32</td>\n",
       "      <td>gpt-3.5-turbo-0125</td>\n",
       "      <td>GPT-3.5-Turbo-0125</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>2021/9</td>\n",
       "      <td>Proprietary LLM</td>\n",
       "      <td>OpenAI</td>\n",
       "      <td>https://platform.openai.com/docs/models/gpt-3-...</td>\n",
       "      <td>2024-01-25</td>\n",
       "      <td>Proprietary LLM</td>\n",
       "      <td>2024-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>1098.527455</td>\n",
       "      <td>6.400166</td>\n",
       "      <td>1103.343592</td>\n",
       "      <td>1093.903695</td>\n",
       "      <td>14159</td>\n",
       "      <td>36</td>\n",
       "      <td>openchat-3.5-0106</td>\n",
       "      <td>OpenChat-3.5-0106</td>\n",
       "      <td>7.8</td>\n",
       "      <td>0.658</td>\n",
       "      <td>2024/1</td>\n",
       "      <td>Open LLM</td>\n",
       "      <td>OpenChat</td>\n",
       "      <td>https://huggingface.co/openchat/openchat-3.5-0106</td>\n",
       "      <td>2024-01-06</td>\n",
       "      <td>Open LLM</td>\n",
       "      <td>2024-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>1087.307758</td>\n",
       "      <td>18.314258</td>\n",
       "      <td>1094.532598</td>\n",
       "      <td>1078.413814</td>\n",
       "      <td>3980</td>\n",
       "      <td>40</td>\n",
       "      <td>nous-hermes-2-mixtral-8x7b-dpo</td>\n",
       "      <td>Nous-Hermes-2-Mixtral-8x7B-DPO</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>2024/1</td>\n",
       "      <td>Open LLM</td>\n",
       "      <td>NousResearch</td>\n",
       "      <td>https://huggingface.co/NousResearch/Nous-Herme...</td>\n",
       "      <td>2024-01-13</td>\n",
       "      <td>Open LLM</td>\n",
       "      <td>2024-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>60</th>\n",
       "      <td>1047.927688</td>\n",
       "      <td>60.707225</td>\n",
       "      <td>1061.952116</td>\n",
       "      <td>1034.283514</td>\n",
       "      <td>1321</td>\n",
       "      <td>55</td>\n",
       "      <td>codellama-70b-instruct</td>\n",
       "      <td>CodeLlama-70B-instruct</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>2024/1</td>\n",
       "      <td>Open LLM</td>\n",
       "      <td>Meta</td>\n",
       "      <td>https://huggingface.co/codellama/CodeLlama-70b-hf</td>\n",
       "      <td>2024-01-29</td>\n",
       "      <td>Open LLM</td>\n",
       "      <td>2024-01</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         rating   variance  rating_q975  rating_q025  num_battles  \\\n",
       "4   1246.777591   1.942477  1249.979712  1244.305362        67354   \n",
       "32  1111.132640   7.801741  1115.356993  1105.658254        13177   \n",
       "36  1107.129810   2.419182  1110.056188  1104.002581        47220   \n",
       "39  1098.527455   6.400166  1103.343592  1093.903695        14159   \n",
       "43  1087.307758  18.314258  1094.532598  1078.413814         3980   \n",
       "60  1047.927688  60.707225  1061.952116  1034.283514         1321   \n",
       "\n",
       "    final_ranking                             key  \\\n",
       "4               2              gpt-4-0125-preview   \n",
       "32             29                     yi-34b-chat   \n",
       "36             32              gpt-3.5-turbo-0125   \n",
       "39             36               openchat-3.5-0106   \n",
       "43             40  nous-hermes-2-mixtral-8x7b-dpo   \n",
       "60             55          codellama-70b-instruct   \n",
       "\n",
       "                             Model MT-bench (score)   MMLU  \\\n",
       "4               GPT-4-0125-preview                -      -   \n",
       "32                     Yi-34B-Chat                -  0.735   \n",
       "36              GPT-3.5-Turbo-0125                -      -   \n",
       "39               OpenChat-3.5-0106              7.8  0.658   \n",
       "43  Nous-Hermes-2-Mixtral-8x7B-DPO                -      -   \n",
       "60          CodeLlama-70B-instruct                -      -   \n",
       "\n",
       "   Knowledge cutoff date          License  Organization  \\\n",
       "4                2023/12  Proprietary LLM        OpenAI   \n",
       "32                2023/6         Open LLM         01 AI   \n",
       "36                2021/9  Proprietary LLM        OpenAI   \n",
       "39                2024/1         Open LLM      OpenChat   \n",
       "43                2024/1         Open LLM  NousResearch   \n",
       "60                2024/1         Open LLM          Meta   \n",
       "\n",
       "                                                 Link Release Date  \\\n",
       "4   https://openai.com/blog/new-models-and-develop...   2024-01-25   \n",
       "32           https://huggingface.co/01-ai/Yi-34B-Chat   2024-01-23   \n",
       "36  https://platform.openai.com/docs/models/gpt-3-...   2024-01-25   \n",
       "39  https://huggingface.co/openchat/openchat-3.5-0106   2024-01-06   \n",
       "43  https://huggingface.co/NousResearch/Nous-Herme...   2024-01-13   \n",
       "60  https://huggingface.co/codellama/CodeLlama-70b-hf   2024-01-29   \n",
       "\n",
       "     license_binary Month-Year  \n",
       "4   Proprietary LLM    2024-01  \n",
       "32         Open LLM    2024-01  \n",
       "36  Proprietary LLM    2024-01  \n",
       "39         Open LLM    2024-01  \n",
       "43         Open LLM    2024-01  \n",
       "60         Open LLM    2024-01  "
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[df[\"Month-Year\"] == \"2024-01\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/w0/6t9rxkj97rv47l9sc0q22yth0000gn/T/ipykernel_7726/1725500526.py:1: DeprecationWarning:\n",
      "\n",
      "DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>rating</th>\n",
       "      <th>variance</th>\n",
       "      <th>rating_q975</th>\n",
       "      <th>rating_q025</th>\n",
       "      <th>num_battles</th>\n",
       "      <th>final_ranking</th>\n",
       "      <th>key</th>\n",
       "      <th>Model</th>\n",
       "      <th>MT-bench (score)</th>\n",
       "      <th>MMLU</th>\n",
       "      <th>Knowledge cutoff date</th>\n",
       "      <th>License</th>\n",
       "      <th>Organization</th>\n",
       "      <th>Link</th>\n",
       "      <th>Release Date</th>\n",
       "      <th>license_binary</th>\n",
       "      <th>Month-Year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1111.132640</td>\n",
       "      <td>7.801741</td>\n",
       "      <td>1115.356993</td>\n",
       "      <td>1105.658254</td>\n",
       "      <td>13177</td>\n",
       "      <td>29</td>\n",
       "      <td>yi-34b-chat</td>\n",
       "      <td>Yi-34B-Chat</td>\n",
       "      <td>-</td>\n",
       "      <td>0.735</td>\n",
       "      <td>2023/6</td>\n",
       "      <td>Open LLM</td>\n",
       "      <td>01 AI</td>\n",
       "      <td>https://huggingface.co/01-ai/Yi-34B-Chat</td>\n",
       "      <td>2024-01-23</td>\n",
       "      <td>Open LLM</td>\n",
       "      <td>2024-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1098.527455</td>\n",
       "      <td>6.400166</td>\n",
       "      <td>1103.343592</td>\n",
       "      <td>1093.903695</td>\n",
       "      <td>14159</td>\n",
       "      <td>36</td>\n",
       "      <td>openchat-3.5-0106</td>\n",
       "      <td>OpenChat-3.5-0106</td>\n",
       "      <td>7.8</td>\n",
       "      <td>0.658</td>\n",
       "      <td>2024/1</td>\n",
       "      <td>Open LLM</td>\n",
       "      <td>OpenChat</td>\n",
       "      <td>https://huggingface.co/openchat/openchat-3.5-0106</td>\n",
       "      <td>2024-01-06</td>\n",
       "      <td>Open LLM</td>\n",
       "      <td>2024-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1087.307758</td>\n",
       "      <td>18.314258</td>\n",
       "      <td>1094.532598</td>\n",
       "      <td>1078.413814</td>\n",
       "      <td>3980</td>\n",
       "      <td>40</td>\n",
       "      <td>nous-hermes-2-mixtral-8x7b-dpo</td>\n",
       "      <td>Nous-Hermes-2-Mixtral-8x7B-DPO</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>2024/1</td>\n",
       "      <td>Open LLM</td>\n",
       "      <td>NousResearch</td>\n",
       "      <td>https://huggingface.co/NousResearch/Nous-Herme...</td>\n",
       "      <td>2024-01-13</td>\n",
       "      <td>Open LLM</td>\n",
       "      <td>2024-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1246.777591</td>\n",
       "      <td>1.942477</td>\n",
       "      <td>1249.979712</td>\n",
       "      <td>1244.305362</td>\n",
       "      <td>67354</td>\n",
       "      <td>2</td>\n",
       "      <td>gpt-4-0125-preview</td>\n",
       "      <td>GPT-4-0125-preview</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>2023/12</td>\n",
       "      <td>Proprietary LLM</td>\n",
       "      <td>OpenAI</td>\n",
       "      <td>https://openai.com/blog/new-models-and-develop...</td>\n",
       "      <td>2024-01-25</td>\n",
       "      <td>Proprietary LLM</td>\n",
       "      <td>2024-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1107.129810</td>\n",
       "      <td>2.419182</td>\n",
       "      <td>1110.056188</td>\n",
       "      <td>1104.002581</td>\n",
       "      <td>47220</td>\n",
       "      <td>32</td>\n",
       "      <td>gpt-3.5-turbo-0125</td>\n",
       "      <td>GPT-3.5-Turbo-0125</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>2021/9</td>\n",
       "      <td>Proprietary LLM</td>\n",
       "      <td>OpenAI</td>\n",
       "      <td>https://platform.openai.com/docs/models/gpt-3-...</td>\n",
       "      <td>2024-01-25</td>\n",
       "      <td>Proprietary LLM</td>\n",
       "      <td>2024-01</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        rating   variance  rating_q975  rating_q025  num_battles  \\\n",
       "0  1111.132640   7.801741  1115.356993  1105.658254        13177   \n",
       "1  1098.527455   6.400166  1103.343592  1093.903695        14159   \n",
       "2  1087.307758  18.314258  1094.532598  1078.413814         3980   \n",
       "3  1246.777591   1.942477  1249.979712  1244.305362        67354   \n",
       "4  1107.129810   2.419182  1110.056188  1104.002581        47220   \n",
       "\n",
       "   final_ranking                             key  \\\n",
       "0             29                     yi-34b-chat   \n",
       "1             36               openchat-3.5-0106   \n",
       "2             40  nous-hermes-2-mixtral-8x7b-dpo   \n",
       "3              2              gpt-4-0125-preview   \n",
       "4             32              gpt-3.5-turbo-0125   \n",
       "\n",
       "                            Model MT-bench (score)   MMLU  \\\n",
       "0                     Yi-34B-Chat                -  0.735   \n",
       "1               OpenChat-3.5-0106              7.8  0.658   \n",
       "2  Nous-Hermes-2-Mixtral-8x7B-DPO                -      -   \n",
       "3              GPT-4-0125-preview                -      -   \n",
       "4              GPT-3.5-Turbo-0125                -      -   \n",
       "\n",
       "  Knowledge cutoff date          License  Organization  \\\n",
       "0                2023/6         Open LLM         01 AI   \n",
       "1                2024/1         Open LLM      OpenChat   \n",
       "2                2024/1         Open LLM  NousResearch   \n",
       "3               2023/12  Proprietary LLM        OpenAI   \n",
       "4                2021/9  Proprietary LLM        OpenAI   \n",
       "\n",
       "                                                Link Release Date  \\\n",
       "0           https://huggingface.co/01-ai/Yi-34B-Chat   2024-01-23   \n",
       "1  https://huggingface.co/openchat/openchat-3.5-0106   2024-01-06   \n",
       "2  https://huggingface.co/NousResearch/Nous-Herme...   2024-01-13   \n",
       "3  https://openai.com/blog/new-models-and-develop...   2024-01-25   \n",
       "4  https://platform.openai.com/docs/models/gpt-3-...   2024-01-25   \n",
       "\n",
       "    license_binary Month-Year  \n",
       "0         Open LLM    2024-01  \n",
       "1         Open LLM    2024-01  \n",
       "2         Open LLM    2024-01  \n",
       "3  Proprietary LLM    2024-01  \n",
       "4  Proprietary LLM    2024-01  "
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Top three rows by `rating` within each `License` value, restricted to rows\n",
    "# whose `Month-Year` is 2024-01. The filter already pins `Month-Year` to one\n",
    "# value, so grouping on `License` alone is sufficient. Sorting and then taking\n",
    "# the per-group head avoids `groupby.apply`, whose handling of the grouping\n",
    "# columns is deprecated in recent pandas releases.\n",
    "(\n",
    "    df.loc[df[\"Month-Year\"] == \"2024-01\"]\n",
    "    .sort_values([\"License\", \"rating\"], ascending=[True, False])\n",
    "    .groupby(\"License\")\n",
    "    .head(3)\n",
    "    .reset_index(drop=True)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['rating', 'variance', 'rating_q975', 'rating_q025', 'num_battles',\n",
       "       'final_ranking', 'key', 'Model', 'MT-bench (score)', 'MMLU',\n",
       "       'Knowledge cutoff date', 'License', 'Organization', 'Link',\n",
       "       'Release Date', 'license_binary'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the column labels currently present on `df`.\n",
    "df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.plotly.v1+json": {
       "config": {
        "plotlyServerURL": "https://plot.ly"
       },
       "data": [
        {
         "customdata": [
          [
           "OpenAI",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "OpenAI",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "Anthropic",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "Google",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "OpenAI",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "Google",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "Anthropic",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "OpenAI",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "Anthropic",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "OpenAI",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "Mistral",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "Reka AI",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "Anthropic",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "Mistral",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "Reka AI",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "Google",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "Anthropic",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "Mistral",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "OpenAI",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "Anthropic",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "Google",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "Anthropic",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "OpenAI",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "OpenAI",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "LMSYS",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "Perplexity AI",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "OpenAI",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "Perplexity AI",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "UW",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "Google",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "UC Berkeley",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "Nomic AI",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "Stanford",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "Tsinghua",
           "Proprietary LLM",
           "Proprietary LLM"
          ],
          [
           "Meta",
           "Proprietary LLM",
           "Proprietary LLM"
          ]
         ],
         "hovertemplate": "<b>%{hovertext}</b><br><br>license_binary=%{customdata[2]}<br>Release Date=%{x}<br>Arena ELO=%{y}<br>Organization=%{customdata[0]}<br>License=%{customdata[1]}<extra></extra>",
         "hovertext": [
          "GPT-4-Turbo-2024-04-09",
          "GPT-4-1106-preview",
          "Claude 3 Opus",
          "Gemini 1.5 Pro API-0409-Preview",
          "GPT-4-0125-preview",
          "Bard (Gemini Pro)",
          "Claude 3 Sonnet",
          "GPT-4-0314",
          "Claude 3 Haiku",
          "GPT-4-0613",
          "Mistral-Large-2402",
          "Reka-Flash-21B-online",
          "Claude-1",
          "Mistral Medium",
          "Reka-Flash-21B",
          "Gemini Pro (Dev API)",
          "Claude-2.0",
          "Mistral-Next",
          "GPT-3.5-Turbo-0613",
          "Claude-2.1",
          "Gemini Pro",
          "Claude-Instant-1",
          "GPT-3.5-Turbo-0314",
          "GPT-3.5-Turbo-0125",
          "Vicuna-33B",
          "pplx-70b-online",
          "GPT-3.5-Turbo-1106",
          "pplx-7b-online",
          "Guanaco-33B",
          "PaLM-Chat-Bison-001",
          "Koala-13B",
          "GPT4All-13B-Snoozy",
          "Alpaca-13B",
          "ChatGLM-6B",
          "LLaMA-13B"
         ],
         "legendgroup": "Proprietary LLM",
         "marker": {
          "color": "#636efa",
          "size": 8,
          "symbol": "circle"
         },
         "mode": "markers",
         "name": "Proprietary LLM",
         "orientation": "v",
         "showlegend": true,
         "type": "scatter",
         "x": [
          "2024-04-09T00:00:00",
          "2023-11-06T00:00:00",
          "2024-02-29T00:00:00",
          "2024-04-09T00:00:00",
          "2024-01-25T00:00:00",
          "2024-02-01T00:00:00",
          "2024-02-29T00:00:00",
          "2024-03-14T00:00:00",
          "2024-03-07T00:00:00",
          "2023-06-13T00:00:00",
          "2024-02-24T00:00:00",
          "2024-02-26T00:00:00",
          "2023-03-14T00:00:00",
          "2023-12-11T00:00:00",
          "2024-02-26T00:00:00",
          "2023-12-13T00:00:00",
          "2023-07-11T00:00:00",
          "2024-02-17T00:00:00",
          "2023-06-13T00:00:00",
          "2023-11-21T00:00:00",
          "2023-12-13T00:00:00",
          "2023-03-14T00:00:00",
          "2024-03-14T00:00:00",
          "2024-01-25T00:00:00",
          "2023-06-21T00:00:00",
          "2023-11-29T00:00:00",
          "2023-11-06T00:00:00",
          "2023-11-29T00:00:00",
          "2023-05-22T00:00:00",
          "2023-07-10T00:00:00",
          "2023-04-03T00:00:00",
          "2023-04-24T00:00:00",
          "2023-03-13T00:00:00",
          "2023-03-13T00:00:00",
          "2023-02-27T00:00:00"
         ],
         "xaxis": "x",
         "y": [
          1258.8152791324715,
          1252.6848856241577,
          1250.9262064295565,
          1249.6183945401244,
          1246.7775913509702,
          1208.7128773784577,
          1201.2654981955752,
          1189.557977031121,
          1180.8870022256567,
          1165.279013874706,
          1157.2129636222178,
          1153.368015144387,
          1150.6246111849628,
          1148.003325470259,
          1147.136619289767,
          1135.7254379948201,
          1132.3083987521873,
          1126.6887059695398,
          1119.8996424050451,
          1119.0708879096221,
          1115.3213731540973,
          1110.3806845414053,
          1108.9125926100855,
          1107.1298100300314,
          1093.8870113925889,
          1075.4285458870645,
          1072.711340370162,
          1043.3909111518306,
          1034.3952377983876,
          1009.7116452193085,
          969.48148016344,
          938.8924300511185,
          908.0843590844727,
          886.8734292498528,
          804.3563285706291
         ],
         "yaxis": "y"
        },
        {
         "customdata": [
          [
           "Meta",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Cohere",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Meta",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Alibaba",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Cohere",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Mistral",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Alibaba",
           "Open LLM",
           "Open LLM"
          ],
          [
           "HuggingFace",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Nexusflow",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Alibaba",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Mistral",
           "Open LLM",
           "Open LLM"
          ],
          [
           "01 AI",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Microsoft",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Databricks",
           "Open LLM",
           "Open LLM"
          ],
          [
           "AllenAI/UW",
           "Open LLM",
           "Open LLM"
          ],
          [
           "OpenChat",
           "Open LLM",
           "Open LLM"
          ],
          [
           "UC Berkeley",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Meta",
           "Open LLM",
           "Open LLM"
          ],
          [
           "NousResearch",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Google",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Nvidia",
           "Open LLM",
           "Open LLM"
          ],
          [
           "DeepSeek AI",
           "Open LLM",
           "Open LLM"
          ],
          [
           "OpenChat",
           "Open LLM",
           "Open LLM"
          ],
          [
           "NousResearch",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Alibaba",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Mistral",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Cognitive Computations",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Upstage AI",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Microsoft",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Meta",
           "Open LLM",
           "Open LLM"
          ],
          [
           "HuggingFace",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Microsoft",
           "Open LLM",
           "Open LLM"
          ],
          [
           "LMSYS",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Meta",
           "Open LLM",
           "Open LLM"
          ],
          [
           "MosaicML",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Meta",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Google",
           "Open LLM",
           "Open LLM"
          ],
          [
           "HuggingFace",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Meta",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Alibaba",
           "Open LLM",
           "Open LLM"
          ],
          [
           "TII",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Together AI",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Allen AI",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Google",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Mistral",
           "Open LLM",
           "Open LLM"
          ],
          [
           "LMSYS",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Alibaba",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Google",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Tsinghua",
           "Open LLM",
           "Open LLM"
          ],
          [
           "MosaicML",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Tsinghua",
           "Open LLM",
           "Open LLM"
          ],
          [
           "RWKV",
           "Open LLM",
           "Open LLM"
          ],
          [
           "OpenAssistant",
           "Open LLM",
           "Open LLM"
          ],
          [
           "LMSYS",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Stability AI",
           "Open LLM",
           "Open LLM"
          ],
          [
           "Databricks",
           "Open LLM",
           "Open LLM"
          ]
         ],
         "hovertemplate": "<b>%{hovertext}</b><br><br>license_binary=%{customdata[2]}<br>Release Date=%{x}<br>Arena ELO=%{y}<br>Organization=%{customdata[0]}<br>License=%{customdata[1]}<extra></extra>",
         "hovertext": [
          "Llama-3-70b-Instruct",
          "Command R+",
          "Llama-3-8b-Instruct",
          "Qwen1.5-72B-Chat",
          "Command R",
          "Mixtral-8x22b-Instruct-v0.1",
          "Qwen1.5-32B-Chat",
          "Zephyr-ORPO-141b-A35b-v0.1",
          "Starling-LM-7B-beta",
          "Qwen1.5-14B-Chat",
          "Mixtral-8x7b-Instruct-v0.1",
          "Yi-34B-Chat",
          "WizardLM-70B-v1.0",
          "DBRX-Instruct-Preview",
          "Tulu-2-DPO-70B",
          "OpenChat-3.5-0106",
          "Starling-LM-7B-alpha",
          "Llama-2-70b-chat",
          "Nous-Hermes-2-Mixtral-8x7B-DPO",
          "Gemma-1.1-7B-it",
          "NV-Llama2-70B-SteerLM-Chat",
          "DeepSeek-LLM-67B-Chat",
          "OpenChat-3.5",
          "OpenHermes-2.5-Mistral-7b",
          "Qwen1.5-7B-Chat",
          "Mistral-7B-Instruct-v0.2",
          "Dolphin-2.2.1-Mistral-7B",
          "SOLAR-10.7B-Instruct-v1.0",
          "WizardLM-13b-v1.2",
          "Llama-2-13b-chat",
          "Zephyr-7b-beta",
          "Phi-3-Mini-128k-Instruct",
          "Vicuna-13B",
          "CodeLlama-70B-instruct",
          "MPT-30B-chat",
          "CodeLlama-34B-instruct",
          "Gemma-7B-it",
          "Zephyr-7b-alpha",
          "Llama-2-7b-chat",
          "Qwen-14B-Chat",
          "falcon-180b-chat",
          "StripedHyena-Nous-7B",
          "OLMo-7B-instruct",
          "Gemma-1.1-2B-it",
          "Mistral-7B-Instruct-v0.1",
          "Vicuna-7B",
          "Qwen1.5-4B-Chat",
          "Gemma-2B-it",
          "ChatGLM3-6B",
          "MPT-7B-Chat",
          "ChatGLM2-6B",
          "RWKV-4-Raven-14B",
          "OpenAssistant-Pythia-12B",
          "FastChat-T5-3B",
          "StableLM-Tuned-Alpha-7B",
          "Dolly-V2-12B"
         ],
         "legendgroup": "Open LLM",
         "marker": {
          "color": "#EF553B",
          "size": 8,
          "symbol": "circle"
         },
         "mode": "markers",
         "name": "Open LLM",
         "orientation": "v",
         "showlegend": true,
         "type": "scatter",
         "x": [
          "2024-04-18T00:00:00",
          "2024-04-04T00:00:00",
          "2024-04-18T00:00:00",
          "2024-02-04T00:00:00",
          "2024-03-11T00:00:00",
          "2024-04-17T00:00:00",
          "2024-02-04T00:00:00",
          "2024-04-12T00:00:00",
          "2024-03-20T00:00:00",
          "2024-02-04T00:00:00",
          "2023-12-11T00:00:00",
          "2024-01-23T00:00:00",
          "2023-08-09T00:00:00",
          "2024-03-27T00:00:00",
          "2023-11-12T00:00:00",
          "2024-01-06T00:00:00",
          "2023-11-25T00:00:00",
          "2023-07-18T00:00:00",
          "2024-01-13T00:00:00",
          "2024-04-09T00:00:00",
          "2023-11-24T00:00:00",
          "2023-11-29T00:00:00",
          "2023-11-16T00:00:00",
          "2023-10-29T00:00:00",
          "2024-02-04T00:00:00",
          "2023-12-11T00:00:00",
          "2023-10-30T00:00:00",
          "2023-12-13T00:00:00",
          "2023-07-25T00:00:00",
          "2023-07-18T00:00:00",
          "2023-10-26T00:00:00",
          "2024-04-23T00:00:00",
          "2023-07-23T00:00:00",
          "2024-01-29T00:00:00",
          "2023-06-09T00:00:00",
          "2023-08-24T00:00:00",
          "2024-02-21T00:00:00",
          "2023-10-09T00:00:00",
          "2023-07-18T00:00:00",
          "2023-09-24T00:00:00",
          "2023-09-05T00:00:00",
          "2023-12-07T00:00:00",
          "2024-02-23T00:00:00",
          "2024-04-09T00:00:00",
          "2023-09-27T00:00:00",
          "2023-07-29T00:00:00",
          "2024-02-04T00:00:00",
          "2024-02-21T00:00:00",
          "2023-10-25T00:00:00",
          "2023-05-04T00:00:00",
          "2023-06-25T00:00:00",
          "2023-05-22T00:00:00",
          "2023-04-03T00:00:00",
          "2023-04-27T00:00:00",
          "2023-04-19T00:00:00",
          "2023-04-12T00:00:00"
         ],
         "xaxis": "x",
         "y": [
          1209.6462958943152,
          1190.5291640364956,
          1152.500938092916,
          1152.485612667822,
          1147.8966494489798,
          1145.8123271934626,
          1133.8011394014864,
          1128.8163366984966,
          1118.5178781177128,
          1118.475700517794,
          1114,
          1111.1326399460543,
          1108.552744333791,
          1103.2167069462541,
          1102.79428840509,
          1098.527455141752,
          1091.5210240331344,
          1088.7078065720734,
          1087.307757938674,
          1082.9619916739105,
          1082.4713591517852,
          1079.7362777221456,
          1078.6663284631356,
          1078.6429577216027,
          1076.5321247427814,
          1074.0655548845186,
          1065.574858796917,
          1065.0611191304033,
          1061.9003873957429,
          1056.9265912995625,
          1054.4162995844372,
          1050.1481252382014,
          1047.9555279582555,
          1047.927687897156,
          1047.823066613369,
          1047.396876459045,
          1043.5443043467913,
          1043.0842673002462,
          1040.7537596503887,
          1038.586932982431,
          1037.076380506833,
          1023.112092466059,
          1020.7569311460566,
          1014.832737666584,
          1012.1048679697501,
          1009.3834445358582,
          1002.744713564041,
          999.6431193544297,
          960.7895509564338,
          933.340871331175,
          933.3372880828122,
          928.4512512366093,
          900.2948677134343,
          876.9291083582452,
          848.9325675003323,
          826.6473317994165
         ],
         "yaxis": "y"
        }
       ],
       "layout": {
        "legend": {
         "title": {
          "text": "license_binary"
         },
         "tracegroupgap": 0
        },
        "template": {
         "data": {
          "bar": [
           {
            "error_x": {
             "color": "#2a3f5f"
            },
            "error_y": {
             "color": "#2a3f5f"
            },
            "marker": {
             "line": {
              "color": "white",
              "width": 0.5
             },
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "bar"
           }
          ],
          "barpolar": [
           {
            "marker": {
             "line": {
              "color": "white",
              "width": 0.5
             },
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "barpolar"
           }
          ],
          "carpet": [
           {
            "aaxis": {
             "endlinecolor": "#2a3f5f",
             "gridcolor": "#C8D4E3",
             "linecolor": "#C8D4E3",
             "minorgridcolor": "#C8D4E3",
             "startlinecolor": "#2a3f5f"
            },
            "baxis": {
             "endlinecolor": "#2a3f5f",
             "gridcolor": "#C8D4E3",
             "linecolor": "#C8D4E3",
             "minorgridcolor": "#C8D4E3",
             "startlinecolor": "#2a3f5f"
            },
            "type": "carpet"
           }
          ],
          "choropleth": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "choropleth"
           }
          ],
          "contour": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "contour"
           }
          ],
          "contourcarpet": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "contourcarpet"
           }
          ],
          "heatmap": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "heatmap"
           }
          ],
          "heatmapgl": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "heatmapgl"
           }
          ],
          "histogram": [
           {
            "marker": {
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "histogram"
           }
          ],
          "histogram2d": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "histogram2d"
           }
          ],
          "histogram2dcontour": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "histogram2dcontour"
           }
          ],
          "mesh3d": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "mesh3d"
           }
          ],
          "parcoords": [
           {
            "line": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "parcoords"
           }
          ],
          "pie": [
           {
            "automargin": true,
            "type": "pie"
           }
          ],
          "scatter": [
           {
            "fillpattern": {
             "fillmode": "overlay",
             "size": 10,
             "solidity": 0.2
            },
            "type": "scatter"
           }
          ],
          "scatter3d": [
           {
            "line": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatter3d"
           }
          ],
          "scattercarpet": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattercarpet"
           }
          ],
          "scattergeo": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattergeo"
           }
          ],
          "scattergl": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattergl"
           }
          ],
          "scattermapbox": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattermapbox"
           }
          ],
          "scatterpolar": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterpolar"
           }
          ],
          "scatterpolargl": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterpolargl"
           }
          ],
          "scatterternary": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterternary"
           }
          ],
          "surface": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "surface"
           }
          ],
          "table": [
           {
            "cells": {
             "fill": {
              "color": "#EBF0F8"
             },
             "line": {
              "color": "white"
             }
            },
            "header": {
             "fill": {
              "color": "#C8D4E3"
             },
             "line": {
              "color": "white"
             }
            },
            "type": "table"
           }
          ]
         },
         "layout": {
          "annotationdefaults": {
           "arrowcolor": "#2a3f5f",
           "arrowhead": 0,
           "arrowwidth": 1
          },
          "autotypenumbers": "strict",
          "coloraxis": {
           "colorbar": {
            "outlinewidth": 0,
            "ticks": ""
           }
          },
          "colorscale": {
           "diverging": [
            [
             0,
             "#8e0152"
            ],
            [
             0.1,
             "#c51b7d"
            ],
            [
             0.2,
             "#de77ae"
            ],
            [
             0.3,
             "#f1b6da"
            ],
            [
             0.4,
             "#fde0ef"
            ],
            [
             0.5,
             "#f7f7f7"
            ],
            [
             0.6,
             "#e6f5d0"
            ],
            [
             0.7,
             "#b8e186"
            ],
            [
             0.8,
             "#7fbc41"
            ],
            [
             0.9,
             "#4d9221"
            ],
            [
             1,
             "#276419"
            ]
           ],
           "sequential": [
            [
             0,
             "#0d0887"
            ],
            [
             0.1111111111111111,
             "#46039f"
            ],
            [
             0.2222222222222222,
             "#7201a8"
            ],
            [
             0.3333333333333333,
             "#9c179e"
            ],
            [
             0.4444444444444444,
             "#bd3786"
            ],
            [
             0.5555555555555556,
             "#d8576b"
            ],
            [
             0.6666666666666666,
             "#ed7953"
            ],
            [
             0.7777777777777778,
             "#fb9f3a"
            ],
            [
             0.8888888888888888,
             "#fdca26"
            ],
            [
             1,
             "#f0f921"
            ]
           ],
           "sequentialminus": [
            [
             0,
             "#0d0887"
            ],
            [
             0.1111111111111111,
             "#46039f"
            ],
            [
             0.2222222222222222,
             "#7201a8"
            ],
            [
             0.3333333333333333,
             "#9c179e"
            ],
            [
             0.4444444444444444,
             "#bd3786"
            ],
            [
             0.5555555555555556,
             "#d8576b"
            ],
            [
             0.6666666666666666,
             "#ed7953"
            ],
            [
             0.7777777777777778,
             "#fb9f3a"
            ],
            [
             0.8888888888888888,
             "#fdca26"
            ],
            [
             1,
             "#f0f921"
            ]
           ]
          },
          "colorway": [
           "#636efa",
           "#EF553B",
           "#00cc96",
           "#ab63fa",
           "#FFA15A",
           "#19d3f3",
           "#FF6692",
           "#B6E880",
           "#FF97FF",
           "#FECB52"
          ],
          "font": {
           "color": "#2a3f5f"
          },
          "geo": {
           "bgcolor": "white",
           "lakecolor": "white",
           "landcolor": "white",
           "showlakes": true,
           "showland": true,
           "subunitcolor": "#C8D4E3"
          },
          "hoverlabel": {
           "align": "left"
          },
          "hovermode": "closest",
          "mapbox": {
           "style": "light"
          },
          "paper_bgcolor": "white",
          "plot_bgcolor": "white",
          "polar": {
           "angularaxis": {
            "gridcolor": "#EBF0F8",
            "linecolor": "#EBF0F8",
            "ticks": ""
           },
           "bgcolor": "white",
           "radialaxis": {
            "gridcolor": "#EBF0F8",
            "linecolor": "#EBF0F8",
            "ticks": ""
           }
          },
          "scene": {
           "xaxis": {
            "backgroundcolor": "white",
            "gridcolor": "#DFE8F3",
            "gridwidth": 2,
            "linecolor": "#EBF0F8",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "#EBF0F8"
           },
           "yaxis": {
            "backgroundcolor": "white",
            "gridcolor": "#DFE8F3",
            "gridwidth": 2,
            "linecolor": "#EBF0F8",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "#EBF0F8"
           },
           "zaxis": {
            "backgroundcolor": "white",
            "gridcolor": "#DFE8F3",
            "gridwidth": 2,
            "linecolor": "#EBF0F8",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "#EBF0F8"
           }
          },
          "shapedefaults": {
           "line": {
            "color": "#2a3f5f"
           }
          },
          "ternary": {
           "aaxis": {
            "gridcolor": "#DFE8F3",
            "linecolor": "#A2B1C6",
            "ticks": ""
           },
           "baxis": {
            "gridcolor": "#DFE8F3",
            "linecolor": "#A2B1C6",
            "ticks": ""
           },
           "bgcolor": "white",
           "caxis": {
            "gridcolor": "#DFE8F3",
            "linecolor": "#A2B1C6",
            "ticks": ""
           }
          },
          "title": {
           "x": 0.05
          },
          "xaxis": {
           "automargin": true,
           "gridcolor": "#EBF0F8",
           "linecolor": "#EBF0F8",
           "ticks": "",
           "title": {
            "standoff": 15
           },
           "zerolinecolor": "#EBF0F8",
           "zerolinewidth": 2
          },
          "yaxis": {
           "automargin": true,
           "gridcolor": "#EBF0F8",
           "linecolor": "#EBF0F8",
           "ticks": "",
           "title": {
            "standoff": 15
           },
           "zerolinecolor": "#EBF0F8",
           "zerolinewidth": 2
          }
         }
        },
        "title": {
         "text": "Closed-source vs. Open-weight models (Arena ELO, 19 Apr 24)"
        },
        "xaxis": {
         "anchor": "y",
         "domain": [
          0,
          1
         ],
         "title": {
          "text": "Release Date"
         }
        },
        "yaxis": {
         "anchor": "x",
         "domain": [
          0,
          1
         ],
         "title": {
          "text": "Arena ELO"
         }
        }
       }
      }
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import plotly.express as px\n",
    "import plotly.graph_objects as go\n",
    "\n",
    "# Plotting\n",
    "fig = px.scatter(\n",
    "    df,\n",
    "    x=\"Release Date\",\n",
    "    y=\"rating\",\n",
    "    color=\"license_binary\",\n",
    "    hover_name=\"Model\",\n",
    "    hover_data=[\n",
    "        \"Release Date\",\n",
    "        \"Organization\",\n",
    "        \"License\",\n",
    "        \"license_binary\",\n",
    "    ],\n",
    "    title=\"Closed-source vs. Open-weight models (Arena ELO, 19 Apr 24)\",\n",
    "    labels={\"rating\": \"Arena ELO\", \"Release Date\": \"Release Date\"},\n",
    "    template=\"plotly_white\",\n",
    ")\n",
    "fig.update_traces(marker=dict(size=8))\n",
    "\n",
    "# Display the plot\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "plotly.graph_objs._figure.Figure"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inspect the class of the figure object returned by px.scatter.\n",
    "type(fig)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}