{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pickle\n",
    "\n",
    "import pandas as pd\n",
    "from huggingface_hub import HfFileSystem, hf_hub_download"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Prepare data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [],
   "source": [
    "fs = HfFileSystem()\n",
    "\n",
    "# The globs are not limited to date-stamped names, so latest_dated_file()\n",
    "# below is what keeps undated files out of the selection.\n",
    "ELO_DATA_FILES = \"spaces/lmsys/chatbot-arena-leaderboard/*.pkl\"\n",
    "LEADERBOARD_DATA_FILES = \"spaces/lmsys/chatbot-arena-leaderboard/*.csv\"\n",
    "\n",
    "\n",
    "def extract_date(filename):\n",
    "    \"\"\"Return the last ``_``-separated token of the file's base name.\n",
    "\n",
    "    The directory part and everything from the first ``.`` of the base name\n",
    "    onwards are dropped first, so ``a/elo_results_20240426.pkl`` gives\n",
    "    ``\"20240426\"``. The token is returned unvalidated.\n",
    "    \"\"\"\n",
    "    return filename.split(\"/\")[-1].split(\".\")[0].split(\"_\")[-1]\n",
    "\n",
    "\n",
    "def latest_dated_file(paths, pattern):\n",
    "    \"\"\"Return the entry of ``paths`` carrying the greatest 8-digit date token.\n",
    "\n",
    "    Args:\n",
    "        paths: File paths to choose from.\n",
    "        pattern: Glob that produced ``paths``; only used in the error message.\n",
    "\n",
    "    Returns:\n",
    "        The path whose ``extract_date`` token is largest. Tokens are compared\n",
    "        as strings, which orders them by date assuming they are YYYYMMDD stamps.\n",
    "\n",
    "    Raises:\n",
    "        FileNotFoundError: If no path ends in an 8-digit token.\n",
    "    \"\"\"\n",
    "    dated = []\n",
    "    for path in paths:\n",
    "        token = extract_date(path)\n",
    "        # Skip undated names: a token such as \"v1\" would otherwise sort above\n",
    "        # every digit string and be picked as the \"latest\" file.\n",
    "        if len(token) == 8 and token.isascii() and token.isdigit():\n",
    "            dated.append(path)\n",
    "    if not dated:\n",
    "        raise FileNotFoundError(f\"no date-stamped file matches {pattern!r}\")\n",
    "    return max(dated, key=extract_date)\n",
    "\n",
    "\n",
    "elo_files = fs.glob(ELO_DATA_FILES)\n",
    "latest_elo_file = latest_dated_file(elo_files, ELO_DATA_FILES)\n",
    "\n",
    "leaderboard_files = fs.glob(LEADERBOARD_DATA_FILES)\n",
    "latest_leaderboard_file = latest_dated_file(leaderboard_files, LEADERBOARD_DATA_FILES)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('leaderboard_table_20240426.csv', 'elo_results_20240426.pkl')"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Base names of the two remote files selected as most recent.\n",
    "tuple(\n",
    "    remote_path.rsplit(\"/\", 1)[-1]\n",
    "    for remote_path in (latest_leaderboard_file, latest_elo_file)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _download_from_space(remote_path):\n",
    "    \"\"\"Download one file of the leaderboard Space via ``hf_hub_download``.\n",
    "\n",
    "    Only the base name of ``remote_path`` is passed on as ``filename``; the\n",
    "    value returned by ``hf_hub_download`` is handed back unchanged.\n",
    "    \"\"\"\n",
    "    return hf_hub_download(\n",
    "        repo_id=\"lmsys/chatbot-arena-leaderboard\",\n",
    "        filename=remote_path.split(\"/\")[-1],\n",
    "        repo_type=\"space\",\n",
    "    )\n",
    "\n",
    "\n",
    "latest_elo_file_local = _download_from_space(latest_elo_file)\n",
    "latest_leaderboard_file_local = _download_from_space(latest_leaderboard_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load and prepare ELO data\n",
    "\n",
    "# Key inside the ELO results pickle -> category label used as key of arena_dfs.\n",
    "key_to_category_name = {\n",
    "    \"full\": \"Overall\",\n",
    "    \"coding\": \"Coding\",\n",
    "    \"long_user\": \"Longer Query\",\n",
    "    \"english\": \"English\",\n",
    "    \"chinese\": \"Chinese\",\n",
    "    \"french\": \"French\",\n",
    "    \"no_tie\": \"Exclude Ties\",\n",
    "    \"no_short\": \"Exclude Short Query (< 5 tokens)\",\n",
    "    \"no_refusal\": \"Exclude Refusal\",\n",
    "}\n",
    "# Category label -> longer human-readable description of the category.\n",
    "cat_name_to_explanation = {\n",
    "    \"Overall\": \"Overall Questions\",\n",
    "    \"Coding\": \"Coding: whether conversation contains code snippets\",\n",
    "    \"Longer Query\": \"Longer Query (>= 500 tokens)\",\n",
    "    \"English\": \"English Prompts\",\n",
    "    \"Chinese\": \"Chinese Prompts\",\n",
    "    \"French\": \"French Prompts\",\n",
    "    \"Exclude Ties\": \"Exclude Ties and Bothbad\",\n",
    "    \"Exclude Short Query (< 5 tokens)\": \"Exclude Short User Query (< 5 tokens)\",\n",
    "    \"Exclude Refusal\": 'Exclude model responses with refusal (e.g., \"I cannot answer\")',\n",
    "}\n",
    "\n",
    "# SECURITY: pickle.load can execute arbitrary code while deserialising. The file\n",
    "# was downloaded from a remote Space, so only run this if that source is trusted.\n",
    "with open(latest_elo_file_local, \"rb\") as fin:\n",
    "    elo_results = pickle.load(fin)\n",
    "\n",
    "# Keep only the categories that are actually present in this results file.\n",
    "arena_dfs = {\n",
    "    category_name: elo_results[key][\"leaderboard_table_df\"]\n",
    "    for key, category_name in key_to_category_name.items()\n",
    "    if key in elo_results\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['Overall', 'Coding', 'Longer Query', 'English', 'Chinese', 'French', 'Exclude Ties', 'Exclude Short Query (< 5 tokens)', 'Exclude Refusal'])"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arena_dfs.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>rating</th>\n",
       "      <th>variance</th>\n",
       "      <th>rating_q975</th>\n",
       "      <th>rating_q025</th>\n",
       "      <th>num_battles</th>\n",
       "      <th>final_ranking</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>RWKV-4-Raven-14B</th>\n",
       "      <td>927.710294</td>\n",
       "      <td>27.143015</td>\n",
       "      <td>935.717850</td>\n",
       "      <td>916.546369</td>\n",
       "      <td>5129</td>\n",
       "      <td>81</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>alpaca-13b</th>\n",
       "      <td>907.324482</td>\n",
       "      <td>20.736682</td>\n",
       "      <td>915.536856</td>\n",
       "      <td>899.330070</td>\n",
       "      <td>6111</td>\n",
       "      <td>85</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>bard-jan-24-gemini-pro</th>\n",
       "      <td>1208.505408</td>\n",
       "      <td>6.679087</td>\n",
       "      <td>1213.291358</td>\n",
       "      <td>1203.926901</td>\n",
       "      <td>12388</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>chatglm-6b</th>\n",
       "      <td>886.107553</td>\n",
       "      <td>17.110417</td>\n",
       "      <td>894.034333</td>\n",
       "      <td>878.094776</td>\n",
       "      <td>5195</td>\n",
       "      <td>86</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>chatglm2-6b</th>\n",
       "      <td>932.678460</td>\n",
       "      <td>33.530570</td>\n",
       "      <td>943.455598</td>\n",
       "      <td>921.346322</td>\n",
       "      <td>2880</td>\n",
       "      <td>81</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>wizardlm-70b</th>\n",
       "      <td>1107.992552</td>\n",
       "      <td>9.385887</td>\n",
       "      <td>1114.218223</td>\n",
       "      <td>1102.655575</td>\n",
       "      <td>8868</td>\n",
       "      <td>29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>yi-34b-chat</th>\n",
       "      <td>1109.722447</td>\n",
       "      <td>8.596908</td>\n",
       "      <td>1115.182579</td>\n",
       "      <td>1103.991095</td>\n",
       "      <td>12252</td>\n",
       "      <td>29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zephyr-7b-alpha</th>\n",
       "      <td>1042.108710</td>\n",
       "      <td>43.900714</td>\n",
       "      <td>1052.991768</td>\n",
       "      <td>1027.160917</td>\n",
       "      <td>1901</td>\n",
       "      <td>58</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zephyr-7b-beta</th>\n",
       "      <td>1053.655680</td>\n",
       "      <td>10.297607</td>\n",
       "      <td>1059.923254</td>\n",
       "      <td>1047.601629</td>\n",
       "      <td>11924</td>\n",
       "      <td>54</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zephyr-orpo-141b-A35b-v0.1</th>\n",
       "      <td>1124.677515</td>\n",
       "      <td>22.288515</td>\n",
       "      <td>1132.728887</td>\n",
       "      <td>1113.848432</td>\n",
       "      <td>4276</td>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>91 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                 rating   variance  rating_q975  rating_q025  \\\n",
       "RWKV-4-Raven-14B             927.710294  27.143015   935.717850   916.546369   \n",
       "alpaca-13b                   907.324482  20.736682   915.536856   899.330070   \n",
       "bard-jan-24-gemini-pro      1208.505408   6.679087  1213.291358  1203.926901   \n",
       "chatglm-6b                   886.107553  17.110417   894.034333   878.094776   \n",
       "chatglm2-6b                  932.678460  33.530570   943.455598   921.346322   \n",
       "...                                 ...        ...          ...          ...   \n",
       "wizardlm-70b                1107.992552   9.385887  1114.218223  1102.655575   \n",
       "yi-34b-chat                 1109.722447   8.596908  1115.182579  1103.991095   \n",
       "zephyr-7b-alpha             1042.108710  43.900714  1052.991768  1027.160917   \n",
       "zephyr-7b-beta              1053.655680  10.297607  1059.923254  1047.601629   \n",
       "zephyr-orpo-141b-A35b-v0.1  1124.677515  22.288515  1132.728887  1113.848432   \n",
       "\n",
       "                            num_battles  final_ranking  \n",
       "RWKV-4-Raven-14B                   5129             81  \n",
       "alpaca-13b                         6111             85  \n",
       "bard-jan-24-gemini-pro            12388              6  \n",
       "chatglm-6b                         5195             86  \n",
       "chatglm2-6b                        2880             81  \n",
       "...                                 ...            ...  \n",
       "wizardlm-70b                       8868             29  \n",
       "yi-34b-chat                       12252             29  \n",
       "zephyr-7b-alpha                    1901             58  \n",
       "zephyr-7b-beta                    11924             54  \n",
       "zephyr-orpo-141b-A35b-v0.1         4276             22  \n",
       "\n",
       "[91 rows x 6 columns]"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arena_dfs[\"Overall\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load and prepare Leaderboard data\n",
    "# NOTE(review): the displayed frame shows \"-\" where a value is missing, so such\n",
    "# columns presumably load as strings -- confirm before doing arithmetic on them.\n",
    "leaderboard_df = pd.read_csv(filepath_or_buffer=latest_leaderboard_file_local)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key</th>\n",
       "      <th>Model</th>\n",
       "      <th>MT-bench (score)</th>\n",
       "      <th>MMLU</th>\n",
       "      <th>Knowledge cutoff date</th>\n",
       "      <th>License</th>\n",
       "      <th>Organization</th>\n",
       "      <th>Link</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>wizardlm-30b</td>\n",
       "      <td>WizardLM-30B</td>\n",
       "      <td>7.01</td>\n",
       "      <td>0.587</td>\n",
       "      <td>2023/6</td>\n",
       "      <td>Non-commercial</td>\n",
       "      <td>Microsoft</td>\n",
       "      <td>https://huggingface.co/WizardLM/WizardLM-30B-V1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>vicuna-13b-16k</td>\n",
       "      <td>Vicuna-13B-16k</td>\n",
       "      <td>6.92</td>\n",
       "      <td>0.545</td>\n",
       "      <td>2023/7</td>\n",
       "      <td>Llama 2 Community</td>\n",
       "      <td>LMSYS</td>\n",
       "      <td>https://huggingface.co/lmsys/vicuna-13b-v1.5-16k</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>wizardlm-13b-v1.1</td>\n",
       "      <td>WizardLM-13B-v1.1</td>\n",
       "      <td>6.76</td>\n",
       "      <td>0.500</td>\n",
       "      <td>2023/7</td>\n",
       "      <td>Non-commercial</td>\n",
       "      <td>Microsoft</td>\n",
       "      <td>https://huggingface.co/WizardLM/WizardLM-13B-V1.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>tulu-30b</td>\n",
       "      <td>Tulu-30B</td>\n",
       "      <td>6.43</td>\n",
       "      <td>0.581</td>\n",
       "      <td>2023/6</td>\n",
       "      <td>Non-commercial</td>\n",
       "      <td>AllenAI/UW</td>\n",
       "      <td>https://huggingface.co/allenai/tulu-30b</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>guanaco-65b</td>\n",
       "      <td>Guanaco-65B</td>\n",
       "      <td>6.41</td>\n",
       "      <td>0.621</td>\n",
       "      <td>2023/5</td>\n",
       "      <td>Non-commercial</td>\n",
       "      <td>UW</td>\n",
       "      <td>https://huggingface.co/timdettmers/guanaco-65b...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100</th>\n",
       "      <td>mixtral-8x22b-instruct-v0.1</td>\n",
       "      <td>Mixtral-8x22b-Instruct-v0.1</td>\n",
       "      <td>-</td>\n",
       "      <td>0.778</td>\n",
       "      <td>2024/4</td>\n",
       "      <td>Apache 2.0</td>\n",
       "      <td>Mistral</td>\n",
       "      <td>https://mistral.ai/news/mixtral-8x22b/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>101</th>\n",
       "      <td>llama-3-70b-instruct</td>\n",
       "      <td>Llama-3-70b-Instruct</td>\n",
       "      <td>-</td>\n",
       "      <td>0.820</td>\n",
       "      <td>2023/12</td>\n",
       "      <td>Llama 3 Community</td>\n",
       "      <td>Meta</td>\n",
       "      <td>https://llama.meta.com/llama3/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>102</th>\n",
       "      <td>llama-3-8b-instruct</td>\n",
       "      <td>Llama-3-8b-Instruct</td>\n",
       "      <td>-</td>\n",
       "      <td>0.684</td>\n",
       "      <td>2023/3</td>\n",
       "      <td>Llama 3 Community</td>\n",
       "      <td>Meta</td>\n",
       "      <td>https://llama.meta.com/llama3/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>103</th>\n",
       "      <td>gemini-1.5-pro-api-0409-preview</td>\n",
       "      <td>Gemini 1.5 Pro API-0409-Preview</td>\n",
       "      <td>-</td>\n",
       "      <td>0.819</td>\n",
       "      <td>2023/11</td>\n",
       "      <td>Proprietary</td>\n",
       "      <td>Google</td>\n",
       "      <td>https://blog.google/technology/ai/google-gemin...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>104</th>\n",
       "      <td>phi-3-mini-128k-instruct</td>\n",
       "      <td>Phi-3-Mini-128k-Instruct</td>\n",
       "      <td>-</td>\n",
       "      <td>0.681</td>\n",
       "      <td>2023/10</td>\n",
       "      <td>MIT</td>\n",
       "      <td>Microsoft</td>\n",
       "      <td>https://azure.microsoft.com/en-us/blog/introdu...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>105 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                 key                            Model  \\\n",
       "0                       wizardlm-30b                     WizardLM-30B   \n",
       "1                     vicuna-13b-16k                   Vicuna-13B-16k   \n",
       "2                  wizardlm-13b-v1.1                WizardLM-13B-v1.1   \n",
       "3                           tulu-30b                         Tulu-30B   \n",
       "4                        guanaco-65b                      Guanaco-65B   \n",
       "..                               ...                              ...   \n",
       "100      mixtral-8x22b-instruct-v0.1      Mixtral-8x22b-Instruct-v0.1   \n",
       "101             llama-3-70b-instruct             Llama-3-70b-Instruct   \n",
       "102              llama-3-8b-instruct              Llama-3-8b-Instruct   \n",
       "103  gemini-1.5-pro-api-0409-preview  Gemini 1.5 Pro API-0409-Preview   \n",
       "104         phi-3-mini-128k-instruct         Phi-3-Mini-128k-Instruct   \n",
       "\n",
       "    MT-bench (score)   MMLU Knowledge cutoff date            License  \\\n",
       "0               7.01  0.587                2023/6     Non-commercial   \n",
       "1               6.92  0.545                2023/7  Llama 2 Community   \n",
       "2               6.76  0.500                2023/7     Non-commercial   \n",
       "3               6.43  0.581                2023/6     Non-commercial   \n",
       "4               6.41  0.621                2023/5     Non-commercial   \n",
       "..               ...    ...                   ...                ...   \n",
       "100                -  0.778                2024/4         Apache 2.0   \n",
       "101                -  0.820               2023/12  Llama 3 Community   \n",
       "102                -  0.684                2023/3  Llama 3 Community   \n",
       "103                -  0.819               2023/11        Proprietary   \n",
       "104                -  0.681               2023/10                MIT   \n",
       "\n",
       "    Organization                                               Link  \n",
       "0      Microsoft  https://huggingface.co/WizardLM/WizardLM-30B-V1.0  \n",
       "1          LMSYS   https://huggingface.co/lmsys/vicuna-13b-v1.5-16k  \n",
       "2      Microsoft  https://huggingface.co/WizardLM/WizardLM-13B-V1.1  \n",
       "3     AllenAI/UW            https://huggingface.co/allenai/tulu-30b  \n",
       "4             UW  https://huggingface.co/timdettmers/guanaco-65b...  \n",
       "..           ...                                                ...  \n",
       "100      Mistral             https://mistral.ai/news/mixtral-8x22b/  \n",
       "101         Meta                     https://llama.meta.com/llama3/  \n",
       "102         Meta                     https://llama.meta.com/llama3/  \n",
       "103       Google  https://blog.google/technology/ai/google-gemin...  \n",
       "104    Microsoft  https://azure.microsoft.com/en-us/blog/introdu...  \n",
       "\n",
       "[105 rows x 8 columns]"
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "leaderboard_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['Overall', 'Coding', 'Longer Query', 'English', 'Chinese', 'French', 'Exclude Ties', 'Exclude Short Query (< 5 tokens)', 'Exclude Refusal'])"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arena_dfs.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [],
   "source": [
    "# merge ELO and Leaderboard data\n",
    "# Each arena table's index is matched against the \"key\" column of the\n",
    "# leaderboard metadata. how=\"inner\" is the pandas default, spelled out here\n",
    "# because it drops models that appear in only one of the two tables.\n",
    "merged_dfs = {\n",
    "    category_name: (\n",
    "        pd.merge(\n",
    "            arena_df,\n",
    "            leaderboard_df,\n",
    "            how=\"inner\",\n",
    "            left_index=True,\n",
    "            right_on=\"key\",\n",
    "        )\n",
    "        # Best-rated model first, then a fresh 0..n-1 index.\n",
    "        .sort_values(\"rating\", ascending=False)\n",
    "        .reset_index(drop=True)\n",
    "    )\n",
    "    for category_name, arena_df in arena_dfs.items()\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>rating</th>\n",
       "      <th>variance</th>\n",
       "      <th>rating_q975</th>\n",
       "      <th>rating_q025</th>\n",
       "      <th>num_battles</th>\n",
       "      <th>final_ranking</th>\n",
       "      <th>key</th>\n",
       "      <th>Model</th>\n",
       "      <th>MT-bench (score)</th>\n",
       "      <th>MMLU</th>\n",
       "      <th>Knowledge cutoff date</th>\n",
       "      <th>License</th>\n",
       "      <th>Organization</th>\n",
       "      <th>Link</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1257.399407</td>\n",
       "      <td>4.283316</td>\n",
       "      <td>1261.676224</td>\n",
       "      <td>1254.003626</td>\n",
       "      <td>30562</td>\n",
       "      <td>1</td>\n",
       "      <td>gpt-4-turbo-2024-04-09</td>\n",
       "      <td>GPT-4-Turbo-2024-04-09</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>2023/12</td>\n",
       "      <td>Proprietary</td>\n",
       "      <td>OpenAI</td>\n",
       "      <td>https://platform.openai.com/docs/models/gpt-4-...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1253.025095</td>\n",
       "      <td>2.069534</td>\n",
       "      <td>1256.111392</td>\n",
       "      <td>1250.435207</td>\n",
       "      <td>69871</td>\n",
       "      <td>1</td>\n",
       "      <td>gpt-4-1106-preview</td>\n",
       "      <td>GPT-4-1106-preview</td>\n",
       "      <td>9.32</td>\n",
       "      <td>-</td>\n",
       "      <td>2023/4</td>\n",
       "      <td>Proprietary</td>\n",
       "      <td>OpenAI</td>\n",
       "      <td>https://openai.com/blog/new-models-and-develop...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1251.114220</td>\n",
       "      <td>1.862842</td>\n",
       "      <td>1253.629093</td>\n",
       "      <td>1248.362042</td>\n",
       "      <td>75684</td>\n",
       "      <td>2</td>\n",
       "      <td>claude-3-opus-20240229</td>\n",
       "      <td>Claude 3 Opus</td>\n",
       "      <td>-</td>\n",
       "      <td>0.868</td>\n",
       "      <td>2023/8</td>\n",
       "      <td>Proprietary</td>\n",
       "      <td>Anthropic</td>\n",
       "      <td>https://www.anthropic.com/news/claude-3-family</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1247.662508</td>\n",
       "      <td>3.263747</td>\n",
       "      <td>1251.582645</td>\n",
       "      <td>1244.380454</td>\n",
       "      <td>33723</td>\n",
       "      <td>2</td>\n",
       "      <td>gemini-1.5-pro-api-0409-preview</td>\n",
       "      <td>Gemini 1.5 Pro API-0409-Preview</td>\n",
       "      <td>-</td>\n",
       "      <td>0.819</td>\n",
       "      <td>2023/11</td>\n",
       "      <td>Proprietary</td>\n",
       "      <td>Google</td>\n",
       "      <td>https://blog.google/technology/ai/google-gemin...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1247.277052</td>\n",
       "      <td>1.923014</td>\n",
       "      <td>1249.489411</td>\n",
       "      <td>1244.340257</td>\n",
       "      <td>61924</td>\n",
       "      <td>3</td>\n",
       "      <td>gpt-4-0125-preview</td>\n",
       "      <td>GPT-4-0125-preview</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>2023/12</td>\n",
       "      <td>Proprietary</td>\n",
       "      <td>OpenAI</td>\n",
       "      <td>https://openai.com/blog/new-models-and-develop...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1208.505408</td>\n",
       "      <td>6.679087</td>\n",
       "      <td>1213.291358</td>\n",
       "      <td>1203.926901</td>\n",
       "      <td>12388</td>\n",
       "      <td>6</td>\n",
       "      <td>bard-jan-24-gemini-pro</td>\n",
       "      <td>Bard (Gemini Pro)</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>Online</td>\n",
       "      <td>Proprietary</td>\n",
       "      <td>Google</td>\n",
       "      <td>https://bard.google.com/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1207.497541</td>\n",
       "      <td>4.109466</td>\n",
       "      <td>1211.720734</td>\n",
       "      <td>1203.322762</td>\n",
       "      <td>27298</td>\n",
       "      <td>6</td>\n",
       "      <td>llama-3-70b-instruct</td>\n",
       "      <td>Llama-3-70b-Instruct</td>\n",
       "      <td>-</td>\n",
       "      <td>0.820</td>\n",
       "      <td>2023/12</td>\n",
       "      <td>Llama 3 Community</td>\n",
       "      <td>Meta</td>\n",
       "      <td>https://llama.meta.com/llama3/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>1201.671254</td>\n",
       "      <td>2.525563</td>\n",
       "      <td>1204.862512</td>\n",
       "      <td>1198.658822</td>\n",
       "      <td>75418</td>\n",
       "      <td>6</td>\n",
       "      <td>claude-3-sonnet-20240229</td>\n",
       "      <td>Claude 3 Sonnet</td>\n",
       "      <td>-</td>\n",
       "      <td>0.790</td>\n",
       "      <td>2023/8</td>\n",
       "      <td>Proprietary</td>\n",
       "      <td>Anthropic</td>\n",
       "      <td>https://www.anthropic.com/news/claude-3-family</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>1191.684542</td>\n",
       "      <td>3.459717</td>\n",
       "      <td>1195.080256</td>\n",
       "      <td>1188.222382</td>\n",
       "      <td>41262</td>\n",
       "      <td>9</td>\n",
       "      <td>command-r-plus</td>\n",
       "      <td>Command R+</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>2024/3</td>\n",
       "      <td>CC-BY-NC-4.0</td>\n",
       "      <td>Cohere</td>\n",
       "      <td>https://txt.cohere.com/command-r-plus-microsof...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>1188.987389</td>\n",
       "      <td>3.124792</td>\n",
       "      <td>1193.335535</td>\n",
       "      <td>1185.935928</td>\n",
       "      <td>48390</td>\n",
       "      <td>9</td>\n",
       "      <td>gpt-4-0314</td>\n",
       "      <td>GPT-4-0314</td>\n",
       "      <td>8.96</td>\n",
       "      <td>0.864</td>\n",
       "      <td>2021/9</td>\n",
       "      <td>Proprietary</td>\n",
       "      <td>OpenAI</td>\n",
       "      <td>https://openai.com/research/gpt-4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>1180.606870</td>\n",
       "      <td>3.097542</td>\n",
       "      <td>1183.825403</td>\n",
       "      <td>1177.255203</td>\n",
       "      <td>66065</td>\n",
       "      <td>11</td>\n",
       "      <td>claude-3-haiku-20240307</td>\n",
       "      <td>Claude 3 Haiku</td>\n",
       "      <td>-</td>\n",
       "      <td>0.752</td>\n",
       "      <td>2023/8</td>\n",
       "      <td>Proprietary</td>\n",
       "      <td>Anthropic</td>\n",
       "      <td>https://www.anthropic.com/news/claude-3-family</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>1164.896561</td>\n",
       "      <td>2.585577</td>\n",
       "      <td>1167.595696</td>\n",
       "      <td>1161.727454</td>\n",
       "      <td>67038</td>\n",
       "      <td>12</td>\n",
       "      <td>gpt-4-0613</td>\n",
       "      <td>GPT-4-0613</td>\n",
       "      <td>9.18</td>\n",
       "      <td>-</td>\n",
       "      <td>2021/9</td>\n",
       "      <td>Proprietary</td>\n",
       "      <td>OpenAI</td>\n",
       "      <td>https://platform.openai.com/docs/models/gpt-4-...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>1157.638992</td>\n",
       "      <td>2.541320</td>\n",
       "      <td>1160.496116</td>\n",
       "      <td>1154.927748</td>\n",
       "      <td>44120</td>\n",
       "      <td>13</td>\n",
       "      <td>mistral-large-2402</td>\n",
       "      <td>Mistral-Large-2402</td>\n",
       "      <td>-</td>\n",
       "      <td>0.812</td>\n",
       "      <td>-</td>\n",
       "      <td>Proprietary</td>\n",
       "      <td>Mistral</td>\n",
       "      <td>https://mistral.ai/news/mistral-large/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>1153.464280</td>\n",
       "      <td>3.631512</td>\n",
       "      <td>1157.068850</td>\n",
       "      <td>1150.178903</td>\n",
       "      <td>32999</td>\n",
       "      <td>13</td>\n",
       "      <td>qwen1.5-72b-chat</td>\n",
       "      <td>Qwen1.5-72B-Chat</td>\n",
       "      <td>8.61</td>\n",
       "      <td>0.775</td>\n",
       "      <td>2024/2</td>\n",
       "      <td>Qianwen LICENSE</td>\n",
       "      <td>Alibaba</td>\n",
       "      <td>https://qwenlm.github.io/blog/qwen1.5/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>1150.918473</td>\n",
       "      <td>9.062217</td>\n",
       "      <td>1155.969721</td>\n",
       "      <td>1145.229885</td>\n",
       "      <td>8622</td>\n",
       "      <td>13</td>\n",
       "      <td>reka-flash-21b-20240226-online</td>\n",
       "      <td>Reka-Flash-21B-online</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>Online</td>\n",
       "      <td>Proprietary</td>\n",
       "      <td>Reka AI</td>\n",
       "      <td>https://docs.reka.ai/http-api.html#generation</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>1150.244313</td>\n",
       "      <td>5.551373</td>\n",
       "      <td>1154.745214</td>\n",
       "      <td>1145.496466</td>\n",
       "      <td>21768</td>\n",
       "      <td>14</td>\n",
       "      <td>claude-1</td>\n",
       "      <td>Claude-1</td>\n",
       "      <td>7.90</td>\n",
       "      <td>0.770</td>\n",
       "      <td>-</td>\n",
       "      <td>Proprietary</td>\n",
       "      <td>Anthropic</td>\n",
       "      <td>https://www.anthropic.com/index/introducing-cl...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>1149.267578</td>\n",
       "      <td>11.452272</td>\n",
       "      <td>1154.290155</td>\n",
       "      <td>1141.931621</td>\n",
       "      <td>9059</td>\n",
       "      <td>14</td>\n",
       "      <td>reka-flash-21b-20240226</td>\n",
       "      <td>Reka-Flash-21B</td>\n",
       "      <td>-</td>\n",
       "      <td>0.735</td>\n",
       "      <td>2023/11</td>\n",
       "      <td>Proprietary</td>\n",
       "      <td>Reka AI</td>\n",
       "      <td>https://www.reka.ai/news/reka-flash-efficient-...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>1148.072155</td>\n",
       "      <td>3.071222</td>\n",
       "      <td>1151.980865</td>\n",
       "      <td>1144.992044</td>\n",
       "      <td>37413</td>\n",
       "      <td>14</td>\n",
       "      <td>command-r</td>\n",
       "      <td>Command R</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>2024/3</td>\n",
       "      <td>CC-BY-NC-4.0</td>\n",
       "      <td>Cohere</td>\n",
       "      <td>https://txt.cohere.com/command-r</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>1147.668325</td>\n",
       "      <td>3.542229</td>\n",
       "      <td>1150.726489</td>\n",
       "      <td>1143.868385</td>\n",
       "      <td>32738</td>\n",
       "      <td>14</td>\n",
       "      <td>mistral-medium</td>\n",
       "      <td>Mistral Medium</td>\n",
       "      <td>8.61</td>\n",
       "      <td>0.753</td>\n",
       "      <td>-</td>\n",
       "      <td>Proprietary</td>\n",
       "      <td>Mistral</td>\n",
       "      <td>https://mistral.ai/news/la-plateforme/</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>1147.473989</td>\n",
       "      <td>5.789710</td>\n",
       "      <td>1151.989352</td>\n",
       "      <td>1143.322918</td>\n",
       "      <td>17214</td>\n",
       "      <td>14</td>\n",
       "      <td>mixtral-8x22b-instruct-v0.1</td>\n",
       "      <td>Mixtral-8x22b-Instruct-v0.1</td>\n",
       "      <td>-</td>\n",
       "      <td>0.778</td>\n",
       "      <td>2024/4</td>\n",
       "      <td>Apache 2.0</td>\n",
       "      <td>Mistral</td>\n",
       "      <td>https://mistral.ai/news/mixtral-8x22b/</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         rating   variance  rating_q975  rating_q025  num_battles  \\\n",
       "0   1257.399407   4.283316  1261.676224  1254.003626        30562   \n",
       "1   1253.025095   2.069534  1256.111392  1250.435207        69871   \n",
       "2   1251.114220   1.862842  1253.629093  1248.362042        75684   \n",
       "3   1247.662508   3.263747  1251.582645  1244.380454        33723   \n",
       "4   1247.277052   1.923014  1249.489411  1244.340257        61924   \n",
       "5   1208.505408   6.679087  1213.291358  1203.926901        12388   \n",
       "6   1207.497541   4.109466  1211.720734  1203.322762        27298   \n",
       "7   1201.671254   2.525563  1204.862512  1198.658822        75418   \n",
       "8   1191.684542   3.459717  1195.080256  1188.222382        41262   \n",
       "9   1188.987389   3.124792  1193.335535  1185.935928        48390   \n",
       "10  1180.606870   3.097542  1183.825403  1177.255203        66065   \n",
       "11  1164.896561   2.585577  1167.595696  1161.727454        67038   \n",
       "12  1157.638992   2.541320  1160.496116  1154.927748        44120   \n",
       "13  1153.464280   3.631512  1157.068850  1150.178903        32999   \n",
       "14  1150.918473   9.062217  1155.969721  1145.229885         8622   \n",
       "15  1150.244313   5.551373  1154.745214  1145.496466        21768   \n",
       "16  1149.267578  11.452272  1154.290155  1141.931621         9059   \n",
       "17  1148.072155   3.071222  1151.980865  1144.992044        37413   \n",
       "18  1147.668325   3.542229  1150.726489  1143.868385        32738   \n",
       "19  1147.473989   5.789710  1151.989352  1143.322918        17214   \n",
       "\n",
       "    final_ranking                              key  \\\n",
       "0               1           gpt-4-turbo-2024-04-09   \n",
       "1               1               gpt-4-1106-preview   \n",
       "2               2           claude-3-opus-20240229   \n",
       "3               2  gemini-1.5-pro-api-0409-preview   \n",
       "4               3               gpt-4-0125-preview   \n",
       "5               6           bard-jan-24-gemini-pro   \n",
       "6               6             llama-3-70b-instruct   \n",
       "7               6         claude-3-sonnet-20240229   \n",
       "8               9                   command-r-plus   \n",
       "9               9                       gpt-4-0314   \n",
       "10             11          claude-3-haiku-20240307   \n",
       "11             12                       gpt-4-0613   \n",
       "12             13               mistral-large-2402   \n",
       "13             13                 qwen1.5-72b-chat   \n",
       "14             13   reka-flash-21b-20240226-online   \n",
       "15             14                         claude-1   \n",
       "16             14          reka-flash-21b-20240226   \n",
       "17             14                        command-r   \n",
       "18             14                   mistral-medium   \n",
       "19             14      mixtral-8x22b-instruct-v0.1   \n",
       "\n",
       "                              Model MT-bench (score)   MMLU  \\\n",
       "0            GPT-4-Turbo-2024-04-09                -      -   \n",
       "1                GPT-4-1106-preview             9.32      -   \n",
       "2                     Claude 3 Opus                -  0.868   \n",
       "3   Gemini 1.5 Pro API-0409-Preview                -  0.819   \n",
       "4                GPT-4-0125-preview                -      -   \n",
       "5                 Bard (Gemini Pro)                -      -   \n",
       "6              Llama-3-70b-Instruct                -  0.820   \n",
       "7                   Claude 3 Sonnet                -  0.790   \n",
       "8                        Command R+                -      -   \n",
       "9                        GPT-4-0314             8.96  0.864   \n",
       "10                   Claude 3 Haiku                -  0.752   \n",
       "11                       GPT-4-0613             9.18      -   \n",
       "12               Mistral-Large-2402                -  0.812   \n",
       "13                 Qwen1.5-72B-Chat             8.61  0.775   \n",
       "14            Reka-Flash-21B-online                -      -   \n",
       "15                         Claude-1             7.90  0.770   \n",
       "16                   Reka-Flash-21B                -  0.735   \n",
       "17                        Command R                -      -   \n",
       "18                   Mistral Medium             8.61  0.753   \n",
       "19      Mixtral-8x22b-Instruct-v0.1                -  0.778   \n",
       "\n",
       "   Knowledge cutoff date            License Organization  \\\n",
       "0                2023/12        Proprietary       OpenAI   \n",
       "1                 2023/4        Proprietary       OpenAI   \n",
       "2                 2023/8        Proprietary    Anthropic   \n",
       "3                2023/11        Proprietary       Google   \n",
       "4                2023/12        Proprietary       OpenAI   \n",
       "5                 Online        Proprietary       Google   \n",
       "6                2023/12  Llama 3 Community         Meta   \n",
       "7                 2023/8        Proprietary    Anthropic   \n",
       "8                 2024/3       CC-BY-NC-4.0       Cohere   \n",
       "9                 2021/9        Proprietary       OpenAI   \n",
       "10                2023/8        Proprietary    Anthropic   \n",
       "11                2021/9        Proprietary       OpenAI   \n",
       "12                     -        Proprietary      Mistral   \n",
       "13                2024/2    Qianwen LICENSE      Alibaba   \n",
       "14                Online        Proprietary      Reka AI   \n",
       "15                     -        Proprietary    Anthropic   \n",
       "16               2023/11        Proprietary      Reka AI   \n",
       "17                2024/3       CC-BY-NC-4.0       Cohere   \n",
       "18                     -        Proprietary      Mistral   \n",
       "19                2024/4         Apache 2.0      Mistral   \n",
       "\n",
       "                                                 Link  \n",
       "0   https://platform.openai.com/docs/models/gpt-4-...  \n",
       "1   https://openai.com/blog/new-models-and-develop...  \n",
       "2      https://www.anthropic.com/news/claude-3-family  \n",
       "3   https://blog.google/technology/ai/google-gemin...  \n",
       "4   https://openai.com/blog/new-models-and-develop...  \n",
       "5                            https://bard.google.com/  \n",
       "6                      https://llama.meta.com/llama3/  \n",
       "7      https://www.anthropic.com/news/claude-3-family  \n",
       "8   https://txt.cohere.com/command-r-plus-microsof...  \n",
       "9                   https://openai.com/research/gpt-4  \n",
       "10     https://www.anthropic.com/news/claude-3-family  \n",
       "11  https://platform.openai.com/docs/models/gpt-4-...  \n",
       "12             https://mistral.ai/news/mistral-large/  \n",
       "13             https://qwenlm.github.io/blog/qwen1.5/  \n",
       "14      https://docs.reka.ai/http-api.html#generation  \n",
       "15  https://www.anthropic.com/index/introducing-cl...  \n",
       "16  https://www.reka.ai/news/reka-flash-efficient-...  \n",
       "17                   https://txt.cohere.com/command-r  \n",
       "18             https://mistral.ai/news/la-plateforme/  \n",
       "19             https://mistral.ai/news/mixtral-8x22b/  "
      ]
     },
     "execution_count": 101,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "merged_dfs[\"Overall\"][:20]"
   ]
  },
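  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Manually map release dates\n",
    "\n",
    "The merged leaderboard table has no release-date column, so the cells below export a `key`/`Model` template with an empty `Release Date` field to be filled in by hand (a tedious manual step)."
   ]
  },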
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Template for hand-entered release dates: the `key` and `Model` columns of\n",
    "# the \"Overall\" table plus a \"Release Date\" column initialised to \"\".\n",
    "t = merged_dfs[\"Overall\"][[\"key\", \"Model\"]].assign(**{\"Release Date\": \"\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the template as JSON Lines (one record per line).\n",
    "# NOTE(review): this overwrites release_date_mapping.json on every run, so\n",
    "# any dates already filled in by hand would be lost; confirm before re-running.\n",
    "t.to_json(path_or_buf=\"release_date_mapping.json\", lines=True, orient=\"records\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'key': 'gpt-4-turbo-2024-04-09',\n",
       "  'Model': 'GPT-4-Turbo-2024-04-09',\n",
       "  'Release Date': ''},\n",
       " {'key': 'gpt-4-1106-preview',\n",
       "  'Model': 'GPT-4-1106-preview',\n",
       "  'Release Date': ''},\n",
       " {'key': 'claude-3-opus-20240229',\n",
       "  'Model': 'Claude 3 Opus',\n",
       "  'Release Date': ''},\n",
       " {'key': 'gemini-1.5-pro-api-0409-preview',\n",
       "  'Model': 'Gemini 1.5 Pro API-0409-Preview',\n",
       "  'Release Date': ''},\n",
       " {'key': 'gpt-4-0125-preview',\n",
       "  'Model': 'GPT-4-0125-preview',\n",
       "  'Release Date': ''},\n",
       " {'key': 'bard-jan-24-gemini-pro',\n",
       "  'Model': 'Bard (Gemini Pro)',\n",
       "  'Release Date': ''},\n",
       " {'key': 'llama-3-70b-instruct',\n",
       "  'Model': 'Llama-3-70b-Instruct',\n",
       "  'Release Date': ''},\n",
       " {'key': 'claude-3-sonnet-20240229',\n",
       "  'Model': 'Claude 3 Sonnet',\n",
       "  'Release Date': ''},\n",
       " {'key': 'command-r-plus', 'Model': 'Command R+', 'Release Date': ''},\n",
       " {'key': 'gpt-4-0314', 'Model': 'GPT-4-0314', 'Release Date': ''},\n",
       " {'key': 'claude-3-haiku-20240307',\n",
       "  'Model': 'Claude 3 Haiku',\n",
       "  'Release Date': ''},\n",
       " {'key': 'gpt-4-0613', 'Model': 'GPT-4-0613', 'Release Date': ''},\n",
       " {'key': 'mistral-large-2402',\n",
       "  'Model': 'Mistral-Large-2402',\n",
       "  'Release Date': ''},\n",
       " {'key': 'qwen1.5-72b-chat', 'Model': 'Qwen1.5-72B-Chat', 'Release Date': ''},\n",
       " {'key': 'reka-flash-21b-20240226-online',\n",
       "  'Model': 'Reka-Flash-21B-online',\n",
       "  'Release Date': ''},\n",
       " {'key': 'claude-1', 'Model': 'Claude-1', 'Release Date': ''},\n",
       " {'key': 'reka-flash-21b-20240226',\n",
       "  'Model': 'Reka-Flash-21B',\n",
       "  'Release Date': ''},\n",
       " {'key': 'command-r', 'Model': 'Command R', 'Release Date': ''},\n",
       " {'key': 'mistral-medium', 'Model': 'Mistral Medium', 'Release Date': ''},\n",
       " {'key': 'mixtral-8x22b-instruct-v0.1',\n",
       "  'Model': 'Mixtral-8x22b-Instruct-v0.1',\n",
       "  'Release Date': ''},\n",
       " {'key': 'llama-3-8b-instruct',\n",
       "  'Model': 'Llama-3-8b-Instruct',\n",
       "  'Release Date': ''},\n",
       " {'key': 'gemini-pro-dev-api',\n",
       "  'Model': 'Gemini Pro (Dev API)',\n",
       "  'Release Date': ''},\n",
       " {'key': 'qwen1.5-32b-chat', 'Model': 'Qwen1.5-32B-Chat', 'Release Date': ''},\n",
       " {'key': 'claude-2.0', 'Model': 'Claude-2.0', 'Release Date': ''},\n",
       " {'key': 'mistral-next', 'Model': 'Mistral-Next', 'Release Date': ''},\n",
       " {'key': 'zephyr-orpo-141b-A35b-v0.1',\n",
       "  'Model': 'Zephyr-ORPO-141b-A35b-v0.1',\n",
       "  'Release Date': ''},\n",
       " {'key': 'gpt-3.5-turbo-0613',\n",
       "  'Model': 'GPT-3.5-Turbo-0613',\n",
       "  'Release Date': ''},\n",
       " {'key': 'claude-2.1', 'Model': 'Claude-2.1', 'Release Date': ''},\n",
       " {'key': 'qwen1.5-14b-chat', 'Model': 'Qwen1.5-14B-Chat', 'Release Date': ''},\n",
       " {'key': 'starling-lm-7b-beta',\n",
       "  'Model': 'Starling-LM-7B-beta',\n",
       "  'Release Date': ''},\n",
       " {'key': 'gemini-pro', 'Model': 'Gemini Pro', 'Release Date': ''},\n",
       " {'key': 'mixtral-8x7b-instruct-v0.1',\n",
       "  'Model': 'Mixtral-8x7b-Instruct-v0.1',\n",
       "  'Release Date': ''},\n",
       " {'key': 'claude-instant-1', 'Model': 'Claude-Instant-1', 'Release Date': ''},\n",
       " {'key': 'yi-34b-chat', 'Model': 'Yi-34B-Chat', 'Release Date': ''},\n",
       " {'key': 'gpt-3.5-turbo-0314',\n",
       "  'Model': 'GPT-3.5-Turbo-0314',\n",
       "  'Release Date': ''},\n",
       " {'key': 'wizardlm-70b', 'Model': 'WizardLM-70B-v1.0', 'Release Date': ''},\n",
       " {'key': 'gpt-3.5-turbo-0125',\n",
       "  'Model': 'GPT-3.5-Turbo-0125',\n",
       "  'Release Date': ''},\n",
       " {'key': 'tulu-2-dpo-70b', 'Model': 'Tulu-2-DPO-70B', 'Release Date': ''},\n",
       " {'key': 'dbrx-instruct-preview',\n",
       "  'Model': 'DBRX-Instruct-Preview',\n",
       "  'Release Date': ''},\n",
       " {'key': 'openchat-3.5-0106',\n",
       "  'Model': 'OpenChat-3.5-0106',\n",
       "  'Release Date': ''},\n",
       " {'key': 'vicuna-33b', 'Model': 'Vicuna-33B', 'Release Date': ''},\n",
       " {'key': 'starling-lm-7b-alpha',\n",
       "  'Model': 'Starling-LM-7B-alpha',\n",
       "  'Release Date': ''},\n",
       " {'key': 'llama-2-70b-chat', 'Model': 'Llama-2-70b-chat', 'Release Date': ''},\n",
       " {'key': 'nous-hermes-2-mixtral-8x7b-dpo',\n",
       "  'Model': 'Nous-Hermes-2-Mixtral-8x7B-DPO',\n",
       "  'Release Date': ''},\n",
       " {'key': 'gemma-1.1-7b-it', 'Model': 'Gemma-1.1-7B-it', 'Release Date': ''},\n",
       " {'key': 'llama2-70b-steerlm-chat',\n",
       "  'Model': 'NV-Llama2-70B-SteerLM-Chat',\n",
       "  'Release Date': ''},\n",
       " {'key': 'deepseek-llm-67b-chat',\n",
       "  'Model': 'DeepSeek-LLM-67B-Chat',\n",
       "  'Release Date': ''},\n",
       " {'key': 'openhermes-2.5-mistral-7b',\n",
       "  'Model': 'OpenHermes-2.5-Mistral-7b',\n",
       "  'Release Date': ''},\n",
       " {'key': 'openchat-3.5', 'Model': 'OpenChat-3.5', 'Release Date': ''},\n",
       " {'key': 'pplx-70b-online', 'Model': 'pplx-70b-online', 'Release Date': ''},\n",
       " {'key': 'mistral-7b-instruct-v0.2',\n",
       "  'Model': 'Mistral-7B-Instruct-v0.2',\n",
       "  'Release Date': ''},\n",
       " {'key': 'qwen1.5-7b-chat', 'Model': 'Qwen1.5-7B-Chat', 'Release Date': ''},\n",
       " {'key': 'gpt-3.5-turbo-1106',\n",
       "  'Model': 'GPT-3.5-Turbo-1106',\n",
       "  'Release Date': ''},\n",
       " {'key': 'dolphin-2.2.1-mistral-7b',\n",
       "  'Model': 'Dolphin-2.2.1-Mistral-7B',\n",
       "  'Release Date': ''},\n",
       " {'key': 'solar-10.7b-instruct-v1.0',\n",
       "  'Model': 'SOLAR-10.7B-Instruct-v1.0',\n",
       "  'Release Date': ''},\n",
       " {'key': 'phi-3-mini-128k-instruct',\n",
       "  'Model': 'Phi-3-Mini-128k-Instruct',\n",
       "  'Release Date': ''},\n",
       " {'key': 'wizardlm-13b', 'Model': 'WizardLM-13b-v1.2', 'Release Date': ''},\n",
       " {'key': 'llama-2-13b-chat', 'Model': 'Llama-2-13b-chat', 'Release Date': ''},\n",
       " {'key': 'zephyr-7b-beta', 'Model': 'Zephyr-7b-beta', 'Release Date': ''},\n",
       " {'key': 'codellama-70b-instruct',\n",
       "  'Model': 'CodeLlama-70B-instruct',\n",
       "  'Release Date': ''},\n",
       " {'key': 'mpt-30b-chat', 'Model': 'MPT-30B-chat', 'Release Date': ''},\n",
       " {'key': 'vicuna-13b', 'Model': 'Vicuna-13B', 'Release Date': ''},\n",
       " {'key': 'codellama-34b-instruct',\n",
       "  'Model': 'CodeLlama-34B-instruct',\n",
       "  'Release Date': ''},\n",
       " {'key': 'gemma-7b-it', 'Model': 'Gemma-7B-it', 'Release Date': ''},\n",
       " {'key': 'pplx-7b-online', 'Model': 'pplx-7b-online', 'Release Date': ''},\n",
       " {'key': 'zephyr-7b-alpha', 'Model': 'Zephyr-7b-alpha', 'Release Date': ''},\n",
       " {'key': 'llama-2-7b-chat', 'Model': 'Llama-2-7b-chat', 'Release Date': ''},\n",
       " {'key': 'qwen-14b-chat', 'Model': 'Qwen-14B-Chat', 'Release Date': ''},\n",
       " {'key': 'falcon-180b-chat', 'Model': 'falcon-180b-chat', 'Release Date': ''},\n",
       " {'key': 'guanaco-33b', 'Model': 'Guanaco-33B', 'Release Date': ''},\n",
       " {'key': 'stripedhyena-nous-7b',\n",
       "  'Model': 'StripedHyena-Nous-7B',\n",
       "  'Release Date': ''},\n",
       " {'key': 'olmo-7b-instruct', 'Model': 'OLMo-7B-instruct', 'Release Date': ''},\n",
       " {'key': 'gemma-1.1-2b-it', 'Model': 'Gemma-1.1-2B-it', 'Release Date': ''},\n",
       " {'key': 'mistral-7b-instruct',\n",
       "  'Model': 'Mistral-7B-Instruct-v0.1',\n",
       "  'Release Date': ''},\n",
       " {'key': 'palm-2', 'Model': 'PaLM-Chat-Bison-001', 'Release Date': ''},\n",
       " {'key': 'vicuna-7b', 'Model': 'Vicuna-7B', 'Release Date': ''},\n",
       " {'key': 'qwen1.5-4b-chat', 'Model': 'Qwen1.5-4B-Chat', 'Release Date': ''},\n",
       " {'key': 'gemma-2b-it', 'Model': 'Gemma-2B-it', 'Release Date': ''},\n",
       " {'key': 'koala-13b', 'Model': 'Koala-13B', 'Release Date': ''},\n",
       " {'key': 'chatglm3-6b', 'Model': 'ChatGLM3-6B', 'Release Date': ''},\n",
       " {'key': 'gpt4all-13b-snoozy',\n",
       "  'Model': 'GPT4All-13B-Snoozy',\n",
       "  'Release Date': ''},\n",
       " {'key': 'chatglm2-6b', 'Model': 'ChatGLM2-6B', 'Release Date': ''},\n",
       " {'key': 'mpt-7b-chat', 'Model': 'MPT-7B-Chat', 'Release Date': ''},\n",
       " {'key': 'RWKV-4-Raven-14B', 'Model': 'RWKV-4-Raven-14B', 'Release Date': ''},\n",
       " {'key': 'alpaca-13b', 'Model': 'Alpaca-13B', 'Release Date': ''},\n",
       " {'key': 'oasst-pythia-12b',\n",
       "  'Model': 'OpenAssistant-Pythia-12B',\n",
       "  'Release Date': ''},\n",
       " {'key': 'chatglm-6b', 'Model': 'ChatGLM-6B', 'Release Date': ''},\n",
       " {'key': 'fastchat-t5-3b', 'Model': 'FastChat-T5-3B', 'Release Date': ''},\n",
       " {'key': 'stablelm-tuned-alpha-7b',\n",
       "  'Model': 'StableLM-Tuned-Alpha-7B',\n",
       "  'Release Date': ''},\n",
       " {'key': 'dolly-v2-12b', 'Model': 'Dolly-V2-12B', 'Release Date': ''},\n",
       " {'key': 'llama-13b', 'Model': 'LLaMA-13B', 'Release Date': ''}]"
      ]
     },
     "execution_count": 119,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "t.to_dict(orient=\"records\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build plot"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}