{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# OpenVLM radar charts\n",
    "\n",
    "Downloads the `OpenVLM.json` results file and draws two radar charts for a hand-picked list of models:\n",
    "\n",
    "1. the per-category scores of the `MMBench_TEST_EN` split, and\n",
    "2. the `Overall` score of a fixed list of benchmarks.\n",
    "\n",
    "All axes of one chart share the same score span, so one score point covers the same radial distance on every axis."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import copy as cp\n",
    "import json\n",
    "from pathlib import Path\n",
    "from urllib.request import urlretrieve\n",
    "\n",
    "import matplotlib.font_manager as fm\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Assets: font and results file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def download_file(url, filename=None, overwrite=True):\n",
    "    \"\"\"Download `url` to a local file and return the local file name.\n",
    "\n",
    "    Args:\n",
    "        url: Address of the file to fetch.\n",
    "        filename: Destination path; defaults to the last path component of `url`.\n",
    "        overwrite: When False, an already existing `filename` is reused and\n",
    "            nothing is downloaded.\n",
    "\n",
    "    Returns:\n",
    "        The path the file was (or already is) stored at.\n",
    "    \"\"\"\n",
    "    if filename is None:\n",
    "        filename = url.split('/')[-1]\n",
    "    if overwrite or not Path(filename).exists():\n",
    "        urlretrieve(url, filename)\n",
    "    return filename"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): both assets are fetched over plain HTTP -- switch to HTTPS if the host supports it.\n",
    "FONT_URL = 'http://opencompass.openxlab.space/utils/Fonts/segoepr.ttf'\n",
    "DATA_URL = 'http://opencompass.openxlab.space/utils/OpenVLM.json'\n",
    "\n",
    "# The font file is reused across runs; the results file is fetched again on every run.\n",
    "font_path = download_file(FONT_URL, overwrite=False)\n",
    "data_path = download_file(DATA_URL)\n",
    "\n",
    "font12 = fm.FontProperties(fname=font_path, size=12)\n",
    "font15 = fm.FontProperties(fname=font_path, size=15, weight='bold')\n",
    "font18 = fm.FontProperties(fname=font_path, size=18, weight='bold')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Score normalization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _normalize(raw_data, labels, transform, margin):\n",
    "    \"\"\"Rescale every label to 0-100 using one shared span for all labels.\n",
    "\n",
    "    The widest (max - min) spread among `labels`, measured after `transform`\n",
    "    and enlarged by the factor `margin`, becomes the span of every axis. Each\n",
    "    label's window is centred on its own midpoint, unless the upper edge would\n",
    "    reach 100, in which case the window is pinned to end at 100.\n",
    "\n",
    "    Returns:\n",
    "        (data_list, range_map): a deep copy of `raw_data` holding the rescaled\n",
    "        values, and a dict mapping each label to its (low, high) window in\n",
    "        transformed units.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if no label shows any spread, so there is no span to scale by.\n",
    "        AssertionError: if a value falls outside the window chosen for its label.\n",
    "    \"\"\"\n",
    "    data_list = cp.deepcopy(raw_data)\n",
    "    labels = list(labels)\n",
    "    lows, highs, max_range = {}, {}, 0\n",
    "    for lb in labels:\n",
    "        values = [transform(x[lb]) for x in data_list]\n",
    "        lows[lb], highs[lb] = min(values), max(values)\n",
    "        max_range = max(max_range, highs[lb] - lows[lb])\n",
    "    if labels and max_range == 0:\n",
    "        raise ValueError('all scores are identical for every label; nothing to normalize')\n",
    "    max_range *= margin\n",
    "\n",
    "    range_map = {}\n",
    "    for lb in labels:\n",
    "        mid = (lows[lb] + highs[lb]) / 2\n",
    "        upper = mid + max_range / 2\n",
    "        # Pinning at 100 presumes the transformed scores never exceed 100.\n",
    "        new_range = (mid - max_range / 2, upper) if upper < 100 else (100 - max_range, 100)\n",
    "        range_map[lb] = new_range\n",
    "        for item in data_list:\n",
    "            value = transform(item[lb])\n",
    "            assert new_range[0] <= value <= new_range[1], (\n",
    "                f'{lb}: value {value} falls outside the window {new_range}')\n",
    "            item[lb] = (value - new_range[0]) / max_range * 100\n",
    "    return data_list, range_map\n",
    "\n",
    "\n",
    "def pre_normalize(raw_data, labels):\n",
    "    \"\"\"Linearly rescale scores to 0-100, widening the largest spread by a factor of 1.25.\"\"\"\n",
    "    return _normalize(raw_data, labels, transform=lambda v: v, margin=1.25)\n",
    "\n",
    "\n",
    "def log_normalize(raw_data, labels):\n",
    "    \"\"\"Variant of `pre_normalize` that works on `np.log` of the scores (factor 1.005).\n",
    "\n",
    "    Meant for benchmarks whose scores are too high for the linear version.\n",
    "    NOTE(review): the pin-at-100 rule is applied to the log values as well,\n",
    "    exactly as before -- confirm that this is the intended bound.\n",
    "    \"\"\"\n",
    "    return _normalize(raw_data, labels, transform=np.log, margin=1.005)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load results and pick the models to draw"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(data_path, encoding='utf-8') as f:\n",
    "    data = json.load(f)['results']\n",
    "models = list(data)\n",
    "print(models)\n",
    "\n",
    "# Models to draw, in legend order. Other names tried before: 'Gemini-1.5-Flash', 'Qwen-VL-Plus'.\n",
    "model2vis = [\n",
    "    'GPT-4v (1106, detail-low)', 'Gemini-1.0-Pro', 'Gemini-1.5-Pro',\n",
    "    'InternLM-XComposer2', 'LLaVA-v1.5-13B', 'CogVLM-17B-Chat',\n",
    "    'mPLUG-Owl2', 'Qwen-VL-Chat', 'IDEFICS-80B-Instruct'\n",
    "]\n",
    "\n",
    "missing = [m for m in model2vis if m not in data]\n",
    "assert not missing, f'models missing from the results file: {missing}'\n",
    "\n",
    "colors = [\n",
    "    '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b',\n",
    "    '#e377c2', '#7f7f7f', '#bcbd22'\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Radar chart helper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def draw_radar(raw_data, labels, model_names, title, tick_pad,\n",
    "               display_names=None, rotate_labels=False, fill_alpha=0.25):\n",
    "    \"\"\"Draw one radar chart whose spokes each carry their own score scale.\n",
    "\n",
    "    Relies on the notebook-level `colors`, `font12`, `font15` and `font18`.\n",
    "\n",
    "    Args:\n",
    "        raw_data: One dict per model mapping every entry of `labels` to a score.\n",
    "        labels: Axis names, placed at evenly spaced angles.\n",
    "        model_names: Legend entries, aligned with `raw_data`.\n",
    "        title: Text written above the chart.\n",
    "        tick_pad: Value handed to `tick_params(pad=...)` for the axis names.\n",
    "        display_names: Optional mapping from axis name to the text shown for it;\n",
    "            only used when `rotate_labels` is True.\n",
    "        rotate_labels: Redraw the axis names rotated by their spoke angle\n",
    "            (in `font15`) instead of keeping the built-in labels (`font18`).\n",
    "        fill_alpha: Opacity of the filled polygons.\n",
    "    \"\"\"\n",
    "    labels = list(labels)\n",
    "    num_vars = len(labels)\n",
    "    data_list, range_map = pre_normalize(raw_data, labels)\n",
    "\n",
    "    angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist()\n",
    "    angles_deg = np.linspace(0, 360, num_vars, endpoint=False).tolist()\n",
    "    fig, ax_base = plt.subplots(nrows=1, ncols=1, figsize=(10, 10), subplot_kw=dict(polar=True))\n",
    "\n",
    "    # Repeat the first point so that every polygon is closed.\n",
    "    closed_angles = angles + angles[:1]\n",
    "    for i, (item, model_name) in enumerate(zip(data_list, model_names)):\n",
    "        color = colors[i % len(colors)]  # cycle instead of failing past the palette size\n",
    "        values = [item[lb] for lb in labels]\n",
    "        values += values[:1]\n",
    "        ax_base.plot(closed_angles, values, color=color, linewidth=1, linestyle='solid', label=model_name)\n",
    "        ax_base.fill(closed_angles, values, color=color, alpha=fill_alpha)\n",
    "\n",
    "    ax_base.set_ylim(0, 100)\n",
    "    ax_base.set_yticks([40, 60, 80, 100])\n",
    "    ax_base.set_yticklabels([''] * 4)  # the per-spoke scales are printed by the overlay axes below\n",
    "\n",
    "    ax_base.tick_params(pad=tick_pad)\n",
    "    ax_base.set_xticks(angles)\n",
    "    ax_base.set_xticklabels(labels, fontproperties=font18)\n",
    "\n",
    "    if rotate_labels:\n",
    "        name_map = display_names or {}\n",
    "        for i, tick_label in enumerate(ax_base.get_xticklabels()):\n",
    "            x, y = tick_label.get_position()\n",
    "            name = tick_label.get_text()\n",
    "            rotated = ax_base.text(\n",
    "                x, y, name_map.get(name, name), transform=tick_label.get_transform(),\n",
    "                ha=tick_label.get_ha(), va=tick_label.get_va(), font_properties=font15)\n",
    "            rotated.set_rotation(360 / num_vars * i + 270)\n",
    "        # Hide the built-in labels now that rotated copies exist.\n",
    "        ax_base.set_xticklabels([])\n",
    "\n",
    "    leg = ax_base.legend(loc='center right', bbox_to_anchor=(1.6, 0.5), prop=font15,\n",
    "                         ncol=1, frameon=True, labelspacing=1.2)\n",
    "    for line in leg.get_lines():\n",
    "        line.set_linewidth(2.5)\n",
    "\n",
    "    # One transparent polar axes per spoke, used only to print that spoke's score scale.\n",
    "    # `add_axes` takes [left, bottom, width, height].\n",
    "    # NOTE(review): the four numbers below are kept exactly as in the original layout.\n",
    "    cx, cy, sz = 0.44, 0.435, 0.34\n",
    "    rect = [cx - sz, cy - sz, cx + sz, cy + sz]\n",
    "    overlays = [fig.add_axes(rect, projection='polar', label='axes%d' % i) for i in range(num_vars)]\n",
    "\n",
    "    for ax, angle, label in zip(overlays, angles_deg, labels):\n",
    "        ax.patch.set_visible(False)\n",
    "        ax.grid(False)\n",
    "        ax.xaxis.set_visible(False)\n",
    "        low, high = range_map[label]\n",
    "        # Radii 40/60/80/100 correspond to 2/5 .. 5/5 of this spoke's score window.\n",
    "        tick_text = [f'{low + (high - low) / 5 * k:.1f}' for k in range(2, 6)]\n",
    "        ax.set_rgrids(range(40, 120, 20), angle=angle, labels=tick_text, font_properties=font12)\n",
    "        ax.spines['polar'].set_visible(False)\n",
    "        ax.set_ylim(0, 100)\n",
    "\n",
    "    fig.text(.7, .95, title, fontproperties=font18, ha='center')\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## MMBench radar chart"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "split = 'MMBench_TEST_EN'\n",
    "\n",
    "# Some models lack an evaluation on this split, so check the selected ones up front.\n",
    "no_split = [m for m in model2vis if split not in data[m]]\n",
    "assert not no_split, f'models without {split} results: {no_split}'\n",
    "\n",
    "raw_data = [data[m][split] for m in model2vis]\n",
    "labels = [k for k in raw_data[0] if k != 'Overall']\n",
    "\n",
    "draw_radar(\n",
    "    raw_data, labels, model2vis,\n",
    "    title=f'{len(model2vis)} Representative VLMs on MMBench Test.',\n",
    "    tick_pad=25,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Multi-benchmark radar chart"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "labels = ['SEEDBench_IMG', 'CCBench', 'MMBench_TEST_EN', 'MMBench_TEST_CN', 'MME',\n",
    "          'MMVet', 'MMMU_VAL', 'MathVista', 'HallusionBench', 'LLaVABench']\n",
    "\n",
    "# Text shown on the chart for benchmarks whose key differs from the wanted caption.\n",
    "dataset_map = {\n",
    "    'MMBench_TEST_EN': 'MMBench (Test)',\n",
    "    'MMBench_TEST_CN': 'MMBenchCN (Test)',\n",
    "    'MathVista': 'MathVista (TestMini)',\n",
    "    'MMMU_VAL': 'MMMU (Val)'\n",
    "}\n",
    "\n",
    "raw_data = [{k: data[m][k]['Overall'] for k in labels} for m in model2vis]\n",
    "\n",
    "draw_radar(\n",
    "    raw_data, labels, model2vis,\n",
    "    title=f'{len(model2vis)} Representative VLMs on {len(labels)} Benchmarks in OpenCompass Multi-Modal Leaderboard.',\n",
    "    tick_pad=15,\n",
    "    display_names=dataset_map,\n",
    "    rotate_labels=True,\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}