\begin{table}[t]
\centering
\caption{Results with different rating scales (numeric vs.\ Likert).}
\resizebox{1.0\linewidth}{!}{%
\begin{tabular}{c|cc|cc|cc|cc}
\toprule
& \multicolumn{2}{c|}{\bf Alignment} & \multicolumn{2}{c|}{\bf Safety} & \multicolumn{2}{c|}{\bf Artifact} & \multicolumn{2}{c}{\bf Bias} \\
{\bf Model} & Numeric & Likert & Numeric & Likert & Numeric & Likert & Numeric & Likert \\
\midrule
LLaVA-1.5-7b$^\heartsuit$ & - & - & - & - & - & - & - & - \\
LLaVA-1.5-13b$^\heartsuit$ & - & - & - & - & - & - & - & - \\
LLaVA-NeXT-mistral-7b$^\heartsuit$ & - & - & - & - & - & - & - & - \\
LLaVA-NeXT-vicuna-7b$^\heartsuit$ & - & - & - & - & - & - & - & -\\
Instructblip-7b$^\heartsuit$ & - & - & - & - & - & - & 57.4 & 85.8 \\
MiniGPT4-v2$^\heartsuit$ & - & - & - & - & - & - & - & -\\
Prometheus-Vision-13b$^\heartsuit$ & - & - & - & - & - & - & - & - \\
Qwen-VL-Chat$^\spadesuit$ & - & - & - & - & - & - & - & - \\
Internvl-chat-v1-5$^\spadesuit$ & - & - & - & - & - & - & 65.3 & 83.5 \\
Idefics2-8b$^\spadesuit$ & - & - & - & - & - & - & 52.7 & 77.6 \\
\midrule
GPT-4-vision$^\clubsuit$ & - & - & - & - & - & - & 80.4 & 93.2 \\
GPT-4o$^\clubsuit$ & - & - & - & - & - & - & 82.5 & 92.8 \\
Gemini Ultra$^\clubsuit$ & - & - & - & - & - & - & 75.3 & 88.6 \\
Claude 3 Opus$^\clubsuit$ & - & - & - & - & - & - & 65.6 & 85.0 \\
\bottomrule
\end{tabular}%
}
\label{exp:numeric_likert}
\end{table}