\begin{table}[t]
\centering
\caption{Results with different rating scales (numeric vs.\ Likert).}
\resizebox{1.0\linewidth}{!}{%
\begin{tabular}{c|cc|cc|cc|cc}
\toprule
& \multicolumn{2}{c|}{\bf Alignment} & \multicolumn{2}{c|}{\bf Safety} & \multicolumn{2}{c|}{\bf Artifact} & \multicolumn{2}{c}{\bf Bias} \\
{\bf Model} & Numeric & Likert & Numeric & Likert & Numeric & Likert & Numeric & Likert \\
\midrule
LLaVA-1.5-7b$^\heartsuit$ & - & - & - & - & - & - & - & - \\
LLaVA-1.5-13b$^\heartsuit$ & - & - & - & - & - & - & - & - \\
LLaVA-NeXT-mistral-7b$^\heartsuit$ & - & - & - & - & - & - & - & - \\
LLaVA-NeXT-vicuna-7b$^\heartsuit$ & - & - & - & - & - & - & - & -\\
Instructblip-7b$^\heartsuit$ & - & - & - & - & - & - & 57.4 & 85.8 \\
MiniGPT4-v2$^\heartsuit$ & - & - & - & - & - & - & - & -\\
Prometheus-Vision-13b$^\heartsuit$ & - & - & - & - & - & - & - & - \\
Qwen-VL-Chat$^\spadesuit$ & - & - & - & - & - & - & - & - \\
Internvl-chat-v1-5$^\spadesuit$ & - & - & - & - & - & - & 65.3 & 83.5 \\
Idefics2-8b$^\spadesuit$ & - & - & - & - & - & - & 52.7 & 77.6 \\
\midrule
GPT-4-vision$^\clubsuit$ & - & - & - & - & - & - & 80.4 & 93.2 \\
GPT-4o$^\clubsuit$ & - & - & - & - & - & - & 82.5 & 92.8 \\
Gemini Ultra$^\clubsuit$ & - & - & - & - & - & - & 75.3 & 88.6 \\
Claude 3 Opus$^\clubsuit$ & - & - & - & - & - & - & 65.6 & 85.0 \\
\bottomrule
\end{tabular}%
}
\label{exp:numeric_likert}
\end{table}