benchmark-race / data.json
davanstrien's picture
davanstrien HF Staff
Update data.json (2026-03-21 20:00 UTC)
71f66d5 verified
{
"benchmarks": {
"sweVerified": {
"name": "SWE-bench Verified",
"models": [
{
"model_id": "Qwen/Qwen3.5-397B-A17B",
"short_name": "Qwen3.5-397B-A17B",
"provider": "Qwen",
"score": 76.4,
"date": "2026-02-16"
},
{
"model_id": "MiniMaxAI/MiniMax-M2.5",
"short_name": "MiniMax-M2.5",
"provider": "MiniMaxAI",
"score": 75.8,
"date": "2026-02-12"
},
{
"model_id": "stepfun-ai/Step-3.5-Flash",
"short_name": "Step-3.5-Flash",
"provider": "stepfun-ai",
"score": 74.4,
"date": "2026-02-01"
},
{
"model_id": "MiniMaxAI/MiniMax-M2.1",
"short_name": "MiniMax-M2.1",
"provider": "MiniMaxAI",
"score": 74.0,
"date": "2025-12-20"
},
{
"model_id": "zai-org/GLM-4.7",
"short_name": "GLM-4.7",
"provider": "zai-org",
"score": 73.8,
"date": "2025-12-22"
},
{
"model_id": "zai-org/GLM-5",
"short_name": "GLM-5",
"provider": "zai-org",
"score": 72.8,
"date": "2026-02-11"
},
{
"model_id": "Qwen/Qwen3.5-27B",
"short_name": "Qwen3.5-27B",
"provider": "Qwen",
"score": 72.4,
"date": "2026-02-24"
},
{
"model_id": "Qwen/Qwen3.5-122B-A10B",
"short_name": "Qwen3.5-122B-A10B",
"provider": "Qwen",
"score": 72.0,
"date": "2026-02-24"
},
{
"model_id": "moonshotai/Kimi-K2-Thinking",
"short_name": "Kimi-K2-Thinking",
"provider": "moonshotai",
"score": 71.3,
"date": "2025-11-04"
},
{
"model_id": "moonshotai/Kimi-K2.5",
"short_name": "Kimi-K2.5",
"provider": "moonshotai",
"score": 70.8,
"date": "2026-01-01"
},
{
"model_id": "Qwen/Qwen3-Coder-Next",
"short_name": "Qwen3-Coder-Next",
"provider": "Qwen",
"score": 70.6,
"date": "2026-01-30"
},
{
"model_id": "deepseek-ai/DeepSeek-V3.2",
"short_name": "DeepSeek-V3.2",
"provider": "deepseek-ai",
"score": 70.0,
"date": "2025-12-01"
},
{
"model_id": "MiniMaxAI/MiniMax-M2",
"short_name": "MiniMax-M2",
"provider": "MiniMaxAI",
"score": 69.4,
"date": "2025-10-22"
},
{
"model_id": "Qwen/Qwen3.5-35B-A3B",
"short_name": "Qwen3.5-35B-A3B",
"provider": "Qwen",
"score": 69.2,
"date": "2026-02-24"
},
{
"model_id": "GAIR/OpenSWE-72B",
"short_name": "OpenSWE-72B",
"provider": "GAIR",
"score": 66.0,
"date": "2026-03-15"
},
{
"model_id": "openai/gpt-oss-120b",
"short_name": "gpt-oss-120b",
"provider": "openai",
"score": 62.4,
"date": "2025-08-04"
},
{
"model_id": "GAIR/OpenSWE-32B",
"short_name": "OpenSWE-32B",
"provider": "GAIR",
"score": 62.4,
"date": "2026-03-15"
},
{
"model_id": "openai/gpt-oss-20b",
"short_name": "gpt-oss-20b",
"provider": "openai",
"score": 60.7,
"date": "2025-08-04"
},
{
"model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16",
"short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16",
"provider": "nvidia",
"score": 60.47,
"date": "2026-03-10"
},
{
"model_id": "zai-org/GLM-4.7-Flash",
"short_name": "GLM-4.7-Flash",
"provider": "zai-org",
"score": 59.2,
"date": "2026-01-19"
},
{
"model_id": "facebook/cwm",
"short_name": "cwm",
"provider": "facebook",
"score": 53.9,
"date": "2025-08-25"
},
{
"model_id": "SWE-Lego/SWE-Lego-Qwen3-32B",
"short_name": "SWE-Lego-Qwen3-32B",
"provider": "SWE-Lego",
"score": 52.6,
"date": "2026-01-05"
},
{
"model_id": "SWE-Lego/SWE-Lego-Qwen3-8B",
"short_name": "SWE-Lego-Qwen3-8B",
"provider": "SWE-Lego",
"score": 42.2,
"date": "2025-12-29"
}
]
},
"swePro": {
"name": "SWE-bench Pro",
"models": [
{
"model_id": "MiniMaxAI/MiniMax-M2.5",
"short_name": "MiniMax-M2.5",
"provider": "MiniMaxAI",
"score": 55.4,
"date": "2026-02-12"
},
{
"model_id": "moonshotai/Kimi-K2.5",
"short_name": "Kimi-K2.5",
"provider": "moonshotai",
"score": 50.7,
"date": "2026-01-01"
},
{
"model_id": "Qwen/Qwen3-Coder-Next",
"short_name": "Qwen3-Coder-Next",
"provider": "Qwen",
"score": 44.3,
"date": "2026-01-30"
},
{
"model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
"short_name": "Qwen3-Coder-480B-A35B-Instruct",
"provider": "Qwen",
"score": 38.7,
"date": "2025-07-22"
},
{
"model_id": "MiniMaxAI/MiniMax-M2.1",
"short_name": "MiniMax-M2.1",
"provider": "MiniMaxAI",
"score": 36.81,
"date": "2025-12-20"
},
{
"model_id": "moonshotai/Kimi-K2-Instruct",
"short_name": "Kimi-K2-Instruct",
"provider": "moonshotai",
"score": 27.67,
"date": "2025-07-11"
},
{
"model_id": "Qwen/Qwen3-235B-A22B",
"short_name": "Qwen3-235B-A22B",
"provider": "Qwen",
"score": 21.41,
"date": "2025-04-27"
},
{
"model_id": "openai/gpt-oss-120b",
"short_name": "gpt-oss-120b",
"provider": "openai",
"score": 16.2,
"date": "2025-08-04"
},
{
"model_id": "deepseek-ai/DeepSeek-V3.2",
"short_name": "DeepSeek-V3.2",
"provider": "deepseek-ai",
"score": 15.56,
"date": "2025-12-01"
},
{
"model_id": "google/gemma-3-27b-it",
"short_name": "gemma-3-27b-it",
"provider": "google",
"score": 11.38,
"date": "2025-03-01"
},
{
"model_id": "meta-llama/Llama-3.1-405B-Instruct",
"short_name": "Llama-3.1-405B-Instruct",
"provider": "meta-llama",
"score": 11.18,
"date": "2024-07-16"
},
{
"model_id": "zai-org/GLM-4.6",
"short_name": "GLM-4.6",
"provider": "zai-org",
"score": 9.67,
"date": "2025-09-29"
},
{
"model_id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct",
"short_name": "Llama-4-Maverick-17B-128E-Instruct",
"provider": "meta-llama",
"score": 5.24,
"date": "2025-04-01"
}
]
},
"mmluPro": {
"name": "MMLU-Pro",
"models": [
{
"model_id": "MiniMaxAI/MiniMax-M2.1",
"short_name": "MiniMax-M2.1",
"provider": "MiniMaxAI",
"score": 88.0,
"date": "2025-12-20"
},
{
"model_id": "Qwen/Qwen3.5-397B-A17B",
"short_name": "Qwen3.5-397B-A17B",
"provider": "Qwen",
"score": 87.8,
"date": "2026-02-16"
},
{
"model_id": "moonshotai/Kimi-K2.5",
"short_name": "Kimi-K2.5",
"provider": "moonshotai",
"score": 87.1,
"date": "2026-01-01"
},
{
"model_id": "Qwen/Qwen3.5-122B-A10B",
"short_name": "Qwen3.5-122B-A10B",
"provider": "Qwen",
"score": 86.7,
"date": "2026-02-24"
},
{
"model_id": "Qwen/Qwen3.5-27B",
"short_name": "Qwen3.5-27B",
"provider": "Qwen",
"score": 86.1,
"date": "2026-02-24"
},
{
"model_id": "Qwen/Qwen3.5-35B-A3B",
"short_name": "Qwen3.5-35B-A3B",
"provider": "Qwen",
"score": 85.3,
"date": "2026-02-24"
},
{
"model_id": "deepseek-ai/DeepSeek-R1-0528",
"short_name": "DeepSeek-R1-0528",
"provider": "deepseek-ai",
"score": 85.0,
"date": "2025-05-28"
},
{
"model_id": "deepseek-ai/DeepSeek-V3.2",
"short_name": "DeepSeek-V3.2",
"provider": "deepseek-ai",
"score": 85.0,
"date": "2025-12-01"
},
{
"model_id": "moonshotai/Kimi-K2-Thinking",
"short_name": "Kimi-K2-Thinking",
"provider": "moonshotai",
"score": 84.6,
"date": "2025-11-04"
},
{
"model_id": "Qwen/Qwen3-235B-A22B-Thinking-2507",
"short_name": "Qwen3-235B-A22B-Thinking-2507",
"provider": "Qwen",
"score": 84.4,
"date": "2025-07-25"
},
{
"model_id": "stepfun-ai/Step-3.5-Flash",
"short_name": "Step-3.5-Flash",
"provider": "stepfun-ai",
"score": 84.4,
"date": "2026-02-01"
},
{
"model_id": "zai-org/GLM-4.7",
"short_name": "GLM-4.7",
"provider": "zai-org",
"score": 84.3,
"date": "2025-12-22"
},
{
"model_id": "deepseek-ai/DeepSeek-R1",
"short_name": "DeepSeek-R1",
"provider": "deepseek-ai",
"score": 84.0,
"date": "2025-01-20"
},
{
"model_id": "LGAI-EXAONE/K-EXAONE-236B-A23B",
"short_name": "K-EXAONE-236B-A23B",
"provider": "LGAI-EXAONE",
"score": 83.8,
"date": "2025-12-26"
},
{
"model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16",
"short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16",
"provider": "nvidia",
"score": 83.73,
"date": "2026-03-10"
},
{
"model_id": "Qwen/Qwen3.5-9B",
"short_name": "Qwen3.5-9B",
"provider": "Qwen",
"score": 82.5,
"date": "2026-02-27"
},
{
"model_id": "MiniMaxAI/MiniMax-M2",
"short_name": "MiniMax-M2",
"provider": "MiniMaxAI",
"score": 82.0,
"date": "2025-10-22"
},
{
"model_id": "deepseek-ai/DeepSeek-V3-0324",
"short_name": "DeepSeek-V3-0324",
"provider": "deepseek-ai",
"score": 81.2,
"date": "2025-03-24"
},
{
"model_id": "jdopensource/JoyAI-LLM-Flash",
"short_name": "JoyAI-LLM-Flash",
"provider": "jdopensource",
"score": 81.02,
"date": "2026-02-14"
},
{
"model_id": "Qwen/Qwen3-Next-80B-A3B-Instruct",
"short_name": "Qwen3-Next-80B-A3B-Instruct",
"provider": "Qwen",
"score": 80.6,
"date": "2025-09-09"
},
{
"model_id": "nvidia/Nemotron-Cascade-2-30B-A3B",
"short_name": "Nemotron-Cascade-2-30B-A3B",
"provider": "nvidia",
"score": 79.8,
"date": "2026-03-18"
},
{
"model_id": "Qwen/Qwen3.5-4B",
"short_name": "Qwen3.5-4B",
"provider": "Qwen",
"score": 79.1,
"date": "2026-02-27"
},
{
"model_id": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
"short_name": "NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
"provider": "nvidia",
"score": 78.3,
"date": "2025-12-04"
},
{
"model_id": "meituan-longcat/LongCat-Flash-Lite",
"short_name": "LongCat-Flash-Lite",
"provider": "meituan-longcat",
"score": 78.29,
"date": "2026-01-27"
},
{
"model_id": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8",
"short_name": "NVIDIA-Nemotron-3-Nano-30B-A3B-FP8",
"provider": "nvidia",
"score": 78.1,
"date": "2025-12-06"
},
{
"model_id": "mistralai/Mistral-Small-4-119B-2603",
"short_name": "Mistral-Small-4-119B-2603",
"provider": "mistralai",
"score": 78.0,
"date": "2026-01-23"
},
{
"model_id": "arcee-ai/Trinity-Large-Preview",
"short_name": "Trinity-Large-Preview",
"provider": "arcee-ai",
"score": 75.2,
"date": "2026-01-27"
},
{
"model_id": "Qwen/Qwen3-4B-Thinking-2507",
"short_name": "Qwen3-4B-Thinking-2507",
"provider": "Qwen",
"score": 74.0,
"date": "2025-08-05"
},
{
"model_id": "tiiuae/Falcon-H1R-7B",
"short_name": "Falcon-H1R-7B",
"provider": "tiiuae",
"score": 72.1,
"date": "2025-10-29"
},
{
"model_id": "Qwen/Qwen3-4B-Instruct-2507",
"short_name": "Qwen3-4B-Instruct-2507",
"provider": "Qwen",
"score": 69.6,
"date": "2025-08-05"
},
{
"model_id": "deepseek-ai/DeepSeek-V3",
"short_name": "DeepSeek-V3",
"provider": "deepseek-ai",
"score": 64.4,
"date": "2024-12-25"
},
{
"model_id": "Qwen/Qwen3.5-2B",
"short_name": "Qwen3.5-2B",
"provider": "Qwen",
"score": 55.3,
"date": "2026-02-28"
},
{
"model_id": "meta-llama/Llama-3.1-8B-Instruct",
"short_name": "Llama-3.1-8B-Instruct",
"provider": "meta-llama",
"score": 48.3,
"date": "2024-07-18"
},
{
"model_id": "LiquidAI/LFM2.5-1.2B-Instruct",
"short_name": "LFM2.5-1.2B-Instruct",
"provider": "LiquidAI",
"score": 44.35,
"date": "2026-01-06"
},
{
"model_id": "Qwen/Qwen3.5-0.8B",
"short_name": "Qwen3.5-0.8B",
"provider": "Qwen",
"score": 29.7,
"date": "2026-02-28"
}
]
},
"gpqa": {
"name": "GPQA Diamond",
"models": [
{
"model_id": "Qwen/Qwen3.5-397B-A17B",
"short_name": "Qwen3.5-397B-A17B",
"provider": "Qwen",
"score": 88.4,
"date": "2026-02-16"
},
{
"model_id": "moonshotai/Kimi-K2.5",
"short_name": "Kimi-K2.5",
"provider": "moonshotai",
"score": 87.6,
"date": "2026-01-01"
},
{
"model_id": "Qwen/Qwen3.5-122B-A10B",
"short_name": "Qwen3.5-122B-A10B",
"provider": "Qwen",
"score": 86.6,
"date": "2026-02-24"
},
{
"model_id": "zai-org/GLM-5",
"short_name": "GLM-5",
"provider": "zai-org",
"score": 86.0,
"date": "2026-02-11"
},
{
"model_id": "zai-org/GLM-4.7",
"short_name": "GLM-4.7",
"provider": "zai-org",
"score": 85.7,
"date": "2025-12-22"
},
{
"model_id": "Qwen/Qwen3.5-27B",
"short_name": "Qwen3.5-27B",
"provider": "Qwen",
"score": 85.5,
"date": "2026-02-24"
},
{
"model_id": "MiniMaxAI/MiniMax-M2.5",
"short_name": "MiniMax-M2.5",
"provider": "MiniMaxAI",
"score": 85.2,
"date": "2026-02-12"
},
{
"model_id": "moonshotai/Kimi-K2-Thinking",
"short_name": "Kimi-K2-Thinking",
"provider": "moonshotai",
"score": 84.5,
"date": "2025-11-04"
},
{
"model_id": "Qwen/Qwen3.5-35B-A3B",
"short_name": "Qwen3.5-35B-A3B",
"provider": "Qwen",
"score": 84.2,
"date": "2026-02-24"
},
{
"model_id": "Nanbeige/Nanbeige4.1-3B",
"short_name": "Nanbeige4.1-3B",
"provider": "Nanbeige",
"score": 83.8,
"date": "2026-02-10"
},
{
"model_id": "stepfun-ai/Step-3.5-Flash",
"short_name": "Step-3.5-Flash",
"provider": "stepfun-ai",
"score": 83.5,
"date": "2026-02-01"
},
{
"model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16",
"short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16",
"provider": "nvidia",
"score": 82.7,
"date": "2026-03-10"
},
{
"model_id": "deepseek-ai/DeepSeek-V3.2",
"short_name": "DeepSeek-V3.2",
"provider": "deepseek-ai",
"score": 82.4,
"date": "2025-12-01"
},
{
"model_id": "Qwen/Qwen3.5-9B",
"short_name": "Qwen3.5-9B",
"provider": "Qwen",
"score": 81.7,
"date": "2026-02-27"
},
{
"model_id": "openai/gpt-oss-120b",
"short_name": "gpt-oss-120b",
"provider": "openai",
"score": 80.9,
"date": "2025-08-04"
},
{
"model_id": "meituan-longcat/LongCat-Flash-Thinking-2601",
"short_name": "LongCat-Flash-Thinking-2601",
"provider": "meituan-longcat",
"score": 80.5,
"date": "2026-01-14"
},
{
"model_id": "LGAI-EXAONE/K-EXAONE-236B-A23B",
"short_name": "K-EXAONE-236B-A23B",
"provider": "LGAI-EXAONE",
"score": 79.1,
"date": "2025-12-26"
},
{
"model_id": "Qwen/Qwen3.5-4B",
"short_name": "Qwen3.5-4B",
"provider": "Qwen",
"score": 76.2,
"date": "2026-02-27"
},
{
"model_id": "nvidia/Nemotron-Cascade-2-30B-A3B",
"short_name": "Nemotron-Cascade-2-30B-A3B",
"provider": "nvidia",
"score": 76.1,
"date": "2026-03-18"
},
{
"model_id": "zai-org/GLM-4.7-Flash",
"short_name": "GLM-4.7-Flash",
"provider": "zai-org",
"score": 75.2,
"date": "2026-01-19"
},
{
"model_id": "jdopensource/JoyAI-LLM-Flash",
"short_name": "JoyAI-LLM-Flash",
"provider": "jdopensource",
"score": 74.43,
"date": "2026-02-14"
},
{
"model_id": "openai/gpt-oss-20b",
"short_name": "gpt-oss-20b",
"provider": "openai",
"score": 74.2,
"date": "2025-08-04"
},
{
"model_id": "deepseek-ai/DeepSeek-R1",
"short_name": "DeepSeek-R1",
"provider": "deepseek-ai",
"score": 71.5,
"date": "2025-01-20"
},
{
"model_id": "mistralai/Mistral-Small-4-119B-2603",
"short_name": "Mistral-Small-4-119B-2603",
"provider": "mistralai",
"score": 71.2,
"date": "2026-01-23"
},
{
"model_id": "Qwen/Qwen3-4B-Thinking-2507",
"short_name": "Qwen3-4B-Thinking-2507",
"provider": "Qwen",
"score": 65.8,
"date": "2025-08-05"
},
{
"model_id": "Qwen/Qwen3-4B-Instruct-2507",
"short_name": "Qwen3-4B-Instruct-2507",
"provider": "Qwen",
"score": 62.0,
"date": "2025-08-05"
},
{
"model_id": "LiquidAI/LFM2.5-1.2B-Instruct",
"short_name": "LFM2.5-1.2B-Instruct",
"provider": "LiquidAI",
"score": 38.89,
"date": "2026-01-06"
},
{
"model_id": "meta-llama/Llama-3.1-8B-Instruct",
"short_name": "Llama-3.1-8B-Instruct",
"provider": "meta-llama",
"score": 30.4,
"date": "2024-07-18"
},
{
"model_id": "Qwen/Qwen3.5-0.8B",
"short_name": "Qwen3.5-0.8B",
"provider": "Qwen",
"score": 11.9,
"date": "2026-02-28"
}
]
},
"hle": {
"name": "HLE",
"models": [
{
"model_id": "zai-org/GLM-5",
"short_name": "GLM-5",
"provider": "zai-org",
"score": 50.4,
"date": "2026-02-11"
},
{
"model_id": "moonshotai/Kimi-K2.5",
"short_name": "Kimi-K2.5",
"provider": "moonshotai",
"score": 50.2,
"date": "2026-01-01"
},
{
"model_id": "Qwen/Qwen3.5-27B",
"short_name": "Qwen3.5-27B",
"provider": "Qwen",
"score": 48.5,
"date": "2026-02-24"
},
{
"model_id": "Qwen/Qwen3.5-397B-A17B",
"short_name": "Qwen3.5-397B-A17B",
"provider": "Qwen",
"score": 48.3,
"date": "2026-02-16"
},
{
"model_id": "Qwen/Qwen3.5-122B-A10B",
"short_name": "Qwen3.5-122B-A10B",
"provider": "Qwen",
"score": 47.5,
"date": "2026-02-24"
},
{
"model_id": "moonshotai/Kimi-K2-Thinking",
"short_name": "Kimi-K2-Thinking",
"provider": "moonshotai",
"score": 44.9,
"date": "2025-11-04"
},
{
"model_id": "zai-org/GLM-4.7",
"short_name": "GLM-4.7",
"provider": "zai-org",
"score": 42.8,
"date": "2025-12-22"
},
{
"model_id": "deepseek-ai/DeepSeek-V3.2",
"short_name": "DeepSeek-V3.2",
"provider": "deepseek-ai",
"score": 40.8,
"date": "2025-12-01"
},
{
"model_id": "miromind-ai/MiroThinker-v1.5-235B",
"short_name": "MiroThinker-v1.5-235B",
"provider": "miromind-ai",
"score": 39.2,
"date": "2026-01-04"
},
{
"model_id": "nvidia/Nemotron-Orchestrator-8B",
"short_name": "Nemotron-Orchestrator-8B",
"provider": "nvidia",
"score": 37.1,
"date": "2025-11-25"
},
{
"model_id": "miromind-ai/MiroThinker-v1.5-30B",
"short_name": "MiroThinker-v1.5-30B",
"provider": "miromind-ai",
"score": 31.0,
"date": "2026-01-04"
},
{
"model_id": "meituan-longcat/LongCat-Flash-Thinking-2601",
"short_name": "LongCat-Flash-Thinking-2601",
"provider": "meituan-longcat",
"score": 25.2,
"date": "2026-01-14"
},
{
"model_id": "stepfun-ai/Step-3.5-Flash",
"short_name": "Step-3.5-Flash",
"provider": "stepfun-ai",
"score": 23.1,
"date": "2026-02-01"
},
{
"model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16",
"short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16",
"provider": "nvidia",
"score": 22.82,
"date": "2026-03-10"
},
{
"model_id": "Qwen/Qwen3.5-35B-A3B",
"short_name": "Qwen3.5-35B-A3B",
"provider": "Qwen",
"score": 22.4,
"date": "2026-02-24"
},
{
"model_id": "Nanbeige/Nanbeige4.1-3B",
"short_name": "Nanbeige4.1-3B",
"provider": "Nanbeige",
"score": 22.29,
"date": "2026-02-10"
},
{
"model_id": "MiniMaxAI/MiniMax-M2.1",
"short_name": "MiniMax-M2.1",
"provider": "MiniMaxAI",
"score": 22.2,
"date": "2025-12-20"
},
{
"model_id": "XiaomiMiMo/MiMo-V2-Flash",
"short_name": "MiMo-V2-Flash",
"provider": "XiaomiMiMo",
"score": 22.1,
"date": "2025-12-16"
},
{
"model_id": "MiniMaxAI/MiniMax-M2.5",
"short_name": "MiniMax-M2.5",
"provider": "MiniMaxAI",
"score": 19.4,
"date": "2026-02-12"
},
{
"model_id": "openbmb/AgentCPM-Explore",
"short_name": "AgentCPM-Explore",
"provider": "openbmb",
"score": 19.1,
"date": "2026-01-11"
},
{
"model_id": "openai/gpt-oss-120b",
"short_name": "gpt-oss-120b",
"provider": "openai",
"score": 19.0,
"date": "2025-08-04"
},
{
"model_id": "nvidia/Nemotron-Cascade-2-30B-A3B",
"short_name": "Nemotron-Cascade-2-30B-A3B",
"provider": "nvidia",
"score": 17.7,
"date": "2026-03-18"
},
{
"model_id": "openai/gpt-oss-20b",
"short_name": "gpt-oss-20b",
"provider": "openai",
"score": 17.3,
"date": "2025-08-04"
},
{
"model_id": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
"short_name": "NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
"provider": "nvidia",
"score": 15.5,
"date": "2025-12-04"
},
{
"model_id": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8",
"short_name": "NVIDIA-Nemotron-3-Nano-30B-A3B-FP8",
"provider": "nvidia",
"score": 15.5,
"date": "2025-12-06"
},
{
"model_id": "zai-org/GLM-4.7-Flash",
"short_name": "GLM-4.7-Flash",
"provider": "zai-org",
"score": 14.4,
"date": "2026-01-19"
},
{
"model_id": "LGAI-EXAONE/K-EXAONE-236B-A23B",
"short_name": "K-EXAONE-236B-A23B",
"provider": "LGAI-EXAONE",
"score": 13.6,
"date": "2025-12-26"
},
{
"model_id": "MiniMaxAI/MiniMax-M2",
"short_name": "MiniMax-M2",
"provider": "MiniMaxAI",
"score": 12.5,
"date": "2025-10-22"
},
{
"model_id": "tiiuae/Falcon-H1R-7B",
"short_name": "Falcon-H1R-7B",
"provider": "tiiuae",
"score": 11.1,
"date": "2025-10-29"
},
{
"model_id": "HelpingAI/Dhanishtha-2.0-0126",
"short_name": "Dhanishtha-2.0-0126",
"provider": "HelpingAI",
"score": 9.92,
"date": "2026-01-01"
}
]
},
"aime2026": {
"name": "AIME 2026",
"models": [
{
"model_id": "stepfun-ai/Step-3.5-Flash",
"short_name": "Step-3.5-Flash",
"provider": "stepfun-ai",
"score": 96.67,
"date": "2026-02-01"
},
{
"model_id": "moonshotai/Kimi-K2.5",
"short_name": "Kimi-K2.5",
"provider": "moonshotai",
"score": 95.83,
"date": "2026-01-01"
},
{
"model_id": "zai-org/GLM-5",
"short_name": "GLM-5",
"provider": "zai-org",
"score": 95.83,
"date": "2026-02-11"
},
{
"model_id": "deepseek-ai/DeepSeek-V3.2",
"short_name": "DeepSeek-V3.2",
"provider": "deepseek-ai",
"score": 94.17,
"date": "2025-12-01"
},
{
"model_id": "Qwen/Qwen3.5-397B-A17B",
"short_name": "Qwen3.5-397B-A17B",
"provider": "Qwen",
"score": 93.33,
"date": "2026-02-16"
},
{
"model_id": "Qwen/Qwen3.5-35B-A3B",
"short_name": "Qwen3.5-35B-A3B",
"provider": "Qwen",
"score": 93.33,
"date": "2026-02-24"
},
{
"model_id": "Qwen/Qwen3.5-9B",
"short_name": "Qwen3.5-9B",
"provider": "Qwen",
"score": 92.5,
"date": "2026-02-27"
},
{
"model_id": "Qwen/Qwen3.5-27B",
"short_name": "Qwen3.5-27B",
"provider": "Qwen",
"score": 90.83,
"date": "2026-02-24"
},
{
"model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16",
"short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16",
"provider": "nvidia",
"score": 90.0,
"date": "2026-03-10"
},
{
"model_id": "Qwen/Qwen3-30B-A3B-Thinking-2507",
"short_name": "Qwen3-30B-A3B-Thinking-2507",
"provider": "Qwen",
"score": 87.5,
"date": "2025-07-29"
},
{
"model_id": "Qwen/Qwen3-4B-Thinking-2507",
"short_name": "Qwen3-4B-Thinking-2507",
"provider": "Qwen",
"score": 82.5,
"date": "2025-08-05"
},
{
"model_id": "lm-provers/QED-Nano",
"short_name": "QED-Nano",
"provider": "lm-provers",
"score": 82.5,
"date": "2026-02-12"
}
]
},
"hmmt2026": {
"name": "HMMT Feb 2026",
"models": [
{
"model_id": "Qwen/Qwen3.5-397B-A17B",
"short_name": "Qwen3.5-397B-A17B",
"provider": "Qwen",
"score": 87.88,
"date": "2026-02-16"
},
{
"model_id": "moonshotai/Kimi-K2.5",
"short_name": "Kimi-K2.5",
"provider": "moonshotai",
"score": 87.12,
"date": "2026-01-01"
},
{
"model_id": "stepfun-ai/Step-3.5-Flash",
"short_name": "Step-3.5-Flash",
"provider": "stepfun-ai",
"score": 86.36,
"date": "2026-02-01"
},
{
"model_id": "zai-org/GLM-5",
"short_name": "GLM-5",
"provider": "zai-org",
"score": 86.36,
"date": "2026-02-11"
},
{
"model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16",
"short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16",
"provider": "nvidia",
"score": 84.85,
"date": "2026-03-10"
},
{
"model_id": "deepseek-ai/DeepSeek-V3.2",
"short_name": "DeepSeek-V3.2",
"provider": "deepseek-ai",
"score": 84.09,
"date": "2025-12-01"
},
{
"model_id": "Qwen/Qwen3.5-35B-A3B",
"short_name": "Qwen3.5-35B-A3B",
"provider": "Qwen",
"score": 81.82,
"date": "2026-02-24"
},
{
"model_id": "Qwen/Qwen3.5-27B",
"short_name": "Qwen3.5-27B",
"provider": "Qwen",
"score": 81.06,
"date": "2026-02-24"
},
{
"model_id": "Qwen/Qwen3-30B-A3B-Thinking-2507",
"short_name": "Qwen3-30B-A3B-Thinking-2507",
"provider": "Qwen",
"score": 78.79,
"date": "2025-07-29"
},
{
"model_id": "Qwen/Qwen3.5-9B",
"short_name": "Qwen3.5-9B",
"provider": "Qwen",
"score": 71.21,
"date": "2026-02-27"
},
{
"model_id": "lm-provers/QED-Nano",
"short_name": "QED-Nano",
"provider": "lm-provers",
"score": 62.88,
"date": "2026-02-12"
},
{
"model_id": "Qwen/Qwen3-4B-Thinking-2507",
"short_name": "Qwen3-4B-Thinking-2507",
"provider": "Qwen",
"score": 53.03,
"date": "2025-08-05"
}
]
},
"olmOcr": {
"name": "olmOCR-bench",
"models": [
{
"model_id": "datalab-to/chandra-ocr-2",
"short_name": "chandra-ocr-2",
"provider": "datalab-to",
"score": 85.9,
"date": "2026-03-16"
},
{
"model_id": "rednote-hilab/dots.mocr",
"short_name": "dots.mocr",
"provider": "rednote-hilab",
"score": 83.9,
"date": "2026-03-19"
},
{
"model_id": "lightonai/LightOnOCR-2-1B",
"short_name": "LightOnOCR-2-1B",
"provider": "lightonai",
"score": 83.2,
"date": "2026-01-16"
},
{
"model_id": "datalab-to/chandra",
"short_name": "chandra",
"provider": "datalab-to",
"score": 83.1,
"date": "2025-10-21"
},
{
"model_id": "infly/Infinity-Parser-7B",
"short_name": "Infinity-Parser-7B",
"provider": "infly",
"score": 82.5,
"date": "2025-10-17"
},
{
"model_id": "allenai/olmOCR-2-7B-1025-FP8",
"short_name": "olmOCR-2-7B-1025-FP8",
"provider": "allenai",
"score": 82.4,
"date": "2025-10-06"
},
{
"model_id": "PaddlePaddle/PaddleOCR-VL",
"short_name": "PaddleOCR-VL",
"provider": "PaddlePaddle",
"score": 80.0,
"date": "2025-10-16"
},
{
"model_id": "baidu/Qianfan-OCR",
"short_name": "Qianfan-OCR",
"provider": "baidu",
"score": 79.8,
"date": "2026-03-18"
},
{
"model_id": "rednote-hilab/dots.ocr",
"short_name": "dots.ocr",
"provider": "rednote-hilab",
"score": 79.1,
"date": "2025-07-30"
},
{
"model_id": "deepseek-ai/DeepSeek-OCR-2",
"short_name": "DeepSeek-OCR-2",
"provider": "deepseek-ai",
"score": 76.3,
"date": "2026-01-27"
},
{
"model_id": "lightonai/LightOnOCR-1B-1025",
"short_name": "LightOnOCR-1B-1025",
"provider": "lightonai",
"score": 76.1,
"date": "2025-10-20"
},
{
"model_id": "deepseek-ai/DeepSeek-OCR",
"short_name": "DeepSeek-OCR",
"provider": "deepseek-ai",
"score": 75.7,
"date": "2025-10-17"
},
{
"model_id": "opendatalab/MinerU2.5-2509-1.2B",
"short_name": "MinerU2.5-2509-1.2B",
"provider": "opendatalab",
"score": 75.2,
"date": "2025-09-17"
},
{
"model_id": "zai-org/GLM-OCR",
"short_name": "GLM-OCR",
"provider": "zai-org",
"score": 75.2,
"date": "2026-01-30"
},
{
"model_id": "FireRedTeam/FireRed-OCR",
"short_name": "FireRed-OCR",
"provider": "FireRedTeam",
"score": 70.2,
"date": "2026-02-28"
},
{
"model_id": "nanonets/Nanonets-OCR2-3B",
"short_name": "Nanonets-OCR2-3B",
"provider": "nanonets",
"score": 69.5,
"date": "2025-10-13"
}
]
},
"terminalBench": {
"name": "Terminal-Bench 2.0",
"models": [
{
"model_id": "Qwen/Qwen3.5-397B-A17B",
"short_name": "Qwen3.5-397B-A17B",
"provider": "Qwen",
"score": 52.5,
"date": "2026-02-16"
},
{
"model_id": "zai-org/GLM-5",
"short_name": "GLM-5",
"provider": "zai-org",
"score": 52.4,
"date": "2026-02-11"
},
{
"model_id": "stepfun-ai/Step-3.5-Flash",
"short_name": "Step-3.5-Flash",
"provider": "stepfun-ai",
"score": 51.0,
"date": "2026-02-01"
},
{
"model_id": "Qwen/Qwen3.5-122B-A10B",
"short_name": "Qwen3.5-122B-A10B",
"provider": "Qwen",
"score": 49.4,
"date": "2026-02-24"
},
{
"model_id": "moonshotai/Kimi-K2.5",
"short_name": "Kimi-K2.5",
"provider": "moonshotai",
"score": 43.2,
"date": "2026-01-01"
},
{
"model_id": "Qwen/Qwen3.5-27B",
"short_name": "Qwen3.5-27B",
"provider": "Qwen",
"score": 41.6,
"date": "2026-02-24"
},
{
"model_id": "Qwen/Qwen3.5-35B-A3B",
"short_name": "Qwen3.5-35B-A3B",
"provider": "Qwen",
"score": 40.5,
"date": "2026-02-24"
},
{
"model_id": "deepseek-ai/DeepSeek-V3.2",
"short_name": "DeepSeek-V3.2",
"provider": "deepseek-ai",
"score": 39.6,
"date": "2025-12-01"
},
{
"model_id": "Qwen/Qwen3-Coder-Next",
"short_name": "Qwen3-Coder-Next",
"provider": "Qwen",
"score": 36.2,
"date": "2026-01-30"
},
{
"model_id": "moonshotai/Kimi-K2-Thinking",
"short_name": "Kimi-K2-Thinking",
"provider": "moonshotai",
"score": 35.7,
"date": "2025-11-04"
},
{
"model_id": "zai-org/GLM-4.7",
"short_name": "GLM-4.7",
"provider": "zai-org",
"score": 33.4,
"date": "2025-12-22"
},
{
"model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16",
"short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16",
"provider": "nvidia",
"score": 31.0,
"date": "2026-03-10"
},
{
"model_id": "MiniMaxAI/MiniMax-M2",
"short_name": "MiniMax-M2",
"provider": "MiniMaxAI",
"score": 30.0,
"date": "2025-10-22"
},
{
"model_id": "MiniMaxAI/MiniMax-M2.1",
"short_name": "MiniMax-M2.1",
"provider": "MiniMaxAI",
"score": 29.2,
"date": "2025-12-20"
},
{
"model_id": "moonshotai/Kimi-K2-Instruct",
"short_name": "Kimi-K2-Instruct",
"provider": "moonshotai",
"score": 27.8,
"date": "2025-07-11"
},
{
"model_id": "nvidia/Nemotron-Terminal-32B",
"short_name": "Nemotron-Terminal-32B",
"provider": "nvidia",
"score": 27.4,
"date": "2026-02-17"
},
{
"model_id": "zai-org/GLM-4.6",
"short_name": "GLM-4.6",
"provider": "zai-org",
"score": 24.5,
"date": "2025-09-29"
},
{
"model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
"short_name": "Qwen3-Coder-480B-A35B-Instruct",
"provider": "Qwen",
"score": 23.9,
"date": "2025-07-22"
},
{
"model_id": "nvidia/Nemotron-Terminal-14B",
"short_name": "Nemotron-Terminal-14B",
"provider": "nvidia",
"score": 20.2,
"date": "2026-02-17"
},
{
"model_id": "nvidia/Nemotron-Terminal-8B",
"short_name": "Nemotron-Terminal-8B",
"provider": "nvidia",
"score": 13.0,
"date": "2026-02-17"
}
]
},
"evasionBench": {
"name": "EvasionBench",
"models": [
{
"model_id": "zai-org/GLM-4.7",
"short_name": "GLM-4.7",
"provider": "zai-org",
"score": 82.91,
"date": "2025-12-22"
},
{
"model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
"short_name": "Qwen3-Coder-480B-A35B-Instruct",
"provider": "Qwen",
"score": 78.16,
"date": "2025-07-22"
},
{
"model_id": "MiniMaxAI/MiniMax-M2.1",
"short_name": "MiniMax-M2.1",
"provider": "MiniMaxAI",
"score": 71.31,
"date": "2025-12-20"
},
{
"model_id": "deepseek-ai/DeepSeek-V3.2",
"short_name": "DeepSeek-V3.2",
"provider": "deepseek-ai",
"score": 66.88,
"date": "2025-12-01"
},
{
"model_id": "moonshotai/Kimi-K2-Instruct-0905",
"short_name": "Kimi-K2-Instruct-0905",
"provider": "moonshotai",
"score": 66.68,
"date": "2025-09-03"
}
]
}
},
"logos": {
"miromind-ai": "https://cdn-avatars.huggingface.co/v1/production/uploads/682c41fb2f8a52030ec93ce0/Cna52_IapEXuNBsyI3lvR.png",
"opendatalab": "https://cdn-avatars.huggingface.co/v1/production/uploads/639c3afa7432f2f5d16b7296/yqxxBknyeqkGnYsjoaR4M.png",
"HelpingAI": "https://cdn-avatars.huggingface.co/v1/production/uploads/6612aedf09f16e7347dfa7e1/jHRLPBTlyykFwrd6-Mak_.png",
"FireRedTeam": "https://cdn-avatars.huggingface.co/v1/production/uploads/66ec07ef12bd743cfe91004e/PK3bgl6aF2RzW1QFKkq8R.png",
"baidu": "https://cdn-avatars.huggingface.co/v1/production/uploads/64f187a2cc1c03340ac30498/TYYUxK8xD1AxExFMWqbZD.png",
"facebook": "https://cdn-avatars.huggingface.co/v1/production/uploads/1592839207516-noauth.png",
"meta-llama": "https://cdn-avatars.huggingface.co/v1/production/uploads/646cf8084eefb026fb8fd8bc/oCTqufkdTkjyGodsx1vo1.png",
"stepfun-ai": "https://cdn-avatars.huggingface.co/v1/production/uploads/66935cee39002fc0569c2943/Qv8QPbkgoKE3wR4jTzHiy.png",
"mistralai": "https://cdn-avatars.huggingface.co/v1/production/uploads/634c17653d11eaedd88b314d/9OgyfKstSZtbmsmuG8MbU.png",
"GAIR": "https://cdn-avatars.huggingface.co/v1/production/uploads/6144a0c4ff1146bbd84d9865/NqAuVddq2ci-AsFcFNbav.png",
"XiaomiMiMo": "https://cdn-avatars.huggingface.co/v1/production/uploads/680cb7d1233834890a64acee/5w_4aLfF-7MAyaIPOV498.jpeg",
"nvidia": "https://cdn-avatars.huggingface.co/v1/production/uploads/1613114437487-60262a8e0703121c822a80b6.png",
"LGAI-EXAONE": "https://cdn-avatars.huggingface.co/v1/production/uploads/66a899a72f11aaf66001a8dc/UfdrP3GMo9pNT62BaMnhw.png",
"jdopensource": "https://cdn-avatars.huggingface.co/v1/production/uploads/68c0e2ab44ea28a974e3074b/g-4gTubd16qUtwmGZ0n4h.png",
"Qwen": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png",
"tiiuae": "https://cdn-avatars.huggingface.co/v1/production/uploads/61a8d1aac664736898ffc84f/AT6cAB5ZNwCcqFMal71WD.jpeg",
"LiquidAI": "https://cdn-avatars.huggingface.co/v1/production/uploads/61b8e2ba285851687028d395/EsTgVtnM2IqVRKgPdfqcB.png",
"allenai": "https://cdn-avatars.huggingface.co/v1/production/uploads/652db071b62cf1f8463221e2/CxxwFiaomTa1MCX_B7-pT.png",
"SWE-Lego": "https://cdn-avatars.huggingface.co/v1/production/uploads/60fc2fcca6bdebbe52dfdaf4/AeuYwUH-CQCt893qnmAGa.png",
"datalab-to": "https://cdn-avatars.huggingface.co/v1/production/uploads/67ab6afe315e622f597bf9e8/YOgg0gVYVXZC1PDIHFTWK.png",
"PaddlePaddle": "https://cdn-avatars.huggingface.co/v1/production/uploads/1654942635336-5f3ff69679c1ba4c353d0c5a.png",
"nanonets": "https://cdn-avatars.huggingface.co/v1/production/uploads/641fc216a390e539522d511f/Xtxh40e8zSzkuKtCr58DH.jpeg",
"Nanbeige": "https://cdn-avatars.huggingface.co/v1/production/uploads/646f0d118ff94af23bc44aab/GXHCollpMRgvYqUXQ2BQ7.png",
"infly": "https://cdn-avatars.huggingface.co/v1/production/uploads/63ed9862679c2cc40abb55d2/0n6g0jngiKkRjaEoAvPmM.png",
"openai": "https://cdn-avatars.huggingface.co/v1/production/uploads/68783facef79a05727260de3/UPX5RQxiPGA-ZbBmArIKq.png",
"meituan-longcat": "https://cdn-avatars.huggingface.co/v1/production/uploads/68a2a29ab9d4c5698e02c747/CDCAx7X7rXDt7xjI-DoxG.png",
"lm-provers": "https://cdn-avatars.huggingface.co/v1/production/uploads/5f0c746619cb630495b814fd/Td4sH4W-LIdR89AqHCuw3.jpeg",
"openbmb": "https://cdn-avatars.huggingface.co/v1/production/uploads/1670387859384-633fe7784b362488336bbfad.png",
"arcee-ai": "https://cdn-avatars.huggingface.co/v1/production/uploads/6435718aaaef013d1aec3b8b/GZPnGkfMn8Ino6JbkL4fJ.png",
"lightonai": "https://cdn-avatars.huggingface.co/v1/production/uploads/1651597775471-62715572ab9243b5d40cbb1d.png",
"MiniMaxAI": "https://cdn-avatars.huggingface.co/v1/production/uploads/676e38ad04af5bec20bc9faf/dUd-LsZEX0H_d4qefO_g6.jpeg",
"rednote-hilab": "https://cdn-avatars.huggingface.co/v1/production/uploads/6807a1d6504547b3554b9c73/WgnnQDsz7FqnyTtv8mmRO.png",
"google": "https://cdn-avatars.huggingface.co/v1/production/uploads/5dd96eb166059660ed1ee413/WtA3YYitedOr9n02eHfJe.png",
"zai-org": "https://cdn-avatars.huggingface.co/v1/production/uploads/62dc173789b4cf157d36ebee/i_pxzM2ZDo3Ub-BEgIkE9.png",
"deepseek-ai": "https://cdn-avatars.huggingface.co/v1/production/uploads/6538815d1bdb3c40db94fbfa/xMBly9PUMphrFVMxLX4kq.png",
"moonshotai": "https://cdn-avatars.huggingface.co/v1/production/uploads/641c1e77c3983aa9490f8121/X1yT2rsaIbR9cdYGEVu0X.jpeg"
},
"colors": {
"FireRedTeam": "#6366f1",
"GAIR": "#0d9488",
"HelpingAI": "#d97706",
"LGAI-EXAONE": "#e11d48",
"LiquidAI": "#7c3aed",
"MiniMaxAI": "#16a34a",
"Nanbeige": "#2563eb",
"PaddlePaddle": "#ea580c",
"Qwen": "#8b5cf6",
"SWE-Lego": "#0891b2",
"XiaomiMiMo": "#c026d3",
"allenai": "#65a30d",
"arcee-ai": "#dc2626",
"baidu": "#0284c7",
"datalab-to": "#a21caf",
"deepseek-ai": "#059669",
"facebook": "#9333ea",
"google": "#ca8a04",
"infly": "#be185d",
"jdopensource": "#0369a1",
"lightonai": "#6366f1",
"lm-provers": "#0d9488",
"meituan-longcat": "#d97706",
"meta-llama": "#e11d48",
"miromind-ai": "#7c3aed",
"mistralai": "#16a34a",
"moonshotai": "#2563eb",
"nanonets": "#ea580c",
"nvidia": "#8b5cf6",
"openai": "#0891b2",
"openbmb": "#c026d3",
"opendatalab": "#65a30d",
"rednote-hilab": "#dc2626",
"stepfun-ai": "#0284c7",
"tiiuae": "#a21caf",
"zai-org": "#059669"
},
"generated_at": "2026-03-21T20:00:33.241053+00:00"
}