Spaces:
Running
Running
| { | |
| "benchmarks": { | |
| "sweVerified": { | |
| "name": "SWE-bench Verified", | |
| "models": [ | |
| { | |
| "model_id": "Qwen/Qwen3.5-397B-A17B", | |
| "short_name": "Qwen3.5-397B-A17B", | |
| "provider": "Qwen", | |
| "score": 76.4, | |
| "date": "2026-02-16" | |
| }, | |
| { | |
| "model_id": "MiniMaxAI/MiniMax-M2.5", | |
| "short_name": "MiniMax-M2.5", | |
| "provider": "MiniMaxAI", | |
| "score": 75.8, | |
| "date": "2026-02-12" | |
| }, | |
| { | |
| "model_id": "stepfun-ai/Step-3.5-Flash", | |
| "short_name": "Step-3.5-Flash", | |
| "provider": "stepfun-ai", | |
| "score": 74.4, | |
| "date": "2026-02-01" | |
| }, | |
| { | |
| "model_id": "MiniMaxAI/MiniMax-M2.1", | |
| "short_name": "MiniMax-M2.1", | |
| "provider": "MiniMaxAI", | |
| "score": 74.0, | |
| "date": "2025-12-20" | |
| }, | |
| { | |
| "model_id": "zai-org/GLM-4.7", | |
| "short_name": "GLM-4.7", | |
| "provider": "zai-org", | |
| "score": 73.8, | |
| "date": "2025-12-22" | |
| }, | |
| { | |
| "model_id": "zai-org/GLM-5", | |
| "short_name": "GLM-5", | |
| "provider": "zai-org", | |
| "score": 72.8, | |
| "date": "2026-02-11" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3.5-27B", | |
| "short_name": "Qwen3.5-27B", | |
| "provider": "Qwen", | |
| "score": 72.4, | |
| "date": "2026-02-24" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3.5-122B-A10B", | |
| "short_name": "Qwen3.5-122B-A10B", | |
| "provider": "Qwen", | |
| "score": 72.0, | |
| "date": "2026-02-24" | |
| }, | |
| { | |
| "model_id": "moonshotai/Kimi-K2-Thinking", | |
| "short_name": "Kimi-K2-Thinking", | |
| "provider": "moonshotai", | |
| "score": 71.3, | |
| "date": "2025-11-04" | |
| }, | |
| { | |
| "model_id": "moonshotai/Kimi-K2.5", | |
| "short_name": "Kimi-K2.5", | |
| "provider": "moonshotai", | |
| "score": 70.8, | |
| "date": "2026-01-01" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3-Coder-Next", | |
| "short_name": "Qwen3-Coder-Next", | |
| "provider": "Qwen", | |
| "score": 70.6, | |
| "date": "2026-01-30" | |
| }, | |
| { | |
| "model_id": "deepseek-ai/DeepSeek-V3.2", | |
| "short_name": "DeepSeek-V3.2", | |
| "provider": "deepseek-ai", | |
| "score": 70.0, | |
| "date": "2025-12-01" | |
| }, | |
| { | |
| "model_id": "MiniMaxAI/MiniMax-M2", | |
| "short_name": "MiniMax-M2", | |
| "provider": "MiniMaxAI", | |
| "score": 69.4, | |
| "date": "2025-10-22" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3.5-35B-A3B", | |
| "short_name": "Qwen3.5-35B-A3B", | |
| "provider": "Qwen", | |
| "score": 69.2, | |
| "date": "2026-02-24" | |
| }, | |
| { | |
| "model_id": "GAIR/OpenSWE-72B", | |
| "short_name": "OpenSWE-72B", | |
| "provider": "GAIR", | |
| "score": 66.0, | |
| "date": "2026-03-15" | |
| }, | |
| { | |
| "model_id": "openai/gpt-oss-120b", | |
| "short_name": "gpt-oss-120b", | |
| "provider": "openai", | |
| "score": 62.4, | |
| "date": "2025-08-04" | |
| }, | |
| { | |
| "model_id": "GAIR/OpenSWE-32B", | |
| "short_name": "OpenSWE-32B", | |
| "provider": "GAIR", | |
| "score": 62.4, | |
| "date": "2026-03-15" | |
| }, | |
| { | |
| "model_id": "openai/gpt-oss-20b", | |
| "short_name": "gpt-oss-20b", | |
| "provider": "openai", | |
| "score": 60.7, | |
| "date": "2025-08-04" | |
| }, | |
| { | |
| "model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", | |
| "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", | |
| "provider": "nvidia", | |
| "score": 60.47, | |
| "date": "2026-03-10" | |
| }, | |
| { | |
| "model_id": "zai-org/GLM-4.7-Flash", | |
| "short_name": "GLM-4.7-Flash", | |
| "provider": "zai-org", | |
| "score": 59.2, | |
| "date": "2026-01-19" | |
| }, | |
| { | |
| "model_id": "facebook/cwm", | |
| "short_name": "cwm", | |
| "provider": "facebook", | |
| "score": 53.9, | |
| "date": "2025-08-25" | |
| }, | |
| { | |
| "model_id": "SWE-Lego/SWE-Lego-Qwen3-32B", | |
| "short_name": "SWE-Lego-Qwen3-32B", | |
| "provider": "SWE-Lego", | |
| "score": 52.6, | |
| "date": "2026-01-05" | |
| }, | |
| { | |
| "model_id": "SWE-Lego/SWE-Lego-Qwen3-8B", | |
| "short_name": "SWE-Lego-Qwen3-8B", | |
| "provider": "SWE-Lego", | |
| "score": 42.2, | |
| "date": "2025-12-29" | |
| } | |
| ] | |
| }, | |
| "swePro": { | |
| "name": "SWE-bench Pro", | |
| "models": [ | |
| { | |
| "model_id": "MiniMaxAI/MiniMax-M2.5", | |
| "short_name": "MiniMax-M2.5", | |
| "provider": "MiniMaxAI", | |
| "score": 55.4, | |
| "date": "2026-02-12" | |
| }, | |
| { | |
| "model_id": "moonshotai/Kimi-K2.5", | |
| "short_name": "Kimi-K2.5", | |
| "provider": "moonshotai", | |
| "score": 50.7, | |
| "date": "2026-01-01" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3-Coder-Next", | |
| "short_name": "Qwen3-Coder-Next", | |
| "provider": "Qwen", | |
| "score": 44.3, | |
| "date": "2026-01-30" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", | |
| "short_name": "Qwen3-Coder-480B-A35B-Instruct", | |
| "provider": "Qwen", | |
| "score": 38.7, | |
| "date": "2025-07-22" | |
| }, | |
| { | |
| "model_id": "MiniMaxAI/MiniMax-M2.1", | |
| "short_name": "MiniMax-M2.1", | |
| "provider": "MiniMaxAI", | |
| "score": 36.81, | |
| "date": "2025-12-20" | |
| }, | |
| { | |
| "model_id": "moonshotai/Kimi-K2-Instruct", | |
| "short_name": "Kimi-K2-Instruct", | |
| "provider": "moonshotai", | |
| "score": 27.67, | |
| "date": "2025-07-11" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3-235B-A22B", | |
| "short_name": "Qwen3-235B-A22B", | |
| "provider": "Qwen", | |
| "score": 21.41, | |
| "date": "2025-04-27" | |
| }, | |
| { | |
| "model_id": "openai/gpt-oss-120b", | |
| "short_name": "gpt-oss-120b", | |
| "provider": "openai", | |
| "score": 16.2, | |
| "date": "2025-08-04" | |
| }, | |
| { | |
| "model_id": "deepseek-ai/DeepSeek-V3.2", | |
| "short_name": "DeepSeek-V3.2", | |
| "provider": "deepseek-ai", | |
| "score": 15.56, | |
| "date": "2025-12-01" | |
| }, | |
| { | |
| "model_id": "google/gemma-3-27b-it", | |
| "short_name": "gemma-3-27b-it", | |
| "provider": "google", | |
| "score": 11.38, | |
| "date": "2025-03-01" | |
| }, | |
| { | |
| "model_id": "meta-llama/Llama-3.1-405B-Instruct", | |
| "short_name": "Llama-3.1-405B-Instruct", | |
| "provider": "meta-llama", | |
| "score": 11.18, | |
| "date": "2024-07-16" | |
| }, | |
| { | |
| "model_id": "zai-org/GLM-4.6", | |
| "short_name": "GLM-4.6", | |
| "provider": "zai-org", | |
| "score": 9.67, | |
| "date": "2025-09-29" | |
| }, | |
| { | |
| "model_id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", | |
| "short_name": "Llama-4-Maverick-17B-128E-Instruct", | |
| "provider": "meta-llama", | |
| "score": 5.24, | |
| "date": "2025-04-01" | |
| } | |
| ] | |
| }, | |
| "mmluPro": { | |
| "name": "MMLU-Pro", | |
| "models": [ | |
| { | |
| "model_id": "MiniMaxAI/MiniMax-M2.1", | |
| "short_name": "MiniMax-M2.1", | |
| "provider": "MiniMaxAI", | |
| "score": 88.0, | |
| "date": "2025-12-20" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3.5-397B-A17B", | |
| "short_name": "Qwen3.5-397B-A17B", | |
| "provider": "Qwen", | |
| "score": 87.8, | |
| "date": "2026-02-16" | |
| }, | |
| { | |
| "model_id": "moonshotai/Kimi-K2.5", | |
| "short_name": "Kimi-K2.5", | |
| "provider": "moonshotai", | |
| "score": 87.1, | |
| "date": "2026-01-01" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3.5-122B-A10B", | |
| "short_name": "Qwen3.5-122B-A10B", | |
| "provider": "Qwen", | |
| "score": 86.7, | |
| "date": "2026-02-24" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3.5-27B", | |
| "short_name": "Qwen3.5-27B", | |
| "provider": "Qwen", | |
| "score": 86.1, | |
| "date": "2026-02-24" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3.5-35B-A3B", | |
| "short_name": "Qwen3.5-35B-A3B", | |
| "provider": "Qwen", | |
| "score": 85.3, | |
| "date": "2026-02-24" | |
| }, | |
| { | |
| "model_id": "deepseek-ai/DeepSeek-R1-0528", | |
| "short_name": "DeepSeek-R1-0528", | |
| "provider": "deepseek-ai", | |
| "score": 85.0, | |
| "date": "2025-05-28" | |
| }, | |
| { | |
| "model_id": "deepseek-ai/DeepSeek-V3.2", | |
| "short_name": "DeepSeek-V3.2", | |
| "provider": "deepseek-ai", | |
| "score": 85.0, | |
| "date": "2025-12-01" | |
| }, | |
| { | |
| "model_id": "moonshotai/Kimi-K2-Thinking", | |
| "short_name": "Kimi-K2-Thinking", | |
| "provider": "moonshotai", | |
| "score": 84.6, | |
| "date": "2025-11-04" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3-235B-A22B-Thinking-2507", | |
| "short_name": "Qwen3-235B-A22B-Thinking-2507", | |
| "provider": "Qwen", | |
| "score": 84.4, | |
| "date": "2025-07-25" | |
| }, | |
| { | |
| "model_id": "stepfun-ai/Step-3.5-Flash", | |
| "short_name": "Step-3.5-Flash", | |
| "provider": "stepfun-ai", | |
| "score": 84.4, | |
| "date": "2026-02-01" | |
| }, | |
| { | |
| "model_id": "zai-org/GLM-4.7", | |
| "short_name": "GLM-4.7", | |
| "provider": "zai-org", | |
| "score": 84.3, | |
| "date": "2025-12-22" | |
| }, | |
| { | |
| "model_id": "deepseek-ai/DeepSeek-R1", | |
| "short_name": "DeepSeek-R1", | |
| "provider": "deepseek-ai", | |
| "score": 84.0, | |
| "date": "2025-01-20" | |
| }, | |
| { | |
| "model_id": "LGAI-EXAONE/K-EXAONE-236B-A23B", | |
| "short_name": "K-EXAONE-236B-A23B", | |
| "provider": "LGAI-EXAONE", | |
| "score": 83.8, | |
| "date": "2025-12-26" | |
| }, | |
| { | |
| "model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", | |
| "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", | |
| "provider": "nvidia", | |
| "score": 83.73, | |
| "date": "2026-03-10" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3.5-9B", | |
| "short_name": "Qwen3.5-9B", | |
| "provider": "Qwen", | |
| "score": 82.5, | |
| "date": "2026-02-27" | |
| }, | |
| { | |
| "model_id": "MiniMaxAI/MiniMax-M2", | |
| "short_name": "MiniMax-M2", | |
| "provider": "MiniMaxAI", | |
| "score": 82.0, | |
| "date": "2025-10-22" | |
| }, | |
| { | |
| "model_id": "deepseek-ai/DeepSeek-V3-0324", | |
| "short_name": "DeepSeek-V3-0324", | |
| "provider": "deepseek-ai", | |
| "score": 81.2, | |
| "date": "2025-03-24" | |
| }, | |
| { | |
| "model_id": "jdopensource/JoyAI-LLM-Flash", | |
| "short_name": "JoyAI-LLM-Flash", | |
| "provider": "jdopensource", | |
| "score": 81.02, | |
| "date": "2026-02-14" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3-Next-80B-A3B-Instruct", | |
| "short_name": "Qwen3-Next-80B-A3B-Instruct", | |
| "provider": "Qwen", | |
| "score": 80.6, | |
| "date": "2025-09-09" | |
| }, | |
| { | |
| "model_id": "nvidia/Nemotron-Cascade-2-30B-A3B", | |
| "short_name": "Nemotron-Cascade-2-30B-A3B", | |
| "provider": "nvidia", | |
| "score": 79.8, | |
| "date": "2026-03-18" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3.5-4B", | |
| "short_name": "Qwen3.5-4B", | |
| "provider": "Qwen", | |
| "score": 79.1, | |
| "date": "2026-02-27" | |
| }, | |
| { | |
| "model_id": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", | |
| "short_name": "NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", | |
| "provider": "nvidia", | |
| "score": 78.3, | |
| "date": "2025-12-04" | |
| }, | |
| { | |
| "model_id": "meituan-longcat/LongCat-Flash-Lite", | |
| "short_name": "LongCat-Flash-Lite", | |
| "provider": "meituan-longcat", | |
| "score": 78.29, | |
| "date": "2026-01-27" | |
| }, | |
| { | |
| "model_id": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8", | |
| "short_name": "NVIDIA-Nemotron-3-Nano-30B-A3B-FP8", | |
| "provider": "nvidia", | |
| "score": 78.1, | |
| "date": "2025-12-06" | |
| }, | |
| { | |
| "model_id": "mistralai/Mistral-Small-4-119B-2603", | |
| "short_name": "Mistral-Small-4-119B-2603", | |
| "provider": "mistralai", | |
| "score": 78.0, | |
| "date": "2026-01-23" | |
| }, | |
| { | |
| "model_id": "arcee-ai/Trinity-Large-Preview", | |
| "short_name": "Trinity-Large-Preview", | |
| "provider": "arcee-ai", | |
| "score": 75.2, | |
| "date": "2026-01-27" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3-4B-Thinking-2507", | |
| "short_name": "Qwen3-4B-Thinking-2507", | |
| "provider": "Qwen", | |
| "score": 74.0, | |
| "date": "2025-08-05" | |
| }, | |
| { | |
| "model_id": "tiiuae/Falcon-H1R-7B", | |
| "short_name": "Falcon-H1R-7B", | |
| "provider": "tiiuae", | |
| "score": 72.1, | |
| "date": "2025-10-29" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3-4B-Instruct-2507", | |
| "short_name": "Qwen3-4B-Instruct-2507", | |
| "provider": "Qwen", | |
| "score": 69.6, | |
| "date": "2025-08-05" | |
| }, | |
| { | |
| "model_id": "deepseek-ai/DeepSeek-V3", | |
| "short_name": "DeepSeek-V3", | |
| "provider": "deepseek-ai", | |
| "score": 64.4, | |
| "date": "2024-12-25" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3.5-2B", | |
| "short_name": "Qwen3.5-2B", | |
| "provider": "Qwen", | |
| "score": 55.3, | |
| "date": "2026-02-28" | |
| }, | |
| { | |
| "model_id": "meta-llama/Llama-3.1-8B-Instruct", | |
| "short_name": "Llama-3.1-8B-Instruct", | |
| "provider": "meta-llama", | |
| "score": 48.3, | |
| "date": "2024-07-18" | |
| }, | |
| { | |
| "model_id": "LiquidAI/LFM2.5-1.2B-Instruct", | |
| "short_name": "LFM2.5-1.2B-Instruct", | |
| "provider": "LiquidAI", | |
| "score": 44.35, | |
| "date": "2026-01-06" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3.5-0.8B", | |
| "short_name": "Qwen3.5-0.8B", | |
| "provider": "Qwen", | |
| "score": 29.7, | |
| "date": "2026-02-28" | |
| } | |
| ] | |
| }, | |
| "gpqa": { | |
| "name": "GPQA Diamond", | |
| "models": [ | |
| { | |
| "model_id": "Qwen/Qwen3.5-397B-A17B", | |
| "short_name": "Qwen3.5-397B-A17B", | |
| "provider": "Qwen", | |
| "score": 88.4, | |
| "date": "2026-02-16" | |
| }, | |
| { | |
| "model_id": "moonshotai/Kimi-K2.5", | |
| "short_name": "Kimi-K2.5", | |
| "provider": "moonshotai", | |
| "score": 87.6, | |
| "date": "2026-01-01" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3.5-122B-A10B", | |
| "short_name": "Qwen3.5-122B-A10B", | |
| "provider": "Qwen", | |
| "score": 86.6, | |
| "date": "2026-02-24" | |
| }, | |
| { | |
| "model_id": "zai-org/GLM-5", | |
| "short_name": "GLM-5", | |
| "provider": "zai-org", | |
| "score": 86.0, | |
| "date": "2026-02-11" | |
| }, | |
| { | |
| "model_id": "zai-org/GLM-4.7", | |
| "short_name": "GLM-4.7", | |
| "provider": "zai-org", | |
| "score": 85.7, | |
| "date": "2025-12-22" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3.5-27B", | |
| "short_name": "Qwen3.5-27B", | |
| "provider": "Qwen", | |
| "score": 85.5, | |
| "date": "2026-02-24" | |
| }, | |
| { | |
| "model_id": "MiniMaxAI/MiniMax-M2.5", | |
| "short_name": "MiniMax-M2.5", | |
| "provider": "MiniMaxAI", | |
| "score": 85.2, | |
| "date": "2026-02-12" | |
| }, | |
| { | |
| "model_id": "moonshotai/Kimi-K2-Thinking", | |
| "short_name": "Kimi-K2-Thinking", | |
| "provider": "moonshotai", | |
| "score": 84.5, | |
| "date": "2025-11-04" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3.5-35B-A3B", | |
| "short_name": "Qwen3.5-35B-A3B", | |
| "provider": "Qwen", | |
| "score": 84.2, | |
| "date": "2026-02-24" | |
| }, | |
| { | |
| "model_id": "Nanbeige/Nanbeige4.1-3B", | |
| "short_name": "Nanbeige4.1-3B", | |
| "provider": "Nanbeige", | |
| "score": 83.8, | |
| "date": "2026-02-10" | |
| }, | |
| { | |
| "model_id": "stepfun-ai/Step-3.5-Flash", | |
| "short_name": "Step-3.5-Flash", | |
| "provider": "stepfun-ai", | |
| "score": 83.5, | |
| "date": "2026-02-01" | |
| }, | |
| { | |
| "model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", | |
| "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", | |
| "provider": "nvidia", | |
| "score": 82.7, | |
| "date": "2026-03-10" | |
| }, | |
| { | |
| "model_id": "deepseek-ai/DeepSeek-V3.2", | |
| "short_name": "DeepSeek-V3.2", | |
| "provider": "deepseek-ai", | |
| "score": 82.4, | |
| "date": "2025-12-01" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3.5-9B", | |
| "short_name": "Qwen3.5-9B", | |
| "provider": "Qwen", | |
| "score": 81.7, | |
| "date": "2026-02-27" | |
| }, | |
| { | |
| "model_id": "openai/gpt-oss-120b", | |
| "short_name": "gpt-oss-120b", | |
| "provider": "openai", | |
| "score": 80.9, | |
| "date": "2025-08-04" | |
| }, | |
| { | |
| "model_id": "meituan-longcat/LongCat-Flash-Thinking-2601", | |
| "short_name": "LongCat-Flash-Thinking-2601", | |
| "provider": "meituan-longcat", | |
| "score": 80.5, | |
| "date": "2026-01-14" | |
| }, | |
| { | |
| "model_id": "LGAI-EXAONE/K-EXAONE-236B-A23B", | |
| "short_name": "K-EXAONE-236B-A23B", | |
| "provider": "LGAI-EXAONE", | |
| "score": 79.1, | |
| "date": "2025-12-26" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3.5-4B", | |
| "short_name": "Qwen3.5-4B", | |
| "provider": "Qwen", | |
| "score": 76.2, | |
| "date": "2026-02-27" | |
| }, | |
| { | |
| "model_id": "nvidia/Nemotron-Cascade-2-30B-A3B", | |
| "short_name": "Nemotron-Cascade-2-30B-A3B", | |
| "provider": "nvidia", | |
| "score": 76.1, | |
| "date": "2026-03-18" | |
| }, | |
| { | |
| "model_id": "zai-org/GLM-4.7-Flash", | |
| "short_name": "GLM-4.7-Flash", | |
| "provider": "zai-org", | |
| "score": 75.2, | |
| "date": "2026-01-19" | |
| }, | |
| { | |
| "model_id": "jdopensource/JoyAI-LLM-Flash", | |
| "short_name": "JoyAI-LLM-Flash", | |
| "provider": "jdopensource", | |
| "score": 74.43, | |
| "date": "2026-02-14" | |
| }, | |
| { | |
| "model_id": "openai/gpt-oss-20b", | |
| "short_name": "gpt-oss-20b", | |
| "provider": "openai", | |
| "score": 74.2, | |
| "date": "2025-08-04" | |
| }, | |
| { | |
| "model_id": "deepseek-ai/DeepSeek-R1", | |
| "short_name": "DeepSeek-R1", | |
| "provider": "deepseek-ai", | |
| "score": 71.5, | |
| "date": "2025-01-20" | |
| }, | |
| { | |
| "model_id": "mistralai/Mistral-Small-4-119B-2603", | |
| "short_name": "Mistral-Small-4-119B-2603", | |
| "provider": "mistralai", | |
| "score": 71.2, | |
| "date": "2026-01-23" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3-4B-Thinking-2507", | |
| "short_name": "Qwen3-4B-Thinking-2507", | |
| "provider": "Qwen", | |
| "score": 65.8, | |
| "date": "2025-08-05" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3-4B-Instruct-2507", | |
| "short_name": "Qwen3-4B-Instruct-2507", | |
| "provider": "Qwen", | |
| "score": 62.0, | |
| "date": "2025-08-05" | |
| }, | |
| { | |
| "model_id": "LiquidAI/LFM2.5-1.2B-Instruct", | |
| "short_name": "LFM2.5-1.2B-Instruct", | |
| "provider": "LiquidAI", | |
| "score": 38.89, | |
| "date": "2026-01-06" | |
| }, | |
| { | |
| "model_id": "meta-llama/Llama-3.1-8B-Instruct", | |
| "short_name": "Llama-3.1-8B-Instruct", | |
| "provider": "meta-llama", | |
| "score": 30.4, | |
| "date": "2024-07-18" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3.5-0.8B", | |
| "short_name": "Qwen3.5-0.8B", | |
| "provider": "Qwen", | |
| "score": 11.9, | |
| "date": "2026-02-28" | |
| } | |
| ] | |
| }, | |
| "hle": { | |
| "name": "HLE", | |
| "models": [ | |
| { | |
| "model_id": "zai-org/GLM-5", | |
| "short_name": "GLM-5", | |
| "provider": "zai-org", | |
| "score": 50.4, | |
| "date": "2026-02-11" | |
| }, | |
| { | |
| "model_id": "moonshotai/Kimi-K2.5", | |
| "short_name": "Kimi-K2.5", | |
| "provider": "moonshotai", | |
| "score": 50.2, | |
| "date": "2026-01-01" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3.5-27B", | |
| "short_name": "Qwen3.5-27B", | |
| "provider": "Qwen", | |
| "score": 48.5, | |
| "date": "2026-02-24" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3.5-397B-A17B", | |
| "short_name": "Qwen3.5-397B-A17B", | |
| "provider": "Qwen", | |
| "score": 48.3, | |
| "date": "2026-02-16" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3.5-122B-A10B", | |
| "short_name": "Qwen3.5-122B-A10B", | |
| "provider": "Qwen", | |
| "score": 47.5, | |
| "date": "2026-02-24" | |
| }, | |
| { | |
| "model_id": "moonshotai/Kimi-K2-Thinking", | |
| "short_name": "Kimi-K2-Thinking", | |
| "provider": "moonshotai", | |
| "score": 44.9, | |
| "date": "2025-11-04" | |
| }, | |
| { | |
| "model_id": "zai-org/GLM-4.7", | |
| "short_name": "GLM-4.7", | |
| "provider": "zai-org", | |
| "score": 42.8, | |
| "date": "2025-12-22" | |
| }, | |
| { | |
| "model_id": "deepseek-ai/DeepSeek-V3.2", | |
| "short_name": "DeepSeek-V3.2", | |
| "provider": "deepseek-ai", | |
| "score": 40.8, | |
| "date": "2025-12-01" | |
| }, | |
| { | |
| "model_id": "miromind-ai/MiroThinker-v1.5-235B", | |
| "short_name": "MiroThinker-v1.5-235B", | |
| "provider": "miromind-ai", | |
| "score": 39.2, | |
| "date": "2026-01-04" | |
| }, | |
| { | |
| "model_id": "nvidia/Nemotron-Orchestrator-8B", | |
| "short_name": "Nemotron-Orchestrator-8B", | |
| "provider": "nvidia", | |
| "score": 37.1, | |
| "date": "2025-11-25" | |
| }, | |
| { | |
| "model_id": "miromind-ai/MiroThinker-v1.5-30B", | |
| "short_name": "MiroThinker-v1.5-30B", | |
| "provider": "miromind-ai", | |
| "score": 31.0, | |
| "date": "2026-01-04" | |
| }, | |
| { | |
| "model_id": "meituan-longcat/LongCat-Flash-Thinking-2601", | |
| "short_name": "LongCat-Flash-Thinking-2601", | |
| "provider": "meituan-longcat", | |
| "score": 25.2, | |
| "date": "2026-01-14" | |
| }, | |
| { | |
| "model_id": "stepfun-ai/Step-3.5-Flash", | |
| "short_name": "Step-3.5-Flash", | |
| "provider": "stepfun-ai", | |
| "score": 23.1, | |
| "date": "2026-02-01" | |
| }, | |
| { | |
| "model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", | |
| "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", | |
| "provider": "nvidia", | |
| "score": 22.82, | |
| "date": "2026-03-10" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3.5-35B-A3B", | |
| "short_name": "Qwen3.5-35B-A3B", | |
| "provider": "Qwen", | |
| "score": 22.4, | |
| "date": "2026-02-24" | |
| }, | |
| { | |
| "model_id": "Nanbeige/Nanbeige4.1-3B", | |
| "short_name": "Nanbeige4.1-3B", | |
| "provider": "Nanbeige", | |
| "score": 22.29, | |
| "date": "2026-02-10" | |
| }, | |
| { | |
| "model_id": "MiniMaxAI/MiniMax-M2.1", | |
| "short_name": "MiniMax-M2.1", | |
| "provider": "MiniMaxAI", | |
| "score": 22.2, | |
| "date": "2025-12-20" | |
| }, | |
| { | |
| "model_id": "XiaomiMiMo/MiMo-V2-Flash", | |
| "short_name": "MiMo-V2-Flash", | |
| "provider": "XiaomiMiMo", | |
| "score": 22.1, | |
| "date": "2025-12-16" | |
| }, | |
| { | |
| "model_id": "MiniMaxAI/MiniMax-M2.5", | |
| "short_name": "MiniMax-M2.5", | |
| "provider": "MiniMaxAI", | |
| "score": 19.4, | |
| "date": "2026-02-12" | |
| }, | |
| { | |
| "model_id": "openbmb/AgentCPM-Explore", | |
| "short_name": "AgentCPM-Explore", | |
| "provider": "openbmb", | |
| "score": 19.1, | |
| "date": "2026-01-11" | |
| }, | |
| { | |
| "model_id": "openai/gpt-oss-120b", | |
| "short_name": "gpt-oss-120b", | |
| "provider": "openai", | |
| "score": 19.0, | |
| "date": "2025-08-04" | |
| }, | |
| { | |
| "model_id": "nvidia/Nemotron-Cascade-2-30B-A3B", | |
| "short_name": "Nemotron-Cascade-2-30B-A3B", | |
| "provider": "nvidia", | |
| "score": 17.7, | |
| "date": "2026-03-18" | |
| }, | |
| { | |
| "model_id": "openai/gpt-oss-20b", | |
| "short_name": "gpt-oss-20b", | |
| "provider": "openai", | |
| "score": 17.3, | |
| "date": "2025-08-04" | |
| }, | |
| { | |
| "model_id": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", | |
| "short_name": "NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", | |
| "provider": "nvidia", | |
| "score": 15.5, | |
| "date": "2025-12-04" | |
| }, | |
| { | |
| "model_id": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8", | |
| "short_name": "NVIDIA-Nemotron-3-Nano-30B-A3B-FP8", | |
| "provider": "nvidia", | |
| "score": 15.5, | |
| "date": "2025-12-06" | |
| }, | |
| { | |
| "model_id": "zai-org/GLM-4.7-Flash", | |
| "short_name": "GLM-4.7-Flash", | |
| "provider": "zai-org", | |
| "score": 14.4, | |
| "date": "2026-01-19" | |
| }, | |
| { | |
| "model_id": "LGAI-EXAONE/K-EXAONE-236B-A23B", | |
| "short_name": "K-EXAONE-236B-A23B", | |
| "provider": "LGAI-EXAONE", | |
| "score": 13.6, | |
| "date": "2025-12-26" | |
| }, | |
| { | |
| "model_id": "MiniMaxAI/MiniMax-M2", | |
| "short_name": "MiniMax-M2", | |
| "provider": "MiniMaxAI", | |
| "score": 12.5, | |
| "date": "2025-10-22" | |
| }, | |
| { | |
| "model_id": "tiiuae/Falcon-H1R-7B", | |
| "short_name": "Falcon-H1R-7B", | |
| "provider": "tiiuae", | |
| "score": 11.1, | |
| "date": "2025-10-29" | |
| }, | |
| { | |
| "model_id": "HelpingAI/Dhanishtha-2.0-0126", | |
| "short_name": "Dhanishtha-2.0-0126", | |
| "provider": "HelpingAI", | |
| "score": 9.92, | |
| "date": "2026-01-01" | |
| } | |
| ] | |
| }, | |
| "aime2026": { | |
| "name": "AIME 2026", | |
| "models": [ | |
| { | |
| "model_id": "stepfun-ai/Step-3.5-Flash", | |
| "short_name": "Step-3.5-Flash", | |
| "provider": "stepfun-ai", | |
| "score": 96.67, | |
| "date": "2026-02-01" | |
| }, | |
| { | |
| "model_id": "moonshotai/Kimi-K2.5", | |
| "short_name": "Kimi-K2.5", | |
| "provider": "moonshotai", | |
| "score": 95.83, | |
| "date": "2026-01-01" | |
| }, | |
| { | |
| "model_id": "zai-org/GLM-5", | |
| "short_name": "GLM-5", | |
| "provider": "zai-org", | |
| "score": 95.83, | |
| "date": "2026-02-11" | |
| }, | |
| { | |
| "model_id": "deepseek-ai/DeepSeek-V3.2", | |
| "short_name": "DeepSeek-V3.2", | |
| "provider": "deepseek-ai", | |
| "score": 94.17, | |
| "date": "2025-12-01" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3.5-397B-A17B", | |
| "short_name": "Qwen3.5-397B-A17B", | |
| "provider": "Qwen", | |
| "score": 93.33, | |
| "date": "2026-02-16" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3.5-35B-A3B", | |
| "short_name": "Qwen3.5-35B-A3B", | |
| "provider": "Qwen", | |
| "score": 93.33, | |
| "date": "2026-02-24" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3.5-9B", | |
| "short_name": "Qwen3.5-9B", | |
| "provider": "Qwen", | |
| "score": 92.5, | |
| "date": "2026-02-27" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3.5-27B", | |
| "short_name": "Qwen3.5-27B", | |
| "provider": "Qwen", | |
| "score": 90.83, | |
| "date": "2026-02-24" | |
| }, | |
| { | |
| "model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", | |
| "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", | |
| "provider": "nvidia", | |
| "score": 90.0, | |
| "date": "2026-03-10" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3-30B-A3B-Thinking-2507", | |
| "short_name": "Qwen3-30B-A3B-Thinking-2507", | |
| "provider": "Qwen", | |
| "score": 87.5, | |
| "date": "2025-07-29" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3-4B-Thinking-2507", | |
| "short_name": "Qwen3-4B-Thinking-2507", | |
| "provider": "Qwen", | |
| "score": 82.5, | |
| "date": "2025-08-05" | |
| }, | |
| { | |
| "model_id": "lm-provers/QED-Nano", | |
| "short_name": "QED-Nano", | |
| "provider": "lm-provers", | |
| "score": 82.5, | |
| "date": "2026-02-12" | |
| } | |
| ] | |
| }, | |
| "hmmt2026": { | |
| "name": "HMMT Feb 2026", | |
| "models": [ | |
| { | |
| "model_id": "Qwen/Qwen3.5-397B-A17B", | |
| "short_name": "Qwen3.5-397B-A17B", | |
| "provider": "Qwen", | |
| "score": 87.88, | |
| "date": "2026-02-16" | |
| }, | |
| { | |
| "model_id": "moonshotai/Kimi-K2.5", | |
| "short_name": "Kimi-K2.5", | |
| "provider": "moonshotai", | |
| "score": 87.12, | |
| "date": "2026-01-01" | |
| }, | |
| { | |
| "model_id": "stepfun-ai/Step-3.5-Flash", | |
| "short_name": "Step-3.5-Flash", | |
| "provider": "stepfun-ai", | |
| "score": 86.36, | |
| "date": "2026-02-01" | |
| }, | |
| { | |
| "model_id": "zai-org/GLM-5", | |
| "short_name": "GLM-5", | |
| "provider": "zai-org", | |
| "score": 86.36, | |
| "date": "2026-02-11" | |
| }, | |
| { | |
| "model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", | |
| "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", | |
| "provider": "nvidia", | |
| "score": 84.85, | |
| "date": "2026-03-10" | |
| }, | |
| { | |
| "model_id": "deepseek-ai/DeepSeek-V3.2", | |
| "short_name": "DeepSeek-V3.2", | |
| "provider": "deepseek-ai", | |
| "score": 84.09, | |
| "date": "2025-12-01" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3.5-35B-A3B", | |
| "short_name": "Qwen3.5-35B-A3B", | |
| "provider": "Qwen", | |
| "score": 81.82, | |
| "date": "2026-02-24" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3.5-27B", | |
| "short_name": "Qwen3.5-27B", | |
| "provider": "Qwen", | |
| "score": 81.06, | |
| "date": "2026-02-24" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3-30B-A3B-Thinking-2507", | |
| "short_name": "Qwen3-30B-A3B-Thinking-2507", | |
| "provider": "Qwen", | |
| "score": 78.79, | |
| "date": "2025-07-29" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3.5-9B", | |
| "short_name": "Qwen3.5-9B", | |
| "provider": "Qwen", | |
| "score": 71.21, | |
| "date": "2026-02-27" | |
| }, | |
| { | |
| "model_id": "lm-provers/QED-Nano", | |
| "short_name": "QED-Nano", | |
| "provider": "lm-provers", | |
| "score": 62.88, | |
| "date": "2026-02-12" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3-4B-Thinking-2507", | |
| "short_name": "Qwen3-4B-Thinking-2507", | |
| "provider": "Qwen", | |
| "score": 53.03, | |
| "date": "2025-08-05" | |
| } | |
| ] | |
| }, | |
| "olmOcr": { | |
| "name": "olmOCR-bench", | |
| "models": [ | |
| { | |
| "model_id": "datalab-to/chandra-ocr-2", | |
| "short_name": "chandra-ocr-2", | |
| "provider": "datalab-to", | |
| "score": 85.9, | |
| "date": "2026-03-16" | |
| }, | |
| { | |
| "model_id": "rednote-hilab/dots.mocr", | |
| "short_name": "dots.mocr", | |
| "provider": "rednote-hilab", | |
| "score": 83.9, | |
| "date": "2026-03-19" | |
| }, | |
| { | |
| "model_id": "lightonai/LightOnOCR-2-1B", | |
| "short_name": "LightOnOCR-2-1B", | |
| "provider": "lightonai", | |
| "score": 83.2, | |
| "date": "2026-01-16" | |
| }, | |
| { | |
| "model_id": "datalab-to/chandra", | |
| "short_name": "chandra", | |
| "provider": "datalab-to", | |
| "score": 83.1, | |
| "date": "2025-10-21" | |
| }, | |
| { | |
| "model_id": "infly/Infinity-Parser-7B", | |
| "short_name": "Infinity-Parser-7B", | |
| "provider": "infly", | |
| "score": 82.5, | |
| "date": "2025-10-17" | |
| }, | |
| { | |
| "model_id": "allenai/olmOCR-2-7B-1025-FP8", | |
| "short_name": "olmOCR-2-7B-1025-FP8", | |
| "provider": "allenai", | |
| "score": 82.4, | |
| "date": "2025-10-06" | |
| }, | |
| { | |
| "model_id": "PaddlePaddle/PaddleOCR-VL", | |
| "short_name": "PaddleOCR-VL", | |
| "provider": "PaddlePaddle", | |
| "score": 80.0, | |
| "date": "2025-10-16" | |
| }, | |
| { | |
| "model_id": "baidu/Qianfan-OCR", | |
| "short_name": "Qianfan-OCR", | |
| "provider": "baidu", | |
| "score": 79.8, | |
| "date": "2026-03-18" | |
| }, | |
| { | |
| "model_id": "rednote-hilab/dots.ocr", | |
| "short_name": "dots.ocr", | |
| "provider": "rednote-hilab", | |
| "score": 79.1, | |
| "date": "2025-07-30" | |
| }, | |
| { | |
| "model_id": "deepseek-ai/DeepSeek-OCR-2", | |
| "short_name": "DeepSeek-OCR-2", | |
| "provider": "deepseek-ai", | |
| "score": 76.3, | |
| "date": "2026-01-27" | |
| }, | |
| { | |
| "model_id": "lightonai/LightOnOCR-1B-1025", | |
| "short_name": "LightOnOCR-1B-1025", | |
| "provider": "lightonai", | |
| "score": 76.1, | |
| "date": "2025-10-20" | |
| }, | |
| { | |
| "model_id": "deepseek-ai/DeepSeek-OCR", | |
| "short_name": "DeepSeek-OCR", | |
| "provider": "deepseek-ai", | |
| "score": 75.7, | |
| "date": "2025-10-17" | |
| }, | |
| { | |
| "model_id": "opendatalab/MinerU2.5-2509-1.2B", | |
| "short_name": "MinerU2.5-2509-1.2B", | |
| "provider": "opendatalab", | |
| "score": 75.2, | |
| "date": "2025-09-17" | |
| }, | |
| { | |
| "model_id": "zai-org/GLM-OCR", | |
| "short_name": "GLM-OCR", | |
| "provider": "zai-org", | |
| "score": 75.2, | |
| "date": "2026-01-30" | |
| }, | |
| { | |
| "model_id": "FireRedTeam/FireRed-OCR", | |
| "short_name": "FireRed-OCR", | |
| "provider": "FireRedTeam", | |
| "score": 70.2, | |
| "date": "2026-02-28" | |
| }, | |
| { | |
| "model_id": "nanonets/Nanonets-OCR2-3B", | |
| "short_name": "Nanonets-OCR2-3B", | |
| "provider": "nanonets", | |
| "score": 69.5, | |
| "date": "2025-10-13" | |
| } | |
| ] | |
| }, | |
| "terminalBench": { | |
| "name": "Terminal-Bench 2.0", | |
| "models": [ | |
| { | |
| "model_id": "Qwen/Qwen3.5-397B-A17B", | |
| "short_name": "Qwen3.5-397B-A17B", | |
| "provider": "Qwen", | |
| "score": 52.5, | |
| "date": "2026-02-16" | |
| }, | |
| { | |
| "model_id": "zai-org/GLM-5", | |
| "short_name": "GLM-5", | |
| "provider": "zai-org", | |
| "score": 52.4, | |
| "date": "2026-02-11" | |
| }, | |
| { | |
| "model_id": "stepfun-ai/Step-3.5-Flash", | |
| "short_name": "Step-3.5-Flash", | |
| "provider": "stepfun-ai", | |
| "score": 51.0, | |
| "date": "2026-02-01" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3.5-122B-A10B", | |
| "short_name": "Qwen3.5-122B-A10B", | |
| "provider": "Qwen", | |
| "score": 49.4, | |
| "date": "2026-02-24" | |
| }, | |
| { | |
| "model_id": "moonshotai/Kimi-K2.5", | |
| "short_name": "Kimi-K2.5", | |
| "provider": "moonshotai", | |
| "score": 43.2, | |
| "date": "2026-01-01" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3.5-27B", | |
| "short_name": "Qwen3.5-27B", | |
| "provider": "Qwen", | |
| "score": 41.6, | |
| "date": "2026-02-24" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3.5-35B-A3B", | |
| "short_name": "Qwen3.5-35B-A3B", | |
| "provider": "Qwen", | |
| "score": 40.5, | |
| "date": "2026-02-24" | |
| }, | |
| { | |
| "model_id": "deepseek-ai/DeepSeek-V3.2", | |
| "short_name": "DeepSeek-V3.2", | |
| "provider": "deepseek-ai", | |
| "score": 39.6, | |
| "date": "2025-12-01" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3-Coder-Next", | |
| "short_name": "Qwen3-Coder-Next", | |
| "provider": "Qwen", | |
| "score": 36.2, | |
| "date": "2026-01-30" | |
| }, | |
| { | |
| "model_id": "moonshotai/Kimi-K2-Thinking", | |
| "short_name": "Kimi-K2-Thinking", | |
| "provider": "moonshotai", | |
| "score": 35.7, | |
| "date": "2025-11-04" | |
| }, | |
| { | |
| "model_id": "zai-org/GLM-4.7", | |
| "short_name": "GLM-4.7", | |
| "provider": "zai-org", | |
| "score": 33.4, | |
| "date": "2025-12-22" | |
| }, | |
| { | |
| "model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", | |
| "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", | |
| "provider": "nvidia", | |
| "score": 31.0, | |
| "date": "2026-03-10" | |
| }, | |
| { | |
| "model_id": "MiniMaxAI/MiniMax-M2", | |
| "short_name": "MiniMax-M2", | |
| "provider": "MiniMaxAI", | |
| "score": 30.0, | |
| "date": "2025-10-22" | |
| }, | |
| { | |
| "model_id": "MiniMaxAI/MiniMax-M2.1", | |
| "short_name": "MiniMax-M2.1", | |
| "provider": "MiniMaxAI", | |
| "score": 29.2, | |
| "date": "2025-12-20" | |
| }, | |
| { | |
| "model_id": "moonshotai/Kimi-K2-Instruct", | |
| "short_name": "Kimi-K2-Instruct", | |
| "provider": "moonshotai", | |
| "score": 27.8, | |
| "date": "2025-07-11" | |
| }, | |
| { | |
| "model_id": "nvidia/Nemotron-Terminal-32B", | |
| "short_name": "Nemotron-Terminal-32B", | |
| "provider": "nvidia", | |
| "score": 27.4, | |
| "date": "2026-02-17" | |
| }, | |
| { | |
| "model_id": "zai-org/GLM-4.6", | |
| "short_name": "GLM-4.6", | |
| "provider": "zai-org", | |
| "score": 24.5, | |
| "date": "2025-09-29" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", | |
| "short_name": "Qwen3-Coder-480B-A35B-Instruct", | |
| "provider": "Qwen", | |
| "score": 23.9, | |
| "date": "2025-07-22" | |
| }, | |
| { | |
| "model_id": "nvidia/Nemotron-Terminal-14B", | |
| "short_name": "Nemotron-Terminal-14B", | |
| "provider": "nvidia", | |
| "score": 20.2, | |
| "date": "2026-02-17" | |
| }, | |
| { | |
| "model_id": "nvidia/Nemotron-Terminal-8B", | |
| "short_name": "Nemotron-Terminal-8B", | |
| "provider": "nvidia", | |
| "score": 13.0, | |
| "date": "2026-02-17" | |
| } | |
| ] | |
| }, | |
| "evasionBench": { | |
| "name": "EvasionBench", | |
| "models": [ | |
| { | |
| "model_id": "zai-org/GLM-4.7", | |
| "short_name": "GLM-4.7", | |
| "provider": "zai-org", | |
| "score": 82.91, | |
| "date": "2025-12-22" | |
| }, | |
| { | |
| "model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", | |
| "short_name": "Qwen3-Coder-480B-A35B-Instruct", | |
| "provider": "Qwen", | |
| "score": 78.16, | |
| "date": "2025-07-22" | |
| }, | |
| { | |
| "model_id": "MiniMaxAI/MiniMax-M2.1", | |
| "short_name": "MiniMax-M2.1", | |
| "provider": "MiniMaxAI", | |
| "score": 71.31, | |
| "date": "2025-12-20" | |
| }, | |
| { | |
| "model_id": "deepseek-ai/DeepSeek-V3.2", | |
| "short_name": "DeepSeek-V3.2", | |
| "provider": "deepseek-ai", | |
| "score": 66.88, | |
| "date": "2025-12-01" | |
| }, | |
| { | |
| "model_id": "moonshotai/Kimi-K2-Instruct-0905", | |
| "short_name": "Kimi-K2-Instruct-0905", | |
| "provider": "moonshotai", | |
| "score": 66.68, | |
| "date": "2025-09-03" | |
| } | |
| ] | |
| } | |
| }, | |
| "logos": { | |
| "miromind-ai": "https://cdn-avatars.huggingface.co/v1/production/uploads/682c41fb2f8a52030ec93ce0/Cna52_IapEXuNBsyI3lvR.png", | |
| "opendatalab": "https://cdn-avatars.huggingface.co/v1/production/uploads/639c3afa7432f2f5d16b7296/yqxxBknyeqkGnYsjoaR4M.png", | |
| "HelpingAI": "https://cdn-avatars.huggingface.co/v1/production/uploads/6612aedf09f16e7347dfa7e1/jHRLPBTlyykFwrd6-Mak_.png", | |
| "FireRedTeam": "https://cdn-avatars.huggingface.co/v1/production/uploads/66ec07ef12bd743cfe91004e/PK3bgl6aF2RzW1QFKkq8R.png", | |
| "baidu": "https://cdn-avatars.huggingface.co/v1/production/uploads/64f187a2cc1c03340ac30498/TYYUxK8xD1AxExFMWqbZD.png", | |
| "facebook": "https://cdn-avatars.huggingface.co/v1/production/uploads/1592839207516-noauth.png", | |
| "meta-llama": "https://cdn-avatars.huggingface.co/v1/production/uploads/646cf8084eefb026fb8fd8bc/oCTqufkdTkjyGodsx1vo1.png", | |
| "stepfun-ai": "https://cdn-avatars.huggingface.co/v1/production/uploads/66935cee39002fc0569c2943/Qv8QPbkgoKE3wR4jTzHiy.png", | |
| "mistralai": "https://cdn-avatars.huggingface.co/v1/production/uploads/634c17653d11eaedd88b314d/9OgyfKstSZtbmsmuG8MbU.png", | |
| "GAIR": "https://cdn-avatars.huggingface.co/v1/production/uploads/6144a0c4ff1146bbd84d9865/NqAuVddq2ci-AsFcFNbav.png", | |
| "XiaomiMiMo": "https://cdn-avatars.huggingface.co/v1/production/uploads/680cb7d1233834890a64acee/5w_4aLfF-7MAyaIPOV498.jpeg", | |
| "nvidia": "https://cdn-avatars.huggingface.co/v1/production/uploads/1613114437487-60262a8e0703121c822a80b6.png", | |
| "LGAI-EXAONE": "https://cdn-avatars.huggingface.co/v1/production/uploads/66a899a72f11aaf66001a8dc/UfdrP3GMo9pNT62BaMnhw.png", | |
| "jdopensource": "https://cdn-avatars.huggingface.co/v1/production/uploads/68c0e2ab44ea28a974e3074b/g-4gTubd16qUtwmGZ0n4h.png", | |
| "Qwen": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png", | |
| "tiiuae": "https://cdn-avatars.huggingface.co/v1/production/uploads/61a8d1aac664736898ffc84f/AT6cAB5ZNwCcqFMal71WD.jpeg", | |
| "LiquidAI": "https://cdn-avatars.huggingface.co/v1/production/uploads/61b8e2ba285851687028d395/EsTgVtnM2IqVRKgPdfqcB.png", | |
| "allenai": "https://cdn-avatars.huggingface.co/v1/production/uploads/652db071b62cf1f8463221e2/CxxwFiaomTa1MCX_B7-pT.png", | |
| "SWE-Lego": "https://cdn-avatars.huggingface.co/v1/production/uploads/60fc2fcca6bdebbe52dfdaf4/AeuYwUH-CQCt893qnmAGa.png", | |
| "datalab-to": "https://cdn-avatars.huggingface.co/v1/production/uploads/67ab6afe315e622f597bf9e8/YOgg0gVYVXZC1PDIHFTWK.png", | |
| "PaddlePaddle": "https://cdn-avatars.huggingface.co/v1/production/uploads/1654942635336-5f3ff69679c1ba4c353d0c5a.png", | |
| "nanonets": "https://cdn-avatars.huggingface.co/v1/production/uploads/641fc216a390e539522d511f/Xtxh40e8zSzkuKtCr58DH.jpeg", | |
| "Nanbeige": "https://cdn-avatars.huggingface.co/v1/production/uploads/646f0d118ff94af23bc44aab/GXHCollpMRgvYqUXQ2BQ7.png", | |
| "infly": "https://cdn-avatars.huggingface.co/v1/production/uploads/63ed9862679c2cc40abb55d2/0n6g0jngiKkRjaEoAvPmM.png", | |
| "openai": "https://cdn-avatars.huggingface.co/v1/production/uploads/68783facef79a05727260de3/UPX5RQxiPGA-ZbBmArIKq.png", | |
| "meituan-longcat": "https://cdn-avatars.huggingface.co/v1/production/uploads/68a2a29ab9d4c5698e02c747/CDCAx7X7rXDt7xjI-DoxG.png", | |
| "lm-provers": "https://cdn-avatars.huggingface.co/v1/production/uploads/5f0c746619cb630495b814fd/Td4sH4W-LIdR89AqHCuw3.jpeg", | |
| "openbmb": "https://cdn-avatars.huggingface.co/v1/production/uploads/1670387859384-633fe7784b362488336bbfad.png", | |
| "arcee-ai": "https://cdn-avatars.huggingface.co/v1/production/uploads/6435718aaaef013d1aec3b8b/GZPnGkfMn8Ino6JbkL4fJ.png", | |
| "lightonai": "https://cdn-avatars.huggingface.co/v1/production/uploads/1651597775471-62715572ab9243b5d40cbb1d.png", | |
| "MiniMaxAI": "https://cdn-avatars.huggingface.co/v1/production/uploads/676e38ad04af5bec20bc9faf/dUd-LsZEX0H_d4qefO_g6.jpeg", | |
| "rednote-hilab": "https://cdn-avatars.huggingface.co/v1/production/uploads/6807a1d6504547b3554b9c73/WgnnQDsz7FqnyTtv8mmRO.png", | |
| "google": "https://cdn-avatars.huggingface.co/v1/production/uploads/5dd96eb166059660ed1ee413/WtA3YYitedOr9n02eHfJe.png", | |
| "zai-org": "https://cdn-avatars.huggingface.co/v1/production/uploads/62dc173789b4cf157d36ebee/i_pxzM2ZDo3Ub-BEgIkE9.png", | |
| "deepseek-ai": "https://cdn-avatars.huggingface.co/v1/production/uploads/6538815d1bdb3c40db94fbfa/xMBly9PUMphrFVMxLX4kq.png", | |
| "moonshotai": "https://cdn-avatars.huggingface.co/v1/production/uploads/641c1e77c3983aa9490f8121/X1yT2rsaIbR9cdYGEVu0X.jpeg" | |
| }, | |
| "colors": { | |
| "FireRedTeam": "#6366f1", | |
| "GAIR": "#0d9488", | |
| "HelpingAI": "#d97706", | |
| "LGAI-EXAONE": "#e11d48", | |
| "LiquidAI": "#7c3aed", | |
| "MiniMaxAI": "#16a34a", | |
| "Nanbeige": "#2563eb", | |
| "PaddlePaddle": "#ea580c", | |
| "Qwen": "#8b5cf6", | |
| "SWE-Lego": "#0891b2", | |
| "XiaomiMiMo": "#c026d3", | |
| "allenai": "#65a30d", | |
| "arcee-ai": "#dc2626", | |
| "baidu": "#0284c7", | |
| "datalab-to": "#a21caf", | |
| "deepseek-ai": "#059669", | |
| "facebook": "#9333ea", | |
| "google": "#ca8a04", | |
| "infly": "#be185d", | |
| "jdopensource": "#0369a1", | |
| "lightonai": "#6366f1", | |
| "lm-provers": "#0d9488", | |
| "meituan-longcat": "#d97706", | |
| "meta-llama": "#e11d48", | |
| "miromind-ai": "#7c3aed", | |
| "mistralai": "#16a34a", | |
| "moonshotai": "#2563eb", | |
| "nanonets": "#ea580c", | |
| "nvidia": "#8b5cf6", | |
| "openai": "#0891b2", | |
| "openbmb": "#c026d3", | |
| "opendatalab": "#65a30d", | |
| "rednote-hilab": "#dc2626", | |
| "stepfun-ai": "#0284c7", | |
| "tiiuae": "#a21caf", | |
| "zai-org": "#059669" | |
| }, | |
| "generated_at": "2026-03-21T20:00:33.241053+00:00" | |
| } |