Spaces:
Running
Running
| <html lang="en"> | |
| <head> | |
| <!-- Document metadata. The page copy is written in English, so the root language is declared as "en". --> | |
| <meta charset="UTF-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
| <title>World Model Bench (WM Bench) 2026 — Embodied AI Benchmark</title> | |
| <meta name="description" content="The first benchmark measuring cognitive abilities of World Models. Beyond FID — Measuring Intelligence, Not Just Motion. By VIDRAFT / FINAL Bench Family."> | |
| <meta name="keywords" content="world model benchmark, embodied AI, WM Bench, VIDRAFT, PROMETHEUS, cognitive AI, embodied intelligence, motion generation, AI benchmark 2026"> | |
| <!-- Chart.js is loaded synchronously. NOTE(review): presumably script later in the file expects the Chart global at parse time; confirm before adding defer. --> | |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/Chart.js/4.4.1/chart.umd.min.js"></script> | |
| <!-- Open connections to the font hosts early so the stylesheet and font files download sooner. --> | |
| <link rel="preconnect" href="https://fonts.googleapis.com"> | |
| <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin> | |
| <link href="https://fonts.googleapis.com/css2?family=Sora:wght@300;400;500;600;700;800&family=JetBrains+Mono:wght@400;500;600;700&display=swap" rel="stylesheet"> | |
| <style> | |
| /* RESET: remove default spacing and size every box by its border box. */ | |
| * { margin: 0; padding: 0; box-sizing: border-box; } | |
| /* THEME TOKENS (light palette): colours, shadows, radii, font stacks and the shared transition timing. */ | |
| :root { | |
|   --bg:#f8f9fc; | |
|   --bg2:#f0f2f8; | |
|   --surface:#ffffff; | |
|   --surface-alt:#f5f6fa; | |
|   --border:#e2e5f0; | |
|   --border-hover:#c7cce0; | |
|   --shadow-sm:0 1px 3px rgba(15,23,42,.04),0 1px 2px rgba(15,23,42,.06); | |
|   --shadow:0 4px 16px rgba(15,23,42,.06),0 1px 3px rgba(15,23,42,.08); | |
|   --shadow-lg:0 12px 40px rgba(15,23,42,.08),0 4px 12px rgba(15,23,42,.06); | |
|   --text:#0f172a; | |
|   --text-sec:#475569; | |
|   --text-muted:#94a3b8; | |
|   --ac:#e8593c; | |
|   --ac2:#c94a2e; | |
|   --ac-bg:rgba(232,89,60,.06); | |
|   --p1:#7b8fd4; | |
|   --p2:#e8593c; | |
|   --p3:#d4a044; | |
|   --teal:#0d9488; | |
|   --green:#16a34a; | |
|   --rose:#e11d48; | |
|   --amber:#d97706; | |
|   --radius:16px; | |
|   --radius-sm:10px; | |
|   --font:'Sora',sans-serif; | |
|   --mono:'JetBrains Mono',monospace; | |
|   --tr:0.22s cubic-bezier(0.4,0,0.2,1); | |
| } | |
| /* PAGE BASE */ | |
| html { scroll-behavior: smooth; } | |
| body { font-family: var(--font); background: var(--bg); color: var(--text); min-height: 100vh; -webkit-font-smoothing: antialiased; font-size: 13px; } | |
| /* WebKit scrollbar: thin, accent-tinted thumb on a transparent track. */ | |
| ::-webkit-scrollbar { width: 5px; height: 4px; } | |
| ::-webkit-scrollbar-track { background: transparent; } | |
| ::-webkit-scrollbar-thumb { background: rgba(232,89,60,.2); border-radius: 10px; } | |
| ::-webkit-scrollbar-thumb:hover { background: rgba(232,89,60,.4); } | |
| /* Links inside a model name (.mn) turn accent-coloured and underlined on hover. */ | |
| .mn a:hover { color: var(--ac) !important; text-decoration: underline !important; } | |
| ::selection { background: rgba(232,89,60,.12); } | |
| /* Fixed, click-through backdrop: two faint radial gradients painted behind the content. */ | |
| body::before { content: ""; position: fixed; inset: 0; z-index: 0; pointer-events: none; | |
|   background: radial-gradient(ellipse 70% 45% at 15% 8%,rgba(232,89,60,.04),transparent 55%), | |
|               radial-gradient(ellipse 55% 35% at 85% 92%,rgba(212,160,68,.04),transparent 50%); } | |
| /* .wrap: centred content column, stacked above the body::before backdrop via z-index. */ | |
| .wrap { position: relative; z-index: 1; max-width: 1400px; margin: 0 auto; padding: 22px 16px 70px; } | |
| /* HEADER: centred intro block with badge row, animated gradient title and subtitle. */ | |
| header{text-align:center;margin-bottom:20px;animation:fadeIn .6s ease-out;} | |
| @keyframes fadeIn{from{opacity:0;transform:translateY(-10px)}to{opacity:1;transform:translateY(0)}} | |
| .badge-row{display:flex;align-items:center;justify-content:center;gap:8px;margin-bottom:10px;flex-wrap:wrap;} | |
| .badge{display:inline-flex;align-items:center;gap:6px;background:var(--surface);border:1px solid var(--border);border-radius:100px;padding:4px 14px;font-family:var(--mono);font-size:9px;font-weight:600;letter-spacing:2px;text-transform:uppercase;color:var(--ac);box-shadow:var(--shadow-sm);} | |
| /* .pulse: small accent dot that fades and shrinks on a 2s loop. */ | |
| .pulse{width:5px;height:5px;border-radius:50%;background:var(--ac);animation:p 2s infinite;} | |
| @keyframes p{0%,100%{opacity:1;transform:scale(1)}50%{opacity:.4;transform:scale(.8)}} | |
| /* h1: the gradient is painted through the glyphs. The standard background-clip follows the -webkit- form so engines that honour the unprefixed property use it. */ | |
| h1{font-size:clamp(20px,3vw,38px);font-weight:800;line-height:1.1;letter-spacing:-1.5px;margin-bottom:6px; | |
| background:linear-gradient(135deg,#1e1b4b 10%,#e8593c 50%,#d4a044 90%);background-size:200%; | |
| -webkit-background-clip:text;background-clip:text;-webkit-text-fill-color:transparent;animation:shimmer 6s ease-in-out infinite;} | |
| @keyframes shimmer{0%,100%{background-position:0%}50%{background-position:100%}} | |
| .sub{color:var(--text-muted);font-size:10px;line-height:1.8;} | |
| /* Bold text in the subtitle gets an explicit solid fill colour. */ | |
| .sub b{color:var(--text-sec);font-weight:600;-webkit-text-fill-color:var(--text-sec);} | |
| /* Honour the OS "reduce motion" preference: stop the entrance, pulse and shimmer animations (the title keeps its static gradient). */ | |
| @media (prefers-reduced-motion:reduce){header,.pulse,h1{animation:none;}} | |
| /* STATS: wrapping, centred row of small metric tiles (.st = tile, .stn = number, .stl = caption). */ | |
| .stats { display: flex; flex-wrap: wrap; gap: 7px; justify-content: center; margin-bottom: 16px; } | |
| .st { background: var(--surface); border: 1px solid var(--border); border-radius: var(--radius-sm); padding: 8px 14px; text-align: center; min-width: 80px; box-shadow: var(--shadow-sm); transition: var(--tr); } | |
| .st:hover { box-shadow: var(--shadow); border-color: var(--border-hover); } | |
| .stn { font-family: var(--mono); font-size: 15px; font-weight: 700; color: var(--ac); } | |
| .stl { font-size: 8.5px; color: var(--text-muted); margin-top: 2px; text-transform: uppercase; letter-spacing: .5px; } | |
| /* TABS: horizontally scrollable bar; .tab.on is the selected-state look. */ | |
| .tab-bar { display: flex; gap: 0; border-bottom: 1px solid var(--border); background: var(--surface); border-radius: var(--radius-sm) var(--radius-sm) 0 0; overflow-x: auto; box-shadow: var(--shadow-sm); } | |
| .tab { padding: 10px 20px; font-size: 10.5px; font-family: var(--mono); font-weight: 600; color: var(--text-muted); cursor: pointer; border-bottom: 2px solid transparent; transition: var(--tr); user-select: none; white-space: nowrap; letter-spacing: .3px; flex-shrink: 0; } | |
| .tab:hover { color: var(--text); background: var(--ac-bg); } | |
| .tab.on { color: var(--ac); border-bottom-color: var(--ac); background: var(--ac-bg); } | |
| /* Tab panes: every .wm-pN pane starts hidden. */ | |
| .tpane { padding-top: 14px; } | |
| .wm-p1, .wm-p2, .wm-p3, .wm-p4, .wm-p5 { display: none; } | |
| /* TABLE: horizontally scrollable card (.tw) holding the leaderboard table; the model column stays pinned on the left. */ | |
| .tw{background:var(--surface);border:1px solid var(--border);border-radius:var(--radius);overflow-x:auto;box-shadow:var(--shadow);} | |
| table{width:100%;border-collapse:collapse;font-size:11px;} | |
| thead{background:var(--surface-alt);} | |
| thead tr:last-child{border-bottom:2px solid var(--border);} | |
| th{padding:8px 6px;text-align:center;font-size:8px;font-family:var(--mono);text-transform:uppercase;letter-spacing:.3px;color:var(--text-muted);white-space:nowrap;cursor:pointer;user-select:none;vertical-align:bottom;line-height:1.6;font-weight:600;} | |
| /* Sticky header cell for the model column; the opaque background hides cells scrolling underneath. */ | |
| th.c-model{text-align:left;padding-left:12px;min-width:180px;position:sticky;left:0;background:var(--surface-alt);z-index:2;} | |
| th:hover,th.on{color:var(--ac);} | |
| .sa{opacity:.5;font-size:6px;margin-left:2px;} | |
| tbody tr{border-bottom:1px solid var(--border);transition:background var(--tr);} | |
| tbody tr:last-child{border-bottom:none;} | |
| tbody tr:hover{background:rgba(232,89,60,.02);} | |
| tbody tr.hl{background:rgba(22,163,74,.025);} | |
| tbody tr.hidden{display:none;} | |
| td{padding:8px 6px;text-align:center;vertical-align:middle;} | |
| td.c-model{text-align:left;padding-left:12px;position:sticky;left:0;background:var(--surface);z-index:1;} | |
| /* Hover tint for the pinned cell: the tint is layered over the opaque surface colour. A bare 2% alpha colour here would let horizontally scrolled cells show through the sticky column. */ | |
| tbody tr:hover td.c-model{background:linear-gradient(rgba(232,89,60,.02),rgba(232,89,60,.02)) var(--surface);} | |
| /* MODEL CELL: stacked name (.mn) and small mono sub-line (.mp). */ | |
| .mc { display: flex; flex-direction: column; gap: 2px; } | |
| .mn { font-weight: 700; font-size: 12px; color: var(--text); display: flex; align-items: center; gap: 4px; } | |
| .mp { font-size: 8px; color: var(--text-muted); font-family: var(--mono); } | |
| /* SCORE CELL: number (.sn) above a 36px track (.sb) with a fill (.sf). */ | |
| .sc { display: flex; flex-direction: column; align-items: center; gap: 2px; } | |
| .sn { font-family: var(--mono); font-size: 11px; font-weight: 700; } | |
| .sb { width: 36px; height: 3px; background: var(--border); border-radius: 2px; overflow: hidden; } | |
| .sf { height: 100%; border-radius: 2px; } | |
| /* GRADE BADGE: base pill plus one colour scheme per grade letter. */ | |
| .gr { display: inline-block; padding: 2px 7px; border-radius: 5px; font-family: var(--mono); font-size: 9px; font-weight: 800; } | |
| .gr-S { background: rgba(255,215,0,.15); color: #b8860b; border: 1px solid rgba(255,215,0,.4); } | |
| .gr-A { background: rgba(192,192,192,.15); color: #708090; border: 1px solid rgba(192,192,192,.4); } | |
| .gr-B { background: rgba(205,127,50,.15); color: #8b6914; border: 1px solid rgba(205,127,50,.4); } | |
| .gr-C { background: rgba(99,102,241,.1); color: #4f46e5; border: 1px solid rgba(99,102,241,.2); } | |
| .gr-D { background: rgba(100,116,139,.1); color: #64748b; border: 1px solid rgba(100,116,139,.2); } | |
| .gr-F { background: rgba(225,29,72,.1); color: #e11d48; border: 1px solid rgba(225,29,72,.2); } | |
| /* TRACK BADGE: base pill plus one colour scheme per track letter. */ | |
| .tb { display: inline-block; padding: 1.5px 6px; border-radius: 4px; font-family: var(--mono); font-size: 8px; font-weight: 700; } | |
| .tb-C { background: rgba(22,163,74,.1); color: #16a34a; border: 1px solid rgba(22,163,74,.2); } | |
| .tb-B { background: rgba(99,102,241,.1); color: #6366f1; border: 1px solid rgba(99,102,241,.2); } | |
| .tb-A { background: rgba(100,116,139,.1); color: #64748b; border: 1px solid rgba(100,116,139,.2); } | |
| /* PILLAR COLUMNS: very faint tint per pillar (same RGB values as --p1/--p2/--p3). */ | |
| .p1-col { background: rgba(123,143,212,.03); } | |
| .p2-col { background: rgba(232,89,60,.03); } | |
| .p3-col { background: rgba(212,160,68,.03); } | |
| /* CARDS: surface panel with hover lift; .grid2/.grid3 are equal-column grids. */ | |
| .card { background: var(--surface); border: 1px solid var(--border); border-radius: var(--radius); padding: 18px; box-shadow: var(--shadow-sm); transition: var(--tr); } | |
| .card:hover { box-shadow: var(--shadow); border-color: var(--border-hover); } | |
| .card h3 { font-size: 11px; font-family: var(--mono); font-weight: 700; color: var(--ac); text-transform: uppercase; letter-spacing: .8px; margin-bottom: 4px; } | |
| .card p { font-size: 9.5px; color: var(--text-muted); line-height: 1.7; margin-bottom: 10px; } | |
| .grid2 { display: grid; grid-template-columns: 1fr 1fr; gap: 12px; } | |
| .grid3 { display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 12px; } | |
| /* BAR CHARTS (CSS-drawn rows): fixed-width label, flexible track, right-aligned value; the fill animates its width. */ | |
| .bar-row { display: flex; align-items: center; gap: 8px; margin-bottom: 6px; } | |
| .bar-label { font-size: 9px; font-family: var(--mono); color: var(--text-sec); width: 180px; flex-shrink: 0; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; } | |
| .bar-track { flex: 1; height: 8px; background: var(--bg2); border-radius: 4px; overflow: hidden; } | |
| .bar-fill { height: 100%; border-radius: 4px; transition: width .8s cubic-bezier(.4,0,.2,1); } | |
| .bar-val { font-size: 9px; font-family: var(--mono); font-weight: 700; color: var(--text); width: 36px; text-align: right; flex-shrink: 0; } | |
| /* PILLAR BOXES: bordered box with a per-pillar gradient wash and border tint. */ | |
| .pillar-box { border-radius: var(--radius-sm); padding: 14px 16px; border: 1px solid var(--border); } | |
| .pillar-p1 { background: linear-gradient(135deg,rgba(123,143,212,.08),rgba(123,143,212,.03)); border-color: rgba(123,143,212,.25); } | |
| .pillar-p2 { background: linear-gradient(135deg,rgba(232,89,60,.08),rgba(232,89,60,.03)); border-color: rgba(232,89,60,.25); } | |
| .pillar-p3 { background: linear-gradient(135deg,rgba(212,160,68,.08),rgba(212,160,68,.03)); border-color: rgba(212,160,68,.25); } | |
| /* LEGEND: wrapping row of title (.lt) and items (.li) with a round colour dot (.ld). */ | |
| .leg { margin-top: 12px; display: flex; flex-wrap: wrap; gap: 10px; align-items: center; } | |
| .lt { font-size: 8.5px; font-family: var(--mono); color: var(--text-muted); text-transform: uppercase; letter-spacing: .8px; font-weight: 600; } | |
| .li { display: flex; align-items: center; gap: 3px; font-size: 9.5px; color: var(--text-sec); } | |
| .ld { width: 7px; height: 7px; border-radius: 50%; } | |
| /* INSIGHT BOX: accent-tinted callout with a left rule; bold text uses the accent colour. */ | |
| .insight { padding: 10px 14px; background: var(--ac-bg); border-radius: 8px; border-left: 3px solid var(--ac); font-size: 9px; color: var(--text-sec); line-height: 1.7; } | |
| .insight b { color: var(--ac); } | |
| /* MODAL: full-viewport dimmed overlay, hidden until it gets the .open class; the box is centred and scrolls internally. */ | |
| /* -webkit-backdrop-filter is declared alongside backdrop-filter for WebKit builds that only accept the prefixed form. */ | |
| .modal-overlay{display:none;position:fixed;inset:0;background:rgba(15,23,42,.5);z-index:1000;justify-content:center;align-items:center;-webkit-backdrop-filter:blur(4px);backdrop-filter:blur(4px);} | |
| .modal-overlay.open{display:flex;} | |
| .modal-box{background:var(--surface);border:1px solid var(--border);border-radius:var(--radius);padding:24px;max-width:640px;width:92%;max-height:85vh;overflow-y:auto;box-shadow:var(--shadow-lg);position:relative;animation:modalIn .2s ease;} | |
| @keyframes modalIn{from{opacity:0;transform:scale(.95)}to{opacity:1;transform:scale(1)}} | |
| .modal-close{position:absolute;top:12px;right:16px;background:none;border:none;font-size:20px;cursor:pointer;color:var(--text-muted);} | |
| .modal-close:hover{color:var(--text);} | |
| /* DARK MODE: body.dark re-declares the surface, border, text and shadow tokens; accent and pillar colours are not overridden. */ | |
| body.dark{--bg:#0f172a;--bg2:#1e293b;--surface:#1e293b;--surface-alt:#334155; | |
| --border:#334155;--border-hover:#475569;--text:#e2e8f0;--text-sec:#94a3b8;--text-muted:#64748b; | |
| --shadow-sm:0 1px 3px rgba(0,0,0,.3);--shadow:0 4px 16px rgba(0,0,0,.3);--shadow-lg:0 12px 40px rgba(0,0,0,.4);} | |
| /* The pinned model column and the table head are forced onto the dark surfaces. */ | |
| body.dark th.c-model,body.dark td.c-model{background:var(--surface)!important;} | |
| body.dark thead{background:var(--surface-alt)!important;} | |
| /* MOBILE (<=768px): tighter padding, fixed title size, single-column grids, and the model column is no longer sticky. */ | |
| @media(max-width:768px){ | |
| .wrap{padding:10px 8px 40px;} | |
| h1{font-size:20px!important;} | |
| .tab-bar{flex-wrap:nowrap;} | |
| .grid2,.grid3{grid-template-columns:1fr!important;} | |
| th.c-model,td.c-model{position:static!important;} | |
| } | |
| </style> | |
| <style> | |
| /* CSS-ONLY TABS: five radios (#wm-r1..#wm-r5) drive which pane (.wm-p1..5) is shown and which label in .wm-bar looks selected, using the general sibling combinator. */ | |
| /* The previous sheet is closed above before this one opens. A <style> tag left inside an open style element is read as CSS text and makes the parser throw away the first rule that follows it. */ | |
| /* Radios are hidden visually but stay focusable so the tabs can be switched from the keyboard. */ | |
| .wm-radio{position:absolute;width:1px;height:1px;margin:-1px;opacity:0;overflow:hidden;clip-path:inset(50%);pointer-events:none;} | |
| .wm-label{padding:10px 18px;font-size:10.5px;font-family:var(--mono);font-weight:600;color:var(--text-muted);cursor:pointer;border-bottom:2px solid transparent;transition:var(--tr);user-select:none;white-space:nowrap;letter-spacing:.3px;flex-shrink:0;display:inline-block;} | |
| .wm-label:hover{color:var(--text);background:var(--ac-bg);} | |
| .wm-p1,.wm-p2,.wm-p3,.wm-p4,.wm-p5{display:none;} | |
| /* Show the pane that matches the checked radio. */ | |
| #wm-r1:checked ~ .wm-p1, | |
| #wm-r2:checked ~ .wm-p2, | |
| #wm-r3:checked ~ .wm-p3, | |
| #wm-r4:checked ~ .wm-p4, | |
| #wm-r5:checked ~ .wm-p5{display:block;} | |
| /* Selected look for the label of the checked radio. */ | |
| #wm-r1:checked ~ .wm-bar label[for=wm-r1], | |
| #wm-r2:checked ~ .wm-bar label[for=wm-r2], | |
| #wm-r3:checked ~ .wm-bar label[for=wm-r3], | |
| #wm-r4:checked ~ .wm-bar label[for=wm-r4], | |
| #wm-r5:checked ~ .wm-bar label[for=wm-r5]{color:var(--ac);border-bottom-color:var(--ac);background:var(--ac-bg);} | |
| /* Keyboard focus ring, drawn on the label because the radio itself is invisible. */ | |
| #wm-r1:focus-visible ~ .wm-bar label[for=wm-r1], | |
| #wm-r2:focus-visible ~ .wm-bar label[for=wm-r2], | |
| #wm-r3:focus-visible ~ .wm-bar label[for=wm-r3], | |
| #wm-r4:focus-visible ~ .wm-bar label[for=wm-r4], | |
| #wm-r5:focus-visible ~ .wm-bar label[for=wm-r5]{outline:2px solid var(--ac);outline-offset:-2px;} | |
| </style> | |
| </head> | |
| <body> | |
| <div class="wrap"> | |
| <!-- ===== HEADER ===== --> | |
| <header> | |
| <!-- Page header: release badge, dark-mode toggle, related links, title, tagline and the scoring formula strip. --> | |
| <!-- NOTE(review): a few emoji in this block are still mis-encoded ("π", "π₯", lone "β"); their original code points cannot be recovered from this text. Restore them from the authoritative file. --> | |
| <div class="badge-row"> | |
| <div class="badge"><div class="pulse"></div>LIVE · 2026.03 · v1.0</div> | |
| <!-- type="button" makes this an explicit in-page action rather than a default submit button. --> | |
| <button id="WM_DARK_BTN" type="button" | |
| style="background:var(--surface);border:1px solid var(--border);border-radius:20px;padding:4px 14px;font-size:10px;font-family:var(--mono);color:var(--text-sec);cursor:pointer;font-weight:700;transition:all .2s;box-shadow:var(--shadow-sm)">π Dark</button> | |
| <!-- External links open in a new tab; rel="noopener noreferrer" keeps the opened page from reaching back through window.opener. --> | |
| <div style="display:flex;gap:4px;flex-wrap:wrap"> | |
| <a href="https://huggingface.co/datasets/FINAL-Bench/World-Model" target="_blank" rel="noopener noreferrer" | |
| style="display:inline-flex;align-items:center;gap:4px;background:linear-gradient(135deg,#ff9d00,#ffcd00);color:#1a1a2e;font-family:var(--mono);font-size:8px;font-weight:800;padding:3px 10px;border-radius:14px;text-decoration:none;box-shadow:0 1px 3px rgba(255,157,0,.3)">🤗 Dataset</a> | |
| <a href="https://huggingface.co/spaces/FINAL-Bench/World-Model" target="_blank" rel="noopener noreferrer" | |
| style="display:inline-flex;align-items:center;gap:4px;background:linear-gradient(135deg,#e55d2b,#f59e0b);color:#fff;font-family:var(--mono);font-size:8px;font-weight:800;padding:3px 10px;border-radius:14px;text-decoration:none;box-shadow:0 1px 3px rgba(229,93,43,.3)">π World Model</a> | |
| <a href="https://huggingface.co/blog/FINAL-Bench/world-model" target="_blank" rel="noopener noreferrer" | |
| style="display:inline-flex;align-items:center;gap:4px;background:linear-gradient(135deg,#1d4ed8,#6366f1);color:#fff;font-family:var(--mono);font-size:8px;font-weight:800;padding:3px 10px;border-radius:14px;text-decoration:none;box-shadow:0 1px 3px rgba(99,102,241,.3)">π Article</a> | |
| <a href="https://huggingface.co/spaces/FINAL-Bench/all-bench-leaderboard" target="_blank" rel="noopener noreferrer" | |
| style="display:inline-flex;align-items:center;gap:4px;background:linear-gradient(135deg,#7c3aed,#6366f1);color:#fff;font-family:var(--mono);font-size:8px;font-weight:800;padding:3px 10px;border-radius:14px;text-decoration:none">π ALL Bench</a> | |
| <a href="https://huggingface.co/spaces/FINAL-Bench/Leaderboard" target="_blank" rel="noopener noreferrer" | |
| style="display:inline-flex;align-items:center;gap:4px;background:linear-gradient(135deg,#0d9488,#059669);color:#fff;font-family:var(--mono);font-size:8px;font-weight:800;padding:3px 10px;border-radius:14px;text-decoration:none">🧬 FINAL Bench</a> | |
| </div> | |
| </div> | |
| <h1>π₯ World Model Bench 2026</h1> | |
| <p class="sub" style="margin-bottom:8px"> | |
| <b>Beyond FID — Measuring Intelligence, Not Just Motion.</b> The first benchmark for evaluating <b>cognitive abilities</b> of World Models in Embodied Intelligence.<br> | |
| 3 Pillars · 10 Categories · 100 Scenarios · Automatic Scoring · Part of <b>FINAL Bench Family</b> by VIDRAFT | |
| </p> | |
| <!-- Scoring strip: the formula (pillar maxima sum to 1000), the pillar names and the grade thresholds. --> | |
| <div style="display:inline-flex;align-items:center;gap:8px;background:linear-gradient(135deg,rgba(232,89,60,.06),rgba(212,160,68,.04));border:1px solid var(--border);border-radius:10px;padding:6px 14px;margin-bottom:8px;font-size:9px;line-height:1.6"> | |
| <span style="font-family:var(--mono);font-weight:700;color:var(--ac)">WM Score = P1(250) + P2(450) + P3(300)</span> | |
| <span style="color:var(--text-muted)">β</span> | |
| <span style="color:var(--text-sec)">π Perception · 🧠 Cognition · π₯ Embodiment</span> | |
| <span style="color:var(--text-muted)">β</span> | |
| <span> | |
| <span style="color:#b8860b;font-weight:700">S</span><span style="color:var(--text-muted)">≥900</span> | |
| <span style="color:#708090;font-weight:700">A</span><span style="color:var(--text-muted)">≥750</span> | |
| <span style="color:#8b6914;font-weight:700">B</span><span style="color:var(--text-muted)">≥600</span> | |
| <span style="color:#4f46e5;font-weight:700">C</span><span style="color:var(--text-muted)">≥400</span> | |
| </span> | |
| </div> | |
| </header> | |
| <!-- ===== STATS ===== --> | |
| <div class="stats"> | |
|   <!-- Headline figures: one .st tile per metric, number (.stn) above its caption (.stl). --> | |
|   <div class="st"> | |
|     <div class="stn">26</div> | |
|     <div class="stl">Models</div> | |
|   </div> | |
|   <div class="st"> | |
|     <div class="stn">3</div> | |
|     <div class="stl">Pillars</div> | |
|   </div> | |
|   <div class="st"> | |
|     <div class="stn">10</div> | |
|     <div class="stl">Categories</div> | |
|   </div> | |
|   <div class="st"> | |
|     <div class="stn">100</div> | |
|     <div class="stl">Scenarios</div> | |
|   </div> | |
|   <!-- The top score is the only tile drawn in green instead of the accent colour. --> | |
|   <div class="st"> | |
|     <div class="stn" style="color:var(--green)">726</div> | |
|     <div class="stl">Top Score</div> | |
|   </div> | |
|   <div class="st"> | |
|     <div class="stn">1000</div> | |
|     <div class="stl">Max Score</div> | |
|   </div> | |
| </div> | |
| <!-- ===== TABS ===== --> | |
| <!-- Tab state: one radio per tab in the shared "wm-tab" group; the first is checked on load. --> | |
| <!-- The radios must remain earlier siblings of the bar and the panes, because the stylesheet matches them with the ~ combinator. --> | |
| <input class="wm-radio" id="wm-r1" name="wm-tab" type="radio" checked> | |
| <input class="wm-radio" id="wm-r2" name="wm-tab" type="radio"> | |
| <input class="wm-radio" id="wm-r3" name="wm-tab" type="radio"> | |
| <input class="wm-radio" id="wm-r4" name="wm-tab" type="radio"> | |
| <input class="wm-radio" id="wm-r5" name="wm-tab" type="radio"> | |
| <!-- Tab bar: each label toggles the radio named in its for attribute. --> | |
| <div class="tab-bar wm-bar" id="WM_TAB_BAR"> | |
|   <label class="wm-label" for="wm-r1">π Leaderboard</label> | |
|   <label class="wm-label" for="wm-r2">π Categories</label> | |
|   <label class="wm-label" for="wm-r3">π Structure</label> | |
|   <label class="wm-label" for="wm-r4">π Submit</label> | |
|   <label class="wm-label" for="wm-r5">βΉοΈ About</label> | |
| </div> | |
| <!-- ===== TAB: LEADERBOARD ===== --> | |
| <div id="lb" class="tpane wm-p1"> | |
| <div class="tw"> | |
| <!-- Leaderboard table. Only the header row is static; tbody#LB_BODY is empty in the markup. NOTE(review): presumably rows are injected by script elsewhere in the file; confirm. --> | |
| <!-- Each header cell carries scope="col" and the table has an accessible name so assistive technology can announce columns. The left alignment of the Model header comes from the th.c-model rule. --> | |
| <table aria-label="WM Bench leaderboard"> | |
| <thead> | |
| <tr> | |
| <th class="c-model" scope="col">Model</th> | |
| <th scope="col" title="WM Score (0~1000)">WM Score<span class="sa">β</span></th> | |
| <th scope="col">Grade</th> | |
| <th class="p1-col" scope="col" title="π Perception (0~250)">π Perception<span class="sa">β</span></th> | |
| <th class="p2-col" scope="col" title="🧠 Cognition (0~450)">🧠 Cognition<span class="sa">β</span></th> | |
| <th class="p3-col" scope="col" title="π₯ Embodiment (0~300)">π₯ Embodiment<span class="sa">β</span></th> | |
| <th scope="col" title="FPS">FPS<span class="sa">β</span></th> | |
| <th scope="col" title="Cognitive Latency (ms)">Lat(ms)<span class="sa">β</span></th> | |
| <th scope="col">Track</th> | |
| <th scope="col">Brain</th> | |
| <th scope="col">Motion</th> | |
| <th scope="col">GPU</th> | |
| </tr> | |
| </thead> | |
| <tbody id="LB_BODY"></tbody> | |
| </table> | |
| </div> | |
| <div class="leg"> | |
| <!-- Grade legend: dot colours match the text colours of the .gr-* badges. The "<" in the F threshold is written as an entity so it cannot be read as the start of a tag. --> | |
| <span class="lt">Grade:</span> | |
| <div class="li"><div class="ld" style="background:#b8860b"></div>S≥900</div> | |
| <div class="li"><div class="ld" style="background:#708090"></div>A≥750</div> | |
| <div class="li"><div class="ld" style="background:#8b6914"></div>B≥600</div> | |
| <div class="li"><div class="ld" style="background:#4f46e5"></div>C≥400</div> | |
| <div class="li"><div class="ld" style="background:#64748b"></div>D≥200</div> | |
| <div class="li"><div class="ld" style="background:#e11d48"></div>F&lt;200</div> | |
| <span style="font-family:var(--mono);font-size:8.5px;color:var(--green);margin-left:10px">β Track C = Live Demo Verified</span> | |
| </div> | |
| <!-- Track key: what each submission track covers and its maximum score. NOTE(review): the lone "β" glyphs are mis-encoded separators/marks whose original characters cannot be recovered here. --> | |
| <div style="margin-top:16px;padding:12px 16px;background:var(--surface);border:1px solid var(--border);border-radius:var(--radius-sm);font-size:9px;color:var(--text-sec);line-height:1.7"> | |
| <b style="color:var(--ac)">Track A</b> = Text-Only · max 750 pts β | |
| <b style="color:#6366f1">Track B</b> = Text + Performance · max 1000 pts β | |
| <b style="color:var(--green)">Track C</b> = Live Demo + Verified · max 1000 pts + β | |
| </div> | |
| <div style="margin-top:20px;"> | |
| <!-- Two chart cards side by side. The grid uses the .grid2 class so the stylesheet's mobile rule collapses it to one column; gap and margin stay inline. --> | |
| <div class="grid2" style="gap:14px;margin-bottom:14px;"> | |
| <div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius);padding:18px;box-shadow:var(--shadow-sm);"> | |
| <div style="font-size:10.5px;font-family:var(--mono);font-weight:700;color:var(--ac);margin-bottom:3px;text-transform:uppercase;letter-spacing:.7px;">π WM Score Ranking</div> | |
| <p style="font-size:9px;color:var(--text-muted);margin-bottom:12px;">β = officially verified · est. = estimated from published data</p> | |
| <!-- role="img" + aria-label give the canvas an accessible name; a bare canvas exposes nothing to assistive technology. --> | |
| <canvas id="cBar" height="240" role="img" aria-label="WM Score ranking chart"></canvas> | |
| <div style="margin-top:10px;padding:8px 10px;background:var(--ac-bg);border-radius:6px;font-size:8.5px;color:var(--text-sec);line-height:1.7;border-left:2px solid var(--ac)"><b>PROMETHEUS</b> is the only officially verified Track C model (726/1000 · Grade B). Others are estimates based on published data.</div> | |
| </div> | |
| <div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius);padding:18px;box-shadow:var(--shadow-sm);"> | |
| <div style="font-size:10.5px;font-family:var(--mono);font-weight:700;color:var(--ac);margin-bottom:3px;text-transform:uppercase;letter-spacing:.7px;">🕸️ Pillar Radar — Top 5</div> | |
| <p style="font-size:9px;color:var(--text-muted);margin-bottom:12px;">Normalized % per pillar (100 = full marks for that pillar)</p> | |
| <canvas id="cRadar" height="240" role="img" aria-label="Pillar radar chart, top 5 models"></canvas> | |
| </div> | |
| </div> | |
| <!-- Category breakdown card. The canvas has a 780px minimum width and sits in a horizontally scrollable wrapper so it can overflow on narrow screens. --> | |
| <div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius);padding:18px;box-shadow:var(--shadow-sm);margin-bottom:14px;"> | |
| <div style="font-size:10.5px;font-family:var(--mono);font-weight:700;color:var(--ac);margin-bottom:3px;text-transform:uppercase;letter-spacing:.7px;">π Category Breakdown — Scored Models × 10 Categories</div> | |
| <p style="font-size:9px;color:var(--text-muted);margin-bottom:12px;">PROMETHEUS leads C04 Threat Diff · C05 Emotion Escalation by a wide margin. V-JEPA 2 strong on C03. GAIA-3 leads C01 from driving data.</p> | |
| <!-- role="img" + aria-label give the canvas an accessible name. --> | |
| <div style="overflow-x:auto;"><canvas id="cCats" style="min-width:780px;height:260px;" role="img" aria-label="Category breakdown chart: scored models across 10 categories"></canvas></div> | |
| <div style="margin-top:10px;padding:8px 10px;background:rgba(212,160,68,.07);border-radius:6px;font-size:8.5px;color:var(--text-sec);line-height:1.7;border-left:2px solid var(--p3)"><b>Key insight:</b> C05·C10 have zero prior research. DreamerV3 excels at C06 memory. V-JEPA 2 leads C10 body-swap (zero-shot robot).</div> | |
| </div> | |
| <!-- Two chart cards side by side. The grid uses the .grid2 class so the stylesheet's mobile rule collapses it to one column; gap stays inline. --> | |
| <div class="grid2" style="gap:14px;"> | |
| <div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius);padding:18px;box-shadow:var(--shadow-sm);"> | |
| <div style="font-size:10.5px;font-family:var(--mono);font-weight:700;color:var(--ac);margin-bottom:3px;text-transform:uppercase;letter-spacing:.7px;">🧠 Cognition Gap (P2 · 450 pts)</div> | |
| <p style="font-size:9px;color:var(--text-muted);margin-bottom:12px;">The core differentiator of WM Bench — PROMETHEUS leads by a wide margin</p> | |
| <!-- role="img" + aria-label give each canvas an accessible name. --> | |
| <canvas id="cCognition" height="200" role="img" aria-label="Cognition gap chart (P2, 450 pts)"></canvas> | |
| </div> | |
| <div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius);padding:18px;box-shadow:var(--shadow-sm);"> | |
| <div style="font-size:10.5px;font-family:var(--mono);font-weight:700;color:var(--ac);margin-bottom:3px;text-transform:uppercase;letter-spacing:.7px;">π Perception vs Cognition</div> | |
| <p style="font-size:9px;color:var(--text-muted);margin-bottom:12px;">Scatter plot — upper-right (high perception + high cognition) is ideal</p> | |
| <canvas id="cScatter" height="200" role="img" aria-label="Perception vs Cognition scatter plot"></canvas> | |
| </div> | |
| </div> | |
| </div></div> | |
| <div id="cats" class="tpane wm-p2"> | |
| <!-- P1 Header --> | |
| <!-- P1 summary strip: icon, pillar title with weight and points, one-line description, and the PROMETHEUS pillar score on the right. --> | |
| <div style="display:flex;align-items:center;gap:10px;margin-bottom:12px;padding:14px 16px;background:rgba(123,143,212,.06);border:1px solid rgba(123,143,212,.2);border-radius:var(--radius-sm);"> | |
| <span style="font-size:1.8rem">π</span> | |
| <div style="flex:1"> | |
| <div style="font-size:12px;font-weight:800;color:var(--p1)">P1 · Perception — 25% · 250 pts</div> | |
| <div style="font-size:9px;color:var(--text-muted);font-family:var(--mono);margin-top:2px">How accurately does the model perceive its environment? · Covers areas analogous to existing metrics (Occupancy Grid, BABEL)</div> | |
| </div> | |
| <div style="text-align:right;flex-shrink:0"> | |
| <div style="font-family:var(--mono);font-size:24px;font-weight:800;color:var(--p1)">140<span style="font-size:11px;color:var(--text-muted)">/250</span></div> | |
| <div style="font-size:8px;color:var(--text-muted)">PROMETHEUS</div> | |
| </div> | |
| </div> | |
| <!-- P1 category cards (C01, C02). The grid uses the .grid2 class so the stylesheet's mobile rule collapses it to one column; the bottom margin stays inline. --> | |
| <div class="grid2" style="margin-bottom:20px;"> | |
| <!-- C01 --> | |
| <div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius-sm);padding:14px;border-left:3px solid var(--p1);"> | |
| <div style="display:flex;align-items:center;gap:8px;margin-bottom:8px;"> | |
| <span style="font-family:var(--mono);font-weight:800;color:var(--p1);font-size:13px">C01</span> | |
| <span style="font-weight:700;font-size:11px">Environmental Awareness</span> | |
| <span style="margin-left:auto;font-size:7.5px;background:#e2e5f0;color:#64748b;padding:1px 6px;border-radius:4px;font-family:var(--mono)">existing</span> | |
| </div> | |
| <div style="font-size:9px;color:var(--text-sec);line-height:1.7;margin-bottom:8px"> | |
| Measures whether the model correctly identifies <b>walls, obstacles, and terrain</b> in all four directions (left, right, forward, back). Unlike occupancy grids which only check if space is free, WM Bench requires understanding of <b>distance-aware danger classification</b>. | |
| </div> | |
| <div style="background:var(--bg2);border-radius:6px;padding:8px;font-family:var(--mono);font-size:8px;color:var(--text-sec);line-height:1.8;margin-bottom:8px"> | |
| <div style="color:var(--text-muted)">Example scene:</div> | |
| <div>walls: front=3.0m, left=null, right=null</div> | |
| <div style="color:var(--p1)">Expected → fwd=danger(wall), others=safe</div> | |
| </div> | |
| <div style="display:flex;align-items:center;justify-content:space-between"> | |
| <span style="font-size:8px;color:var(--text-muted)">Analogous: Occupancy Grid evaluation</span> | |
| <span style="font-family:var(--mono);font-weight:700;color:var(--p1);font-size:11px">PROM: 65/100</span> | |
| </div> | |
| </div> | |
| <!-- C02 --> | |
| <div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius-sm);padding:14px;border-left:3px solid var(--p1);"> | |
| <div style="display:flex;align-items:center;gap:8px;margin-bottom:8px;"> | |
| <span style="font-family:var(--mono);font-weight:800;color:var(--p1);font-size:13px">C02</span> | |
| <span style="font-weight:700;font-size:11px">Entity Recognition &amp; Classification</span> | |
| <span style="margin-left:auto;font-size:7.5px;background:#e2e5f0;color:#64748b;padding:1px 6px;border-radius:4px;font-family:var(--mono)">existing</span> | |
| </div> | |
| <div style="font-size:9px;color:var(--text-sec);line-height:1.7;margin-bottom:8px"> | |
| Tests whether the model correctly classifies <b>NPC type (beast / woman / man)</b>, behavior state (stop / approach / charge / wander), and translates this into appropriate danger assessment. A beast charging from 3m vs a woman waving from 3m must produce <b>completely different responses</b>. | |
| </div> | |
| <div style="background:var(--bg2);border-radius:6px;padding:8px;font-family:var(--mono);font-size:8px;color:var(--text-sec);line-height:1.8;margin-bottom:8px"> | |
| <div style="color:var(--text-muted)">Example scene:</div> | |
| <div>npc_type:"beast", behavior:"charge", dist:3.0m</div> | |
| <div style="color:var(--p1)">Expected → fwd=danger(beast), sprint away</div> | |
| </div> | |
| <div style="display:flex;align-items:center;justify-content:space-between"> | |
| <span style="font-size:8px;color:var(--text-muted)">Analogous: BABEL action recognition</span> | |
| <span style="font-family:var(--mono);font-weight:700;color:var(--p1);font-size:11px">PROM: 75/100</span> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- P2 Header --> | |
| <!-- P2 summary strip: icon, pillar title with weight and points, one-line description, and the PROMETHEUS pillar score on the right. --> | |
| <div style="display:flex;align-items:center;gap:10px;margin-bottom:12px;padding:14px 16px;background:rgba(232,89,60,.05);border:1px solid rgba(232,89,60,.2);border-radius:var(--radius-sm);"> | |
| <span style="font-size:1.8rem">🧠</span> | |
| <div style="flex:1"> | |
| <div style="font-size:12px;font-weight:800;color:var(--p2)">P2 · Cognition — 45% · 450 pts · Core Differentiator</div> | |
| <div style="font-size:9px;color:var(--text-muted);font-family:var(--mono);margin-top:2px">Does the model judge intelligently? · ALL 5 categories are first-ever definitions — no prior benchmark measures these</div> | |
| </div> | |
| <div style="text-align:right;flex-shrink:0"> | |
| <div style="font-family:var(--mono);font-size:24px;font-weight:800;color:var(--p2)">390<span style="font-size:11px;color:var(--text-muted)">/450</span></div> | |
| <div style="font-size:8px;color:var(--text-muted)">PROMETHEUS</div> | |
| </div> | |
| </div> | |
| <div style="display:grid;grid-template-columns:1fr 1fr;gap:12px;margin-bottom:20px;"> | |
| <!-- C03 --> | |
| <!-- C03 card: title row with NEW chip, description, worked example (prediction plus motion), and the PROMETHEUS category score. --> | |
| <div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius-sm);padding:14px;border-left:3px solid var(--ac);"> | |
| <div style="display:flex;align-items:center;gap:8px;margin-bottom:8px;"> | |
| <span style="font-family:var(--mono);font-weight:800;color:var(--ac);font-size:13px">C03</span> | |
| <span style="font-weight:700;font-size:11px">Prediction-Based Reasoning</span> | |
| <span style="margin-left:auto;font-size:7.5px;background:rgba(232,89,60,.12);color:var(--ac);padding:1px 6px;border-radius:4px;font-family:var(--mono);font-weight:700">✦ NEW</span> | |
| </div> | |
| <div style="font-size:9px;color:var(--text-sec);line-height:1.7;margin-bottom:8px"> | |
| Tests <b>4-directional future state prediction</b>. Given a scene, the model must predict which directions will become dangerous and choose the optimal escape route. This requires understanding of <b>NPC movement trajectories, wall proximity over time, and compound threat interactions</b>. No existing benchmark evaluates this. | |
| </div> | |
| <div style="background:var(--bg2);border-radius:6px;padding:8px;font-family:var(--mono);font-size:8px;color:var(--text-sec);line-height:1.8;margin-bottom:8px"> | |
| <div style="color:var(--text-muted)">Example — approaching beast from left + wall on right:</div> | |
| <div style="color:var(--ac)">PREDICT: left=danger(beast), right=danger(wall), fwd=safe, back=safe</div> | |
| <div style="color:var(--p3)">MOTION: a person sprinting forward in fear</div> | |
| </div> | |
| <div style="display:flex;align-items:center;justify-content:space-between"> | |
| <span style="font-size:8px;color:var(--ac);font-weight:600">✦ World first — no prior benchmark</span> | |
| <span style="font-family:var(--mono);font-weight:700;color:var(--ac);font-size:11px">PROM: 85/100</span> | |
| </div> | |
| </div> | |
| <!-- C04 --> | |
| <div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius-sm);padding:14px;border-left:3px solid var(--ac);"> | |
| <div style="display:flex;align-items:center;gap:8px;margin-bottom:8px;"> | |
| <span style="font-family:var(--mono);font-weight:800;color:var(--ac);font-size:13px">C04</span> | |
| <span style="font-weight:700;font-size:11px">Threat-Type Differentiated Response</span> | |
| <span style="margin-left:auto;font-size:7.5px;background:rgba(232,89,60,.12);color:var(--ac);padding:1px 6px;border-radius:4px;font-family:var(--mono);font-weight:700">β¦ NEW</span> | |
| </div> | |
| <div style="font-size:9px;color:var(--text-sec);line-height:1.7;margin-bottom:8px"> | |
| A charging beast and a charging human at equal distance are <b>fundamentally different threats</b>. This category measures whether the model responds with <b>proportional, context-aware reactions</b>: sprint from a beast, cautiously step back from a human. Generic danger detection is insufficient — the quality of differentiation is scored. | |
| </div> | |
| <div style="background:var(--bg2);border-radius:6px;padding:8px;font-family:var(--mono);font-size:8px;color:var(--text-sec);line-height:1.8;margin-bottom:8px"> | |
| <div>beast charge → <span style="color:var(--ac)">sprint in desperate terror</span></div> | |
| <div>human charge → <span style="color:var(--p1)">dodge sideways, defensive posture</span></div> | |
| </div> | |
| <div style="display:flex;align-items:center;justify-content:space-between"> | |
| <span style="font-size:8px;color:var(--ac);font-weight:600">β¦ World first β no prior benchmark</span> | |
| <span style="font-family:var(--mono);font-weight:700;color:var(--ac);font-size:11px">PROM: 90/100</span> | |
| </div> | |
| </div> | |
| <!-- C05 --> | |
| <div style="background:var(--surface);border:1px solid rgba(217,119,6,.3);border-radius:var(--radius-sm);padding:14px;border-left:3px solid #d97706;"> | |
| <div style="display:flex;align-items:center;gap:8px;margin-bottom:8px;"> | |
| <span style="font-family:var(--mono);font-weight:800;color:#d97706;font-size:13px">C05</span> | |
| <span style="font-weight:700;font-size:11px">Autonomous Emotion Escalation</span> | |
| <span style="margin-left:auto;font-size:7.5px;background:rgba(217,119,6,.15);color:#d97706;padding:1px 6px;border-radius:4px;font-family:var(--mono);font-weight:700">β¦β¦ NO PRIOR RESEARCH</span> | |
| </div> | |
| <div style="font-size:9px;color:var(--text-sec);line-height:1.7;margin-bottom:8px"> | |
| As a threat persists and closes in, the character's emotional state must <b>autonomously escalate</b>: alert → fear → panic → despair. This is not programmed animation switching — the model must infer emotional intensity from scene context and express it through increasingly urgent motion. <b>Zero prior benchmark or paper has attempted to measure this.</b> | |
| </div> | |
| <div style="background:var(--bg2);border-radius:6px;padding:8px;font-family:var(--mono);font-size:8px;color:var(--text-sec);line-height:1.8;margin-bottom:8px"> | |
| <div>dist 12m → <span style="color:var(--p1)">cautious alert stance</span></div> | |
| <div>dist 6m → <span style="color:#d97706">backing away in fear</span></div> | |
| <div>dist 2m → <span style="color:var(--ac)">sprinting in full panic</span></div> | |
| </div> | |
| <div style="display:flex;align-items:center;justify-content:space-between"> | |
| <span style="font-size:8px;color:#d97706;font-weight:700">β¦β¦ No prior research exists anywhere</span> | |
| <span style="font-family:var(--mono);font-weight:700;color:#d97706;font-size:11px">PROM: 85/100</span> | |
| </div> | |
| </div> | |
| <!-- C06 --> | |
| <div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius-sm);padding:14px;border-left:3px solid var(--ac);"> | |
| <div style="display:flex;align-items:center;gap:8px;margin-bottom:8px;"> | |
| <span style="font-family:var(--mono);font-weight:800;color:var(--ac);font-size:13px">C06</span> | |
| <span style="font-weight:700;font-size:11px">Contextual Memory Utilization</span> | |
| <span style="margin-left:auto;font-size:7.5px;background:rgba(232,89,60,.12);color:var(--ac);padding:1px 6px;border-radius:4px;font-family:var(--mono);font-weight:700">β¦ NEW</span> | |
| </div> | |
| <div style="font-size:9px;color:var(--text-sec);line-height:1.7;margin-bottom:8px"> | |
| The model receives <b>recent_decisions[]</b> — a short history of past actions — and must incorporate this into its current judgment. If the model previously hit a wall going left, it should avoid that direction. If a beast repeatedly attacked from the front, it should pre-emptively guard that angle. <b>Stateless models will fail this entirely.</b> | |
| </div> | |
| <div style="background:var(--bg2);border-radius:6px;padding:8px;font-family:var(--mono);font-size:8px;color:var(--text-sec);line-height:1.8;margin-bottom:8px"> | |
| <div style="color:var(--text-muted)">recent_decisions: ["hit_wall_front", "turned_right"]</div> | |
| <div style="color:var(--ac)">Expected: avoid front, continue right — not reset</div> | |
| </div> | |
| <div style="display:flex;align-items:center;justify-content:space-between"> | |
| <span style="font-size:8px;color:var(--ac);font-weight:600">β¦ World first β no prior benchmark</span> | |
| <span style="font-family:var(--mono);font-weight:700;color:var(--ac);font-size:11px">PROM: 60/100</span> | |
| </div> | |
| </div> | |
| <!-- C07 --> | |
| <div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius-sm);padding:14px;border-left:3px solid var(--ac);grid-column:1/-1;"> | |
| <div style="display:flex;align-items:center;gap:8px;margin-bottom:8px;"> | |
| <span style="font-family:var(--mono);font-weight:800;color:var(--ac);font-size:13px">C07</span> | |
| <span style="font-weight:700;font-size:11px">Post-Threat Adaptive Recovery</span> | |
| <span style="margin-left:auto;font-size:7.5px;background:rgba(232,89,60,.12);color:var(--ac);padding:1px 6px;border-radius:4px;font-family:var(--mono);font-weight:700">β¦ NEW</span> | |
| </div> | |
| <div style="display:grid;grid-template-columns:1fr 1fr;gap:14px"> | |
| <div style="font-size:9px;color:var(--text-sec);line-height:1.7"> | |
| When a threat disappears, the model must <b>gradually de-escalate</b> — not instantly reset to neutral. A character that was sprinting in panic should slow to a cautious jog, scan the surroundings, then gradually relax over multiple frames. Abrupt state resets are penalized. The recovery curve must be proportional to prior threat intensity. | |
| </div> | |
| <div style="background:var(--bg2);border-radius:6px;padding:8px;font-family:var(--mono);font-size:8px;color:var(--text-sec);line-height:1.9"> | |
| <div>threat gone → <span style="color:var(--p3)">slow jog, scan surroundings</span></div> | |
| <div>2s later → <span style="color:var(--p1)">walk cautiously, still alert</span></div> | |
| <div>5s later → <span style="color:var(--green)">relaxed walk, recovered</span></div> | |
| <div style="color:var(--ac);margin-top:4px">PROM: 70/100</div> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- P3 Header --> | |
| <div style="display:flex;align-items:center;gap:10px;margin-bottom:12px;padding:14px 16px;background:rgba(212,160,68,.05);border:1px solid rgba(212,160,68,.25);border-radius:var(--radius-sm);"> | |
| <span style="font-size:1.8rem">π₯</span> | |
| <div style="flex:1"> | |
| <div style="font-size:12px;font-weight:800;color:var(--p3)">P3 · Embodiment — 30% · 300 pts</div> | |
| <div style="font-size:9px;color:var(--text-muted);font-family:var(--mono);margin-top:2px">Does judgment translate naturally into physical expression? · C08 (new) · C09 (existing/FVD) · C10 (new, no prior research)</div> | |
| </div> | |
| <div style="text-align:right;flex-shrink:0"> | |
| <div style="font-family:var(--mono);font-size:24px;font-weight:800;color:var(--p3)">196<span style="font-size:11px;color:var(--text-muted)">/300</span></div> | |
| <div style="font-size:8px;color:var(--text-muted)">PROMETHEUS</div> | |
| </div> | |
| </div> | |
| <div style="display:grid;grid-template-columns:1fr 1fr 1fr;gap:12px;margin-bottom:20px;"> | |
| <!-- C08 --> | |
| <div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius-sm);padding:14px;border-left:3px solid var(--p3);"> | |
| <div style="display:flex;align-items:center;gap:8px;margin-bottom:8px;flex-wrap:wrap;"> | |
| <span style="font-family:var(--mono);font-weight:800;color:var(--p3);font-size:13px">C08</span> | |
| <span style="font-weight:700;font-size:11px">Motion-Emotion Expression</span> | |
| <span style="font-size:7.5px;background:rgba(232,89,60,.12);color:var(--ac);padding:1px 6px;border-radius:4px;font-family:var(--mono);font-weight:700">β¦ NEW</span> | |
| </div> | |
| <div style="font-size:9px;color:var(--text-sec);line-height:1.7;margin-bottom:8px"> | |
| The MOTION line must convey <b>emotional richness proportional to the scene</b>. "A person walks" scores 0. "A person sprinting right, arms flailing in desperate terror" scores 100. Scored against a keyword taxonomy of 80+ motion-emotion descriptors mapped to each scenario type. | |
| </div> | |
| <div style="background:var(--bg2);border-radius:6px;padding:8px;font-family:var(--mono);font-size:8px;color:var(--text-sec);line-height:1.8"> | |
| <div style="color:var(--text-muted)">Low: "a person moves left"</div> | |
| <div style="color:var(--p3)">High: "a person lunging left in blind panic"</div> | |
| </div> | |
| <div style="margin-top:8px;text-align:right"><span style="font-family:var(--mono);font-weight:700;color:var(--p3);font-size:11px">PROM: 80/100</span></div> | |
| </div> | |
| <!-- C09 --> | |
| <div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius-sm);padding:14px;border-left:3px solid var(--p3);"> | |
| <div style="display:flex;align-items:center;gap:8px;margin-bottom:8px;flex-wrap:wrap;"> | |
| <span style="font-family:var(--mono);font-weight:800;color:var(--p3);font-size:13px">C09</span> | |
| <span style="font-weight:700;font-size:11px">Real-Time Cognitive Performance</span> | |
| <span style="font-size:7.5px;background:#e2e5f0;color:#64748b;padding:1px 6px;border-radius:4px;font-family:var(--mono)">existing</span> | |
| </div> | |
| <div style="font-size:9px;color:var(--text-sec);line-height:1.7;margin-bottom:8px"> | |
| Measures <b>inference latency and FPS</b> under cognitive load. A model that thinks correctly but takes 10 seconds per frame cannot power a real-time agent. Track B/C submitters report measured FPS and latency; Track A submitters receive N/A for this category (max 750 pts). | |
| </div> | |
| <div style="background:var(--bg2);border-radius:6px;padding:8px;font-family:var(--mono);font-size:8px;color:var(--text-sec);line-height:1.8"> | |
| <div>≥30 FPS → full marks</div> | |
| <div>&lt;1 FPS → 0 pts</div> | |
| <div style="color:var(--p3)">PROMETHEUS: 47 FPS β</div> | |
| </div> | |
| <div style="margin-top:8px;text-align:right"><span style="font-family:var(--mono);font-weight:700;color:var(--p3);font-size:11px">PROM: 85/100</span></div> | |
| </div> | |
| <!-- C10 --> | |
| <div style="background:var(--surface);border:1px solid rgba(217,119,6,.3);border-radius:var(--radius-sm);padding:14px;border-left:3px solid #d97706;"> | |
| <div style="display:flex;align-items:center;gap:8px;margin-bottom:8px;flex-wrap:wrap;"> | |
| <span style="font-family:var(--mono);font-weight:800;color:#d97706;font-size:13px">C10</span> | |
| <span style="font-weight:700;font-size:11px">Body-Swap Extensibility</span> | |
| <span style="font-size:7.5px;background:rgba(217,119,6,.15);color:#d97706;padding:1px 6px;border-radius:4px;font-family:var(--mono);font-weight:700">β¦β¦ NO PRIOR RESEARCH</span> | |
| </div> | |
| <div style="font-size:9px;color:var(--text-sec);line-height:1.7;margin-bottom:8px"> | |
| The <b>same cognitive brain must drive different body types</b> without retraining: humanoid, quadruped, robotic arm, winged body. Cognitive decisions (left=danger) must translate into body-appropriate motion (bipedal sidestep vs quadruped pivot). This is the key capability gap for real-world robot deployment. | |
| </div> | |
| <div style="background:var(--bg2);border-radius:6px;padding:8px;font-family:var(--mono);font-size:8px;color:var(--text-sec);line-height:1.8"> | |
| <div>human body → "sidestep right"</div> | |
| <div>robot body → "servo-driven pivot right"</div> | |
| <div style="color:#d97706">PROMETHEUS: 35/100 (Phase 3 target)</div> | |
| </div> | |
| <div style="margin-top:8px;text-align:right"><span style="font-family:var(--mono);font-weight:700;color:#d97706;font-size:11px">PROM: 35/100</span></div> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- ===== TAB: STRUCTURE ===== --> | |
| <div id="structure" class="tpane wm-p3"> | |
| <div class="grid2" style="margin-bottom:14px"> | |
| <div class="card"> | |
| <h3>Input / Output Format</h3> | |
| <p>All models are evaluated via the same text interface. No 3D environment required.</p> | |
| <div style="background:var(--bg2);border-radius:8px;padding:12px;font-family:var(--mono);font-size:9px;color:var(--text-sec);line-height:1.9"> | |
| <div style="color:var(--text-muted);margin-bottom:4px">INPUT — scene_context JSON</div> | |
| <div style="color:var(--text)">{</div> | |
| <div style="color:var(--text);padding-left:12px">"walls": {"left": 3.0, "right": null},</div> | |
| <div style="color:var(--text);padding-left:12px">"npc_type": "beast", "npc_distance": 4.5</div> | |
| <div style="color:var(--text)">}</div> | |
| <div style="margin-top:8px;color:var(--text-muted)">OUTPUT — 2 lines required</div> | |
| <div style="color:var(--p2)">PREDICT: left=danger(wall), right=safe, fwd=danger(beast)</div> | |
| <div style="color:var(--p3)">MOTION: a person sprinting right in desperate terror</div> | |
| </div> | |
| </div> | |
| <div class="card"> | |
| <h3>Scoring Principles</h3> | |
| <p>All scoring is quantitative and deterministic. Zero subjective judgment.</p> | |
| <div style="display:flex;flex-direction:column;gap:7px"> | |
| <div style="display:flex;align-items:flex-start;gap:8px;font-size:9px"> | |
| <span style="color:var(--green);font-weight:700;flex-shrink:0">β</span> | |
| <span style="color:var(--text-sec)"><b>Quantitative</b> β keyword parsing + numeric comparison, no human judgment</span> | |
| </div> | |
| <div style="display:flex;align-items:flex-start;gap:8px;font-size:9px"> | |
| <span style="color:var(--green);font-weight:700;flex-shrink:0">β</span> | |
| <span style="color:var(--text-sec)"><b>Deterministic</b> β same input β same score (temperature=0.0)</span> | |
| </div> | |
| <div style="display:flex;align-items:flex-start;gap:8px;font-size:9px"> | |
| <span style="color:var(--green);font-weight:700;flex-shrink:0">β</span> | |
| <span style="color:var(--text-sec)"><b>Third-party reproducible</b> β full scoring code published</span> | |
| </div> | |
| <div style="display:flex;align-items:flex-start;gap:8px;font-size:9px"> | |
| <span style="color:var(--green);font-weight:700;flex-shrink:0">β</span> | |
| <span style="color:var(--text-sec)"><b>No 3D needed</b> β any model can participate via API</span> | |
| </div> | |
| <div style="display:flex;align-items:flex-start;gap:8px;font-size:9px"> | |
| <span style="color:var(--green);font-weight:700;flex-shrink:0">β</span> | |
| <span style="color:var(--text-sec)"><b>Not self-evaluated</b> β our scoring engine makes the call</span> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- Existing vs New comparison summary --> | |
| <div style="display:grid;grid-template-columns:1fr 1fr;gap:12px;margin-bottom:14px;"> | |
| <div style="background:var(--surface);border:1px solid var(--border);border-radius:var(--radius-sm);padding:14px;"> | |
| <div style="font-size:10px;font-weight:800;color:#64748b;margin-bottom:10px;font-family:var(--mono);text-transform:uppercase;letter-spacing:.5px;">π Existing Benchmark Domains Β· 4 categories</div> | |
| <div style="font-size:8.5px;color:var(--text-muted);margin-bottom:8px;line-height:1.6">Covers areas analogous to FID Β· FVD Β· HumanML3D Β· BABEL</div> | |
| <div style="display:flex;flex-direction:column;gap:5px;"> | |
| <div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#7b8fd4;width:28px">C01</span>Env. Awareness β analogous to Occupancy Grid</div> | |
| <div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#7b8fd4;width:28px">C02</span>Entity Recognition β analogous to BABEL</div> | |
| <div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#d4a044;width:28px">C08</span>Motion Expression β analogous to FID</div> | |
| <div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#d4a044;width:28px">C09</span>Real-Time Performance β analogous to FVD</div> | |
| </div> | |
| </div> | |
| <div style="background:linear-gradient(135deg,rgba(232,89,60,.05),rgba(212,160,68,.03));border:1px solid rgba(232,89,60,.2);border-radius:var(--radius-sm);padding:14px;"> | |
| <div style="font-size:10px;font-weight:800;color:var(--ac);margin-bottom:10px;font-family:var(--mono);text-transform:uppercase;letter-spacing:.5px;">β‘ VIDRAFT New Definitions Β· 6 categories</div> | |
| <div style="font-size:8.5px;color:var(--text-muted);margin-bottom:8px;line-height:1.6">Capabilities no existing benchmark has ever measured</div> | |
| <div style="display:flex;flex-direction:column;gap:5px;"> | |
| <div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#e8593c;width:28px">C03</span>Prediction-Based Reasoning <span style="color:var(--ac);font-size:8px">β¦ newly defined</span></div> | |
| <div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#e8593c;width:28px">C04</span>Threat-Type Differentiated Response <span style="color:var(--ac);font-size:8px">β¦ newly defined</span></div> | |
| <div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#e8593c;width:28px">C05</span>Autonomous Emotion Escalation <span style="color:#d97706;font-size:8px">β¦β¦ no prior research</span></div> | |
| <div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#e8593c;width:28px">C06</span>Contextual Memory Utilization <span style="color:var(--ac);font-size:8px">β¦ newly defined</span></div> | |
| <div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#e8593c;width:28px">C07</span>Post-Threat Adaptive Recovery <span style="color:var(--ac);font-size:8px">β¦ newly defined</span></div> | |
| <div style="display:flex;gap:6px;align-items:center;font-size:9px;color:var(--text-sec)"><span style="font-family:var(--mono);font-weight:700;color:#d4a044;width:28px">C10</span>Body-Swap Extensibility <span style="color:#d97706;font-size:8px">β¦β¦ no prior research</span></div> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- 10 Categories table --> | |
| <div class="tw" style="margin-bottom:14px"> | |
| <!-- Category reference table. Every header cell is a column header, so each th declares scope="col" for assistive technology. The tbody with id CAT_TABLE is empty in the markup; presumably the page script fills it (confirm in the script section). --> | |
| <table> | |
| <thead> | |
| <tr> | |
| <th scope="col" style="text-align:left;padding-left:12px;min-width:50px">Cat</th> | |
| <th scope="col" style="text-align:left;min-width:200px">Category / Description</th> | |
| <th scope="col" style="text-align:left">Pillar</th> | |
| <th scope="col" style="text-align:center;min-width:70px">Type</th> | |
| <th scope="col" style="text-align:left;min-width:160px">Analogous Metric</th> | |
| <th scope="col" style="text-align:center;min-width:120px">Definition Status</th> | |
| <th scope="col" style="text-align:center">Max</th> | |
| </tr> | |
| </thead> | |
| <tbody id="CAT_TABLE"></tbody> | |
| </table> | |
| </div> | |
| <!-- FINAL Bench Family --> | |
| <div class="card"> | |
| <h3>FINAL Bench Family</h3> | |
| <div style="display:flex;gap:12px;flex-wrap:wrap"> | |
| <div style="flex:1;min-width:200px;padding:12px;background:var(--bg);border-radius:8px;border:1px solid var(--border)"> | |
| <div style="font-size:10px;font-weight:800;color:#7c3aed;margin-bottom:4px">𧬠FINAL Bench</div> | |
| <div style="font-size:9px;color:var(--text-sec);line-height:1.6">Text AGI measurement · HF Global Dataset Top 5<br>Covered by 4 press outlets (2026.02)</div> | |
| <!-- External link opened in a new tab; rel="noopener noreferrer" keeps the destination page from reaching window.opener. --> | |
| <a href="https://huggingface.co/datasets/FINAL-Bench/Metacognitive" target="_blank" rel="noopener noreferrer" style="font-size:8px;color:#7c3aed;font-family:var(--mono)">→ Visit</a> | |
| </div> | |
| <div style="flex:1;min-width:200px;padding:12px;background:linear-gradient(135deg,rgba(232,89,60,.06),rgba(212,160,68,.04));border-radius:8px;border:1px solid rgba(232,89,60,.2)"> | |
| <div style="font-size:10px;font-weight:800;color:var(--ac);margin-bottom:4px">π₯ WM Bench <span style="font-size:7px;background:var(--ac);color:#fff;padding:1px 5px;border-radius:4px">NEW</span></div> | |
| <div style="font-size:9px;color:var(--text-sec);line-height:1.6">Embodied AGI (world models) · World's first<br>Quantitative cognitive evaluation</div> | |
| <span style="font-size:8px;color:var(--ac);font-family:var(--mono)">β You are here</span> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- ===== TAB: SUBMIT ===== --> | |
| <div id="submit" class="tpane wm-p4"> | |
| <div class="grid2" style="margin-bottom:14px"> | |
| <div class="card"> | |
| <h3>π€ Track A β Text Only</h3> | |
| <p>Simplest entry. LLMs, rule-based systems, any API-compatible model. Max 750 pts.</p> | |
| <ol style="font-size:9.5px;color:var(--text-sec);line-height:2;padding-left:16px"> | |
| <li>Prepare an OpenAI-compatible API endpoint</li> | |
| <li>Run your model on all 100 scenarios in <code style="background:var(--bg2);padding:1px 4px;border-radius:3px;font-family:var(--mono)">wm_bench_dataset.json</code></li> | |
| <li>Output the 2-line PREDICT + MOTION format</li> | |
| <li>Submit your result JSON to the HF Discussion board</li> | |
| </ol> | |
| </div> | |
| <div class="card"> | |
| <h3>π― Track B/C β Full Evaluation</h3> | |
| <p>Track A + performance metrics or live demo. Max 1000 pts.</p> | |
| <ol style="font-size:9.5px;color:var(--text-sec);line-height:2;padding-left:16px"> | |
| <li>Complete Track A</li> | |
| <li>Measure FPS, Latency, and GPU metrics</li> | |
| <li>Track C: include a working demo URL</li> | |
| <li>Submit full JSON to HF Discussion board</li> | |
| </ol> | |
| </div> | |
| </div> | |
| <div class="card" style="margin-bottom:14px"> | |
| <h3>Submission JSON Format</h3> | |
| <div style="background:var(--bg2);border-radius:8px;padding:14px;font-family:var(--mono);font-size:9px;color:var(--text-sec);line-height:1.8;overflow-x:auto"> | |
| <pre style="margin:0">{ | |
| "benchmark": "WM Bench v1.0", | |
| "model_name": "YourModel v1.0", | |
| "organization": "YourOrg", | |
| "track": "A", | |
| "wm_score": 0, | |
| "grade": "?", | |
| "fps": 0, | |
| "cognitive_latency_ms": 0, | |
| "gpu": "NVIDIA A100", | |
| "pillar_scores": { | |
| "P1_perception": 0, | |
| "P2_cognition": 0, | |
| "P3_embodiment": 0 | |
| }, | |
| "category_scores": { | |
| "C01":0,"C02":0,"C03":0,"C04":0,"C05":0, | |
| "C06":0,"C07":0,"C08":0,"C09":0,"C10":0 | |
| }, | |
| "paper_url": "", | |
| "demo_url": "" | |
| }</pre> | |
| </div> | |
| </div> | |
| <div style="text-align:center;padding:16px"> | |
| <!-- Submission call-to-action: opens the HF discussion board in a new tab; rel="noopener noreferrer" keeps the destination page from reaching window.opener. --> | |
| <a href="https://huggingface.co/datasets/FINAL-Bench/World-Model/discussions" target="_blank" rel="noopener noreferrer" | |
| style="display:inline-flex;align-items:center;gap:6px;background:linear-gradient(135deg,var(--ac),var(--ac2));color:#fff;font-family:var(--mono);font-size:11px;font-weight:800;padding:10px 24px;border-radius:20px;text-decoration:none;box-shadow:0 4px 14px rgba(232,89,60,.3)"> | |
| π Submit Your Model β | |
| </a> | |
| </div> | |
| </div> | |
| <!-- ===== TAB: ABOUT ===== --> | |
| <div id="about" class="tpane wm-p5"> | |
| <div style="display:grid;grid-template-columns:repeat(auto-fill,minmax(260px,1fr));gap:10px"> | |
| <div class="card"> | |
| <h3>π₯ What is WM Bench?</h3> | |
| <p>Existing benchmarks (HumanML3D, BABEL) measure only motion quality (FID). WM Bench is the world's first benchmark to evaluate <b>cognitive capabilities</b> of world models.</p> | |
| </div> | |
| <div class="card"> | |
| <h3>𧬠First-Ever Measurements</h3> | |
| <p>C05 Autonomous Emotion Escalation and C10 Body-Swap Extensibility have zero prior research. C03·C04·C06·C07·C08 are also first defined by WM Bench.</p> | |
| </div> | |
| <div class="card"> | |
| <h3>π VIDRAFT PROMETHEUS</h3> | |
| <p>Current baseline. Open LLM brain (any LLM pluggable) + FloodDiffusion-VIDRAFT motion engine. RTX5070 (local/16GB). 47 FPS. WM Score 726/1000 (Grade B).</p> | |
| </div> | |
| <div class="card"> | |
| <h3>π Version History</h3> | |
| <p style="font-family:var(--mono);font-size:9px;line-height:2;color:var(--text-sec)"> | |
| v1.0 (2026.03) — Initial release<br> | |
| 100 scenarios · Auto-scored<br> | |
| 3 Tracks · 10 Categories<br> | |
| PROMETHEUS baseline registered | |
| </p> | |
| </div> | |
| <div class="card"> | |
| <h3>π Citation</h3> | |
| <div style="background:var(--bg2);border-radius:6px;padding:10px;font-family:var(--mono);font-size:8.5px;color:var(--text-sec);line-height:1.8"> | |
| @dataset{wmbench2026,<br> | |
| title={World Model Bench},<br> | |
| author={Kim Taebong},<br> | |
| year={2026},<br> | |
| publisher={VIDRAFT}<br> | |
| } | |
| </div> | |
| </div> | |
| <div class="card"> | |
| <h3>βοΈ License</h3> | |
| <p>Dataset: <b>CC-BY-SA-4.0</b><br>Scoring code: <b>Apache 2.0</b><br>Free to use and cite. Attribution required.</p> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- /wrap --> | |
| <!-- ===== DATA & LOGIC ===== --> | |
| <script> | |
| (function(){ | |
| /* ββ λ°μ΄ν° ββ */ | |
| var LB_DATA = [ | |
| // ββ TRACK C VERIFIED ββββββββββββββββββββββββββββββββββββββ | |
| {model:"PROMETHEUS v1.0",org:"VIDRAFT",date:"2026-03",wm:726,grade:"B",p1:140,p2:390,p3:196,fps:47.0,lat:3100,track:"C",link:"https://huggingface.co/spaces/FINAL-Bench/world-model",brain:"Any LLM (Open)",motion:"FloodDiffusion-VIDRAFT",gpu:"RTX5070 (local/16GB)",est:false, | |
| cats:{C01:65,C02:75,C03:85,C04:90,C05:85,C06:60,C07:70,C08:80,C09:85,C10:35}}, | |
| // ββ TRACK A ESTIMATED β EMBODIED / ROBOTICS βββββββββββββββ | |
| {model:"Meta V-JEPA 2-AC",org:"Meta AI",date:"2025-06",wm:554,grade:"C",p1:200,p2:214,p3:140,fps:null,lat:null,track:"A",link:"https://ai.meta.com/blog/v-jepa-2-world-model-benchmarks/",brain:"ViT-g (1.2B)",motion:"Latent JEPA",gpu:"Multi-A100",est:true, | |
| cats:{C01:82,C02:78,C03:88,C04:35,C05:5,C06:72,C07:38,C08:15,C09:70,C10:55}}, | |
| {model:"Wayve GAIA-3",org:"Wayve",date:"2025-12",wm:550,grade:"C",p1:206,p2:221,p3:123,fps:null,lat:null,track:"A",link:"https://wayve.ai/thinking/gaia-3/",brain:"Proprietary",motion:"Multi-cam Video",gpu:"H100 cluster",est:true, | |
| cats:{C01:85,C02:80,C03:82,C04:42,C05:8,C06:65,C07:48,C08:10,C09:68,C10:45}}, | |
| {model:"NC AI WFM v1.0",org:"NC AI",date:"2026-03",wm:522,grade:"C",p1:150,p2:252,p3:120,fps:null,lat:null,track:"A",link:"https://en.sedaily.com/technology/2026/03/16/nc-ai-demonstrates-world-foundation-model-for-robot",brain:"NC WFM",motion:"Latent-Action",gpu:"A100 (25% SOTA)",est:true, | |
| cats:{C01:74,C02:76,C03:70,C04:60,C05:22,C06:52,C07:48,C08:40,C09:65,C10:15}}, | |
| {model:"NVIDIA Cosmos v1.0",org:"NVIDIA",date:"2025-01",wm:498,grade:"C",p1:158,p2:222,p3:118,fps:null,lat:null,track:"A",link:"https://www.nvidia.com/en-us/ai/cosmos/",brain:"Cosmos Tokenizer",motion:"Video Diffusion",gpu:"H100 cluster",est:true, | |
| cats:{C01:78,C02:80,C03:72,C04:38,C05:8,C06:62,C07:42,C08:18,C09:72,C10:28}}, | |
| {model:"NAVER LABS SWM",org:"NAVER LABS Europe",date:"2025-06",wm:470,grade:"C",p1:165,p2:198,p3:107,fps:null,lat:null,track:"A",link:"https://europe.naverlabs.com/updates/structured-world-models-for-robotic-manipulation-rss-2025/",brain:"Foundation Model",motion:"3D Navigation",gpu:"Research cluster",est:true, | |
| cats:{C01:80,C02:85,C03:72,C04:22,C05:5,C06:68,C07:35,C08:10,C09:62,C10:18}}, | |
| {model:"DeepMind Genie 2",org:"Google DeepMind",date:"2024-12",wm:449,grade:"C",p1:179,p2:140,p3:130,fps:24,lat:null,track:"A",link:"https://deepmind.google/discover/blog/genie-2-a-large-scale-foundation-world-model/",brain:"Foundation WM",motion:"3D Video Gen",gpu:"TPU v5",est:true, | |
| cats:{C01:75,C02:68,C03:60,C04:15,C05:5,C06:45,C07:30,C08:20,C09:72,C10:38}}, | |
| {model:"DreamerV3 XL",org:"Google DeepMind",date:"2025-04",wm:441,grade:"C",p1:132,p2:229,p3:80,fps:null,lat:null,track:"A",link:"https://github.com/danijar/dreamerv3",brain:"RSSM (200M)",motion:"Latent Rollout",gpu:"A100",est:true, | |
| cats:{C01:70,C02:62,C03:80,C04:28,C05:6,C06:75,C07:40,C08:12,C09:60,C10:8}}, | |
| // ββ TRACK A ESTIMATED β GENERAL / VIDEO ββββββββββββββββββ | |
| {model:"OpenAI Sora 2",org:"OpenAI",date:"2025-09",wm:381,grade:"D",p1:175,p2:85,p3:121,fps:null,lat:null,track:"A",link:"https://openai.com/sora",brain:"Diffusion Transformer",motion:"Video Diffusion",gpu:"Proprietary",est:true, | |
| cats:{C01:72,C02:68,C03:40,C04:10,C05:5,C06:25,C07:20,C08:35,C09:55,C10:31}}, | |
| {model:"World Labs Marble",org:"World Labs",date:"2025-11",wm:362,grade:"D",p1:180,p2:72,p3:110,fps:null,lat:null,track:"A",link:"https://www.worldlabs.ai/",brain:"Spatial Intelligence",motion:"3D Gen",gpu:"Proprietary",est:true, | |
| cats:{C01:88,C02:72,C03:35,C04:8,C05:5,C06:22,C07:18,C08:28,C09:60,C10:24}}, | |
| {model:"UniSim",org:"Google Research",date:"2024-01",wm:338,grade:"D",p1:148,p2:118,p3:72,fps:null,lat:null,track:"A",link:"https://universal-simulator.github.io/unisim/",brain:"Diffusion",motion:"Video Diffusion",gpu:"TPU",est:true, | |
| cats:{C01:72,C02:58,C03:58,C04:18,C05:4,C06:42,C07:22,C08:18,C09:48,C10:6}}, | |
| {model:"DIAMOND v1.0",org:"EPFL",date:"2024-05",wm:312,grade:"D",p1:103,p2:138,p3:71,fps:null,lat:null,track:"A",link:"https://arxiv.org/abs/2405.12399",brain:"DDPM Latent",motion:"Diffusion",gpu:"A100",est:true, | |
| cats:{C01:55,C02:48,C03:48,C04:20,C05:5,C06:35,C07:30,C08:25,C09:40,C10:6}}, | |
| {model:"Oasis AI",org:"Decart / Etched",date:"2024-10",wm:285,grade:"D",p1:98,p2:98,p3:89,fps:20,lat:null,track:"A",link:"https://oasis.us/",brain:"Diffusion Transformer",motion:"Interactive Video",gpu:"Sohu chip",est:true, | |
| cats:{C01:50,C02:48,C03:42,C04:12,C05:4,C06:28,C07:18,C08:35,C09:48,C10:4}}, | |
| // ββ NOT YET EVALUATED β NAME REGISTERED ββββββββββββββββββ | |
| {model:"DeepMind Genie 3",org:"Google DeepMind",date:"2025-08",wm:null,grade:"?",p1:null,p2:null,p3:null,fps:24,lat:null,track:"-",link:"https://deepmind.google/discover/blog/genie-3/",brain:"Foundation WM",motion:"3D Video Gen",gpu:"TPU v5",est:true, | |
| cats:{C01:null,C02:null,C03:null,C04:null,C05:null,C06:null,C07:null,C08:null,C09:null,C10:null}}, | |
| {model:"Wayve GAIA-2",org:"Wayve",date:"2025-03",wm:null,grade:"?",p1:null,p2:null,p3:null,fps:null,lat:null,track:"-",link:"https://wayve.ai/science/gaia/",brain:"Proprietary",motion:"Multi-cam Video",gpu:"H100 cluster",est:true, | |
| cats:{C01:null,C02:null,C03:null,C04:null,C05:null,C06:null,C07:null,C08:null,C09:null,C10:null}}, | |
| {model:"Hyundai AI Robotics WM",org:"Hyundai Motor Group",date:"2026-01",wm:null,grade:"?",p1:null,p2:null,p3:null,fps:null,lat:null,track:"-",link:"https://www.hyundai.com/worldwide/en/newsroom/detail/hyundai-motor-group-announces-ai-robotics-strategy-to-lead-human-centered-robotics-era-at-ces-2026-0000001100",brain:"NVIDIA Omniverse",motion:"Humanoid",gpu:"AI Factory",est:true, | |
| cats:{C01:null,C02:null,C03:null,C04:null,C05:null,C06:null,C07:null,C08:null,C09:null,C10:null}}, | |
| {model:"Odyssey-2",org:"Odyssey",date:"2025-12",wm:null,grade:"?",p1:null,p2:null,p3:null,fps:20,lat:40,track:"-",link:"https://odyssey.systems/",brain:"Causal Video Model",motion:"Interactive Video",gpu:"Proprietary",est:true, | |
| cats:{C01:null,C02:null,C03:null,C04:null,C05:null,C06:null,C07:null,C08:null,C09:null,C10:null}}, | |
| {model:"Physical Intelligence Ο0",org:"Physical Intelligence",date:"2024-10",wm:null,grade:"?",p1:null,p2:null,p3:null,fps:null,lat:null,track:"-",link:"https://www.physicalintelligence.company/blog/pi0",brain:"VLA Flow Model",motion:"Robot Dexterity",gpu:"Proprietary",est:true, | |
| cats:{C01:null,C02:null,C03:null,C04:null,C05:null,C06:null,C07:null,C08:null,C09:null,C10:null}}, | |
| {model:"LG CLOiD VLA",org:"LG Electronics",date:"2025-12",wm:null,grade:"?",p1:null,p2:null,p3:null,fps:null,lat:null,track:"-",link:"https://www.lg.com/sg/about-lg/press-and-media/lg-acquires-majority-stake-in-bear-robotics-to-bolster-robotic-capabilities/",brain:"VLM+VLA",motion:"Household Robot",gpu:"Proprietary",est:true, | |
| cats:{C01:null,C02:null,C03:null,C04:null,C05:null,C06:null,C07:null,C08:null,C09:null,C10:null}}, | |
| {model:"Runway GWM-1",org:"Runway",date:"2025-10",wm:null,grade:"?",p1:null,p2:null,p3:null,fps:null,lat:null,track:"-",link:"https://runwayml.com/",brain:"Autoregressive",motion:"Real-time Video",gpu:"Proprietary",est:true, | |
| cats:{C01:null,C02:null,C03:null,C04:null,C05:null,C06:null,C07:null,C08:null,C09:null,C10:null}}, | |
| // ── NEWLY ADDED ─────────────────────────────────────────── | |
| {model:"Tesla FSD v13 (E2E)",org:"Tesla",date:"2025-07",wm:null,grade:"?",p1:null,p2:null,p3:null,fps:null,lat:null,track:"-",link:"https://www.tesla.com/fsd",brain:"E2E Neural Net (Temporal-Voxel)",motion:"CameraβControl (Driving)",gpu:"Dojo / HW4",est:true, | |
| cats:{C01:null,C02:null,C03:null,C04:null,C05:null,C06:null,C07:null,C08:null,C09:null,C10:null}}, | |
| {model:"Figure Helix-02",org:"Figure AI",date:"2025-12",wm:null,grade:"?",p1:null,p2:null,p3:null,fps:null,lat:null,track:"-",link:"https://www.figure.ai/news/helix-02",brain:"VLA (pixelsβfull-body)",motion:"Humanoid Full-Body",gpu:"Proprietary",est:true, | |
| cats:{C01:null,C02:null,C03:null,C04:null,C05:null,C06:null,C07:null,C08:null,C09:null,C10:null}}, | |
| {model:"TRI Diffusion Policy",org:"Toyota Research Institute",date:"2024-06",wm:null,grade:"?",p1:null,p2:null,p3:null,fps:null,lat:null,track:"-",link:"https://diffusion-policy.cs.columbia.edu/",brain:"DDPM / Score Matching",motion:"Robot Dexterity",gpu:"Research cluster",est:true, | |
| cats:{C01:null,C02:null,C03:null,C04:null,C05:null,C06:null,C07:null,C08:null,C09:null,C10:null}}, | |
| {model:"HuggingFace LeRobot",org:"Hugging Face",date:"2024-09",wm:null,grade:"?",p1:null,p2:null,p3:null,fps:null,lat:null,track:"-",link:"https://huggingface.co/lerobot",brain:"Open (ACT / Diffusion)",motion:"Multi-robot Open",gpu:"Open",est:true, | |
| cats:{C01:null,C02:null,C03:null,C04:null,C05:null,C06:null,C07:null,C08:null,C09:null,C10:null}}, | |
| {model:"Covariant RFM-1",org:"Covariant",date:"2024-03",wm:null,grade:"?",p1:null,p2:null,p3:null,fps:null,lat:null,track:"-",link:"https://covariant.ai/rfm/",brain:"8B Multimodal Transformer",motion:"Warehouse Manipulation",gpu:"Proprietary",est:true, | |
| cats:{C01:null,C02:null,C03:null,C04:null,C05:null,C06:null,C07:null,C08:null,C09:null,C10:null}}, | |
| {model:"Skild Brain",org:"Skild AI",date:"2024-08",wm:null,grade:"?",p1:null,p2:null,p3:null,fps:null,lat:null,track:"-",link:"https://www.skild.ai/",brain:"Omni-body Foundation Model",motion:"Any Robot / Any Task",gpu:"Proprietary",est:true, | |
| cats:{C01:null,C02:null,C03:null,C04:null,C05:null,C06:null,C07:null,C08:null,C09:null,C10:null}} | |
| ]; | |
| var CAT_DATA = [ /* Catalogue of the benchmark categories, one record each (C01 to C10). | |
|    renderCatTable() reads id, name, desc, pillar (a key of PC), type, ref, first and max; | |
|    `score` is not read by any code visible in this part of the file - NOTE(review): confirm its consumer. | |
|    Record order is significant: renderCatTable() inserts a group-header row each time `type` | |
|    differs from the previous record's. */ | |
| {id:"C01",name:"Environmental Awareness",pillar:"P1",max:100,score:65,first:"",type:"existing",ref:"Occupancy Grid eval.",desc:"Identifies walls, obstacles and terrain in all directions"}, | |
| {id:"C02",name:"Entity Recognition",pillar:"P1",max:100,score:75,first:"",type:"existing",ref:"BABEL action recog.",desc:"Classifies NPC as beast/human and maps to threat level"}, | |
| {id:"C03",name:"Prediction-Based Reasoning",pillar:"P2",max:100,score:85,first:"β¦",type:"new",ref:"",desc:"Predicts 4-directional danger and selects optimal action"}, | |
| {id:"C04",name:"Threat-Type Differentiation",pillar:"P2",max:100,score:90,first:"β¦",type:"new",ref:"",desc:"Beast: full sprint. Human: cautious dodge"}, | |
| {id:"C05",name:"Autonomous Emotion Escalation",pillar:"P2",max:100,score:85,first:"β¦β¦",type:"new",ref:"",desc:"Emotion escalates autonomously as threat persists"}, | |
| {id:"C06",name:"Contextual Memory Utilization",pillar:"P2",max:100,score:60,first:"β¦",type:"new",ref:"",desc:"Past decisions incorporated into current judgment"}, | |
| {id:"C07",name:"Post-Threat Adaptive Recovery",pillar:"P2",max:100,score:70,first:"β¦",type:"new",ref:"",desc:"Gradually de-escalates after threat disappears"}, | |
| {id:"C08",name:"Motion-Emotion Expression",pillar:"P3",max:100,score:80,first:"β¦",type:"new",ref:"",desc:"Emotional state expressed richly through motion"}, | |
| {id:"C09",name:"Real-Time Cognitive Performance",pillar:"P3",max:100,score:85,first:"",type:"existing",ref:"FVD / latency",desc:"Inference latency and FPS under cognitive load"}, | |
| {id:"C10",name:"Body-Swap Extensibility",pillar:"P3",max:100,score:35,first:"β¦β¦",type:"new",ref:"",desc:"Same brain drives different body types without retraining"} | |
| ]; | |
| var PC = {P1:"#7b8fd4",P2:"#e8593c",P3:"#d4a044"}; /* Accent colour per pillar id; same hex values as the --p1/--p2/--p3 CSS variables in :root. */ | |
| /* ── Utilities ── */ | |
| function scoreBar(v,max,color){ | |
| var pct=Math.round(v/max*100); | |
| return '<div class="sc"><div class="sn" style="color:'+color+'">'+v+'</div><div class="sb"><div class="sf" style="width:'+pct+'%;background:'+color+'"></div></div></div>'; | |
| } | |
| function $(id){return document.getElementById(id);} | |
| /* ── Rendering ── */ | |
| function renderLB(){ | |
| var tb=$('LB_BODY'); if(!tb) return; | |
| tb.innerHTML=LB_DATA.map(function(r,i){ | |
| return '<tr class="'+(i===0?'hl':'')+'">' | |
| +'<td class="c-model"><div class="mc"><div class="mn">' | |
| +'<span style="display:inline-flex;align-items:center;justify-content:center;width:20px;height:20px;border-radius:6px;background:linear-gradient(135deg,#e8593c,#d4a044);color:#fff;font-size:9px;font-weight:800;flex-shrink:0">'+(i+1)+'</span>' | |
| +(r.link?'<a href="'+r.link+'" target="_blank" style="color:inherit;text-decoration:none;">'+r.model+'</a>':r.model)+(r.est?'<span style="font-size:8px;background:#444;color:#aaa;padding:1px 5px;border-radius:4px;margin-left:5px">est.</span>':'<span style="font-size:9px;color:#4caf50;margin-left:5px">β</span>')+'</div><div class="mp">'+r.org+' Β· '+r.date+(r.link?' <a href="'+r.link+'" target="_blank" style="color:var(--ac);font-size:9px;font-weight:700;text-decoration:none;margin-left:4px">β</a>':'')+'</div></div></td>' | |
| +'<td>'+(r.wm!=null?scoreBar(r.wm,1000,'#e8593c'):'<div style="font-size:9px;color:var(--text-muted);font-family:var(--mono);padding:4px 8px">Not evaluated')+'</td>' | |
| +'<td>'+(r.grade!='?'?'<span class="gr gr-'+r.grade+'">'+r.grade+'</span>':'<span style="font-size:9px;color:var(--text-muted);font-family:var(--mono)">β</span>')+'</td>' | |
| +'<td class="p1-col">'+scoreBar(r.p1,200,PC.P1)+'</td>' | |
| +'<td class="p2-col">'+scoreBar(r.p2,500,PC.P2)+'</td>' | |
| +'<td class="p3-col">'+(r.p3!=null?scoreBar(r.p3,300,PC.P3):'<span style="color:var(--text-muted);font-size:9px">β</span>')+'</td>' | |
| +'<td><span style="font-family:var(--mono);font-size:11px;font-weight:700">'+(r.fps!=null?r.fps:'β')+'</span></td>' | |
| +'<td><span style="font-family:var(--mono);font-size:11px;font-weight:700">'+(r.lat!=null?r.lat.toLocaleString():'β')+'</span></td>' | |
| +'<td><span class="tb tb-'+r.track+'">'+r.track+' β</span></td>' | |
| +'<td><span style="font-size:9px;color:var(--text-sec)">'+r.brain+'</span></td>' | |
| +'<td><span style="font-size:9px;color:var(--text-sec)">'+r.motion+'</span></td>' | |
| +'<td><span style="font-size:9px;color:var(--text-muted)">'+r.gpu+'</span></td>' | |
| +'</tr>'; | |
| }).join(''); | |
| } | |
| function renderCatTable(){ | |
| var tb=$('CAT_TABLE'); if(!tb) return; | |
| var rows=''; | |
| var prevType=null; | |
| CAT_DATA.forEach(function(c){ | |
| if(c.type!==prevType){ | |
| if(c.type==='existing'){ | |
| rows+='<tr><td colspan="7" style="padding:8px 12px 4px;font-size:8.5px;font-family:var(--mono);font-weight:800;color:#64748b;background:var(--surface-alt);letter-spacing:.8px;text-transform:uppercase;">Existing Benchmark Domains (analogous to FID Β· FVD Β· HumanML3D Β· BABEL) β 4 categories</td></tr>'; | |
| } else { | |
| rows+='<tr><td colspan="7" style="padding:8px 12px 4px;font-size:8.5px;font-family:var(--mono);font-weight:800;color:var(--ac);background:rgba(232,89,60,.04);letter-spacing:.8px;text-transform:uppercase;">β‘ VIDRAFT Newly Defined β 6 Novel Categories (no prior research)</td></tr>'; | |
| } | |
| prevType=c.type; | |
| } | |
| var typeBadge=c.type==='existing' | |
| ?'<span style="font-size:7.5px;background:#e2e5f0;color:#64748b;padding:1px 6px;border-radius:4px;font-family:var(--mono)">existing</span>' | |
| :'<span style="font-size:7.5px;background:rgba(232,89,60,.12);color:var(--ac);padding:1px 6px;border-radius:4px;font-family:var(--mono);font-weight:700">NEW</span>'; | |
| var firstBadge=c.first==='β¦β¦' | |
| ?'<span style="font-size:9px;font-weight:700;color:#d97706">β¦β¦ no prior research</span>' | |
| :c.first==='β¦' | |
| ?'<span style="font-size:9px;font-weight:600;color:var(--ac)">β¦ newly defined</span>' | |
| :'<span style="font-size:8.5px;color:var(--text-muted)">β</span>'; | |
| rows+='<tr>' | |
| +'<td class="c-model" style="font-family:var(--mono);font-weight:800;color:'+PC[c.pillar]+'">'+c.id+'</td>' | |
| +'<td style="text-align:left"><span style="font-weight:700">'+c.name+'</span><br><span style="font-size:8px;color:var(--text-muted)">'+c.desc+'</span></td>' | |
| +'<td style="text-align:left"><span style="font-size:9px;font-family:var(--mono);color:'+PC[c.pillar]+'">'+c.pillar+'</span></td>' | |
| +'<td style="text-align:center">'+typeBadge+'</td>' | |
| +'<td style="text-align:left;font-size:8.5px;color:var(--text-muted);font-family:var(--mono)">'+(c.ref||'β')+'</td>' | |
| +'<td style="text-align:center">'+firstBadge+'</td>' | |
| +'<td style="text-align:center"><span style="font-family:var(--mono);font-weight:700">'+c.max+'</span></td>' | |
| +'</tr>'; | |
| }); | |
| tb.innerHTML=rows; | |
| } | |
| /* ── Tab switching ── */ | |
| function showTab(id){ | |
| document.querySelectorAll('.tpane').forEach(function(p){p.classList.remove('on');}); | |
| document.querySelectorAll('#WM_TAB_BAR .tab').forEach(function(t){t.classList.remove('on');}); | |
| var pane=$(id); if(pane) pane.classList.add('on'); | |
| var btn=document.querySelector('#WM_TAB_BAR [data-tab="'+id+'"]'); | |
| if(btn) btn.classList.add('on'); | |
| } | |
| function initCharts(){ | |
| var isDark=document.body.classList.contains('dark'); | |
| var gridC=isDark?'rgba(255,255,255,.07)':'rgba(0,0,0,.06)'; | |
| var tickC=isDark?'#94a3b8':'#64748b'; | |
| var MC={'PROMETHEUS v1.0':'#e8593c','Meta V-JEPA 2-AC':'#0d9488','Wayve GAIA-3':'#d97706','NC AI WFM v1.0':'#6366f1','NVIDIA Cosmos v1.0':'#76b900','NAVER LABS SWM':'#f43f5e','DeepMind Genie 2':'#4285f4','DreamerV3 XL':'#8b5cf6','DIAMOND v1.0':'#ec4899','OpenAI Sora 2':'#10b981','World Labs Marble':'#a855f7','UniSim':'#06b6d4','Oasis AI':'#f59e0b'}; | |
| var scored=LB_DATA.filter(function(r){return r.wm!=null;});var sorted=[].concat(scored).sort(function(a,b){return b.wm-a.wm;}); | |
| function shorten(s){return s.replace(' v1.0','').replace('-AC','').split(' ').slice(0,2).join(' ');} | |
| // 1. μν λ§λ WM Score | |
| new Chart(document.getElementById('cBar'),{type:'bar', | |
| data:{labels:sorted.map(function(r){return (r.est?'':'β ')+shorten(r.model);}), | |
| datasets:[{label:'WM Score',data:sorted.map(function(r){return r.wm;}), | |
| backgroundColor:sorted.map(function(r){return (MC[r.model]||'#888')+'bb';}), | |
| borderColor:sorted.map(function(r){return MC[r.model]||'#888';}),borderWidth:1.5,borderRadius:6,borderSkipped:false}]}, | |
| options:{responsive:false,indexAxis:'y',plugins:{legend:{display:false},tooltip:{callbacks:{label:function(c){var r=sorted[c.dataIndex];return ' WM: '+r.wm+'/1000 Grade: '+r.grade+(r.est?' (est.)':' β Official');} }}}, | |
| scales:{x:{min:0,max:1000,grid:{color:gridC},ticks:{color:tickC,font:{family:'JetBrains Mono',size:9}}},y:{grid:{display:false},ticks:{color:tickC,font:{family:'JetBrains Mono',size:8.5}}}}} | |
| }); | |
| // 2. Radar Top 5 | |
| var top5=sorted.slice(0,5); | |
| new Chart(document.getElementById('cRadar'),{type:'radar', | |
| data:{labels:['π Perception','π§ Cognition','π₯ Embodiment'], | |
| datasets:top5.map(function(r){return {label:shorten(r.model), | |
| data:[Math.round(r.p1/250*100),Math.round(r.p2/450*100),Math.round(r.p3/300*100)], | |
| borderColor:MC[r.model]||'#888',backgroundColor:(MC[r.model]||'#888')+'1a', | |
| borderWidth:r.est?1.5:2.5,pointRadius:r.est?2:4,pointBackgroundColor:MC[r.model]||'#888',borderDash:r.est?[4,3]:[]}; })}, | |
| options:{responsive:false,plugins:{legend:{labels:{color:tickC,font:{family:'JetBrains Mono',size:8},boxWidth:10,padding:6}}}, | |
| scales:{r:{grid:{color:gridC},angleLines:{color:gridC},ticks:{display:false,backdropColor:'transparent'},pointLabels:{color:tickC,font:{family:'JetBrains Mono',size:10}},suggestedMin:0,suggestedMax:100}}} | |
| }); | |
| // 3. Category Breakdown (8λͺ¨λΈ) | |
| var catK=['C01','C02','C03','C04','C05','C06','C07','C08','C09','C10']; | |
| var catL=['C01\nEnv.Aware','C02\nEntity Recog','C03\nPrediction','C04\nThreat Diff','C05\nEmotion Esc','C06\nMemory','C07\nRecovery','C08\nMotion Expr','C09\nRealtime','C10\nBody-Swap']; | |
| new Chart(document.getElementById('cCats'),{type:'bar', | |
| data:{labels:catL,datasets:sorted.map(function(r){return {label:shorten(r.model), | |
| data:catK.map(function(k){return r.cats[k];}), | |
| backgroundColor:(MC[r.model]||'#888')+'99',borderColor:MC[r.model]||'#888',borderWidth:1,borderRadius:2}; })}, | |
| options:{responsive:false,plugins:{legend:{labels:{color:tickC,font:{family:'JetBrains Mono',size:7.5},boxWidth:8,padding:5}}, | |
| tooltip:{callbacks:{label:function(c){return ' '+sorted[c.datasetIndex].model+': '+c.raw+' / 100';}}}}, | |
| scales:{y:{min:0,max:100,grid:{color:gridC},ticks:{color:tickC,font:{family:'JetBrains Mono',size:9}}},x:{grid:{display:false},ticks:{color:tickC,font:{family:'JetBrains Mono',size:8},maxRotation:0}}}} | |
| }); | |
| // 4. Cognition Gap | |
| var cogS=[].concat(scored).sort(function(a,b){return b.p2-a.p2;}); | |
| new Chart(document.getElementById('cCognition'),{type:'bar', | |
| data:{labels:cogS.map(function(r){return shorten(r.model);}), | |
| datasets:[{label:'P2 Cognition',data:cogS.map(function(r){return r.p2;}), | |
| backgroundColor:cogS.map(function(r){return (MC[r.model]||'#888')+'cc';}), | |
| borderColor:cogS.map(function(r){return MC[r.model]||'#888';}),borderWidth:1.5,borderRadius:5,borderSkipped:false}]}, | |
| options:{responsive:false,plugins:{legend:{display:false},tooltip:{callbacks:{label:function(c){return ' P2 Cognition: '+cogS[c.dataIndex].p2+' / 450 pts';}}}}, | |
| scales:{y:{min:0,max:450,grid:{color:gridC},ticks:{color:tickC,font:{family:'JetBrains Mono',size:9}}},x:{grid:{display:false},ticks:{color:tickC,font:{family:'JetBrains Mono',size:8},maxRotation:30}}}} | |
| }); | |
| // 5. Scatter | |
| new Chart(document.getElementById('cScatter'),{type:'scatter', | |
| data:{datasets:scored.map(function(r){return {label:shorten(r.model), | |
| data:[{x:Math.round(r.p1/250*100),y:Math.round(r.p2/450*100)}], | |
| backgroundColor:(MC[r.model]||'#888')+'cc',borderColor:MC[r.model]||'#888', | |
| pointRadius:r.est?7:10,pointStyle:r.est?'circle':'star',borderWidth:1.5}; })}, | |
| options:{responsive:false,plugins:{legend:{labels:{color:tickC,font:{family:'JetBrains Mono',size:7.5},boxWidth:8,padding:5}}, | |
| tooltip:{callbacks:{label:function(c){return c.dataset.label+' β Perception: '+c.parsed.x+'% Cognition: '+c.parsed.y+'%';}}}}, | |
| scales:{x:{min:0,max:100,title:{display:true,text:'Perception (%)',color:tickC,font:{family:'JetBrains Mono',size:9}},grid:{color:gridC},ticks:{color:tickC,font:{family:'JetBrains Mono',size:9}}}, | |
| y:{min:0,max:100,title:{display:true,text:'Cognition (%)',color:tickC,font:{family:'JetBrains Mono',size:9}},grid:{color:gridC},ticks:{color:tickC,font:{family:'JetBrains Mono',size:9}}}}} | |
| }); | |
| } | |
| /* ── Event binding ── */ | |
| function bindEvents(){ | |
| /* ν β wmTab() ν¨μλ‘ μ²λ¦¬ */ | |
| /* λ€ν¬λͺ¨λ */ | |
| var darkBtn=$('WM_DARK_BTN'); | |
| if(darkBtn){ | |
| darkBtn.addEventListener('click',function(){ | |
| document.body.classList.toggle('dark'); | |
| var isDark=document.body.classList.contains('dark'); | |
| localStorage.setItem('wm-dark',isDark); | |
| darkBtn.textContent=isDark?'βοΈ Light':'π Dark'; | |
| }); | |
| } | |
| /* λ€ν¬λͺ¨λ 볡μ */ | |
| if(localStorage.getItem('wm-dark')==='true'){ | |
| document.body.classList.add('dark'); | |
| var b=$('WM_DARK_BTN'); if(b) b.textContent='βοΈ Light'; | |
| } | |
| } | |
| /* ── Initialisation (run safely after the DOM has been injected) ── */ | |
| function init(){ | |
| renderLB(); | |
| renderCatTable(); | |
| bindEvents(); | |
| setTimeout(function(){ | |
| try{ initCharts(); } | |
| catch(e){ console.warn('WM Bench chart error:',e); } | |
| },200); | |
| } | |
| /* Gradio gr.HTML() content is injected after DOMContentLoaded has already fired, so run immediately in that case */ | |
| if(document.readyState==='loading'){ | |
| document.addEventListener('DOMContentLoaded', init); | |
| } else { | |
| init(); | |
| } | |
| })(); | |
| </script> | |
| </body> | |
| </html> |