| { |
| "Qwen3-30B-A3B-Instruct-2507": { |
| "gsm8k": { |
| "benchmark_name": "gsm8k", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1071.2940027174511, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 1488.3645940190918, |
| "accept_length": 2.6400593352844486 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1071.2940027174511, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 1499.6157892300257, |
| "accept_length": 3.0113471715954674 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1071.2940027174511, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 1491.1759364152986, |
| "accept_length": 2.525104073618391 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1071.2940027174511, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 1438.3989235515564, |
| "accept_length": 3.1488859094681736 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1071.2940027174511, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 1478.3371126866896, |
| "accept_length": 2.515156901620291 |
| } |
| ] |
| } |
| ] |
| }, |
| "math500": { |
| "benchmark_name": "math500", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1468.9518188983302, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 3022.302541558449, |
| "accept_length": 3.4018400160943374 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1468.9518188983302, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 3458.7683757488517, |
| "accept_length": 4.5001277922609 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1468.9518188983302, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 2710.0700446913434, |
| "accept_length": 3.83069810232181 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1468.9518188983302, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 3636.1457092511932, |
| "accept_length": 5.29297884876688 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1468.9518188983302, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 2650.9994915668844, |
| "accept_length": 3.981701201346221 |
| } |
| ] |
| } |
| ] |
| }, |
| "mtbench": { |
| "benchmark_name": "mtbench", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1341.3462205459145, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 2048.689292397081, |
| "accept_length": 2.495847913511255 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1341.3462205459145, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 2086.117426859236, |
| "accept_length": 2.831051301639537 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1341.3462205459145, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 1698.4151046745978, |
| "accept_length": 2.5572219713355357 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1341.3462205459145, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 1998.1600180425269, |
| "accept_length": 2.9819193324061195 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1341.3462205459145, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 1742.9797705522778, |
| "accept_length": 2.7422317575874455 |
| } |
| ] |
| } |
| ] |
| }, |
| "humaneval": { |
| "benchmark_name": "humaneval", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1366.6183006362219, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 2618.165602951494, |
| "accept_length": 3.349328692192939 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1366.6183006362219, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 2912.1392571686956, |
| "accept_length": 4.384426363785289 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1366.6183006362219, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 2367.016477367958, |
| "accept_length": 3.7901897758795298 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1366.6183006362219, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 3069.9815866099266, |
| "accept_length": 5.124267515923567 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1366.6183006362219, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 2363.3377665362655, |
| "accept_length": 4.030938739532834 |
| } |
| ] |
| } |
| ] |
| }, |
| "livecodebench": { |
| "benchmark_name": "livecodebench", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1492.6190597361915, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 2911.405162351629, |
| "accept_length": 3.1783624121672447 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1492.6190597361915, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 3265.2547245227543, |
| "accept_length": 4.018270197787462 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1492.6190597361915, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 2455.0885550482017, |
| "accept_length": 3.295517305362425 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1492.6190597361915, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 3413.029275629196, |
| "accept_length": 4.576331556763159 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1492.6190597361915, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 2355.0941391264764, |
| "accept_length": 3.3973067623684012 |
| } |
| ] |
| } |
| ] |
| }, |
| "financeqa": { |
| "benchmark_name": "financeqa", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1320.1266846132082, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 1778.9653109324079, |
| "accept_length": 2.0810309937160505 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1320.1266846132082, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 1778.6778684706662, |
| "accept_length": 2.2730321793789288 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1320.1266846132082, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 1652.1607344416184, |
| "accept_length": 2.2703352879266276 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1320.1266846132082, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 1682.9566856293293, |
| "accept_length": 2.3032779273841584 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1320.1266846132082, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 1753.6698041448958, |
| "accept_length": 2.6092096546804138 |
| } |
| ] |
| } |
| ] |
| }, |
| "gpqa": { |
| "benchmark_name": "gpqa", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1410.428038868636, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 2237.792328921565, |
| "accept_length": 2.5958448251993995 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1410.428038868636, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 2341.298191039886, |
| "accept_length": 3.0077922694984913 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1410.428038868636, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 1961.1700111065113, |
| "accept_length": 2.6947097860315505 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1410.428038868636, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 2310.2053834681674, |
| "accept_length": 3.216540452331778 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1410.428038868636, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-30B-A3B-Instruct-2507-SpecForge", |
| "output_throughput": 2008.7425535412629, |
| "accept_length": 2.91748293468006 |
| } |
| ] |
| } |
| ] |
| } |
| }, |
| "Qwen3-235B-A22B-Instruct-2507": { |
| "gsm8k": { |
| "benchmark_name": "gsm8k", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 469.12940470010284, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 633.4834448509783, |
| "accept_length": 2.356716526992789 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 718.620120234308, |
| "accept_length": 2.8762828246719394 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 469.12940470010284, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 619.3961515217887, |
| "accept_length": 2.5325967285309847 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 740.8090293617215, |
| "accept_length": 3.351527622767857 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 469.12940470010284, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 685.8224688133159, |
| "accept_length": 2.2254637464335056 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 718.5200251720828, |
| "accept_length": 2.5942242348162705 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 469.12940470010284, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 622.6877352310961, |
| "accept_length": 2.577754285484885 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 758.2839780669175, |
| "accept_length": 3.51144398279758 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 469.12940470010284, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 696.9862910262393, |
| "accept_length": 2.2957518385545184 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 692.54543613971, |
| "accept_length": 2.508131344520406 |
| } |
| ] |
| } |
| ] |
| }, |
| "math500": { |
| "benchmark_name": "math500", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 587.3767625807179, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 821.7716217768141, |
| "accept_length": 2.2131311175007076 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 1165.3481778903413, |
| "accept_length": 3.2287879445239853 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 587.3767625807179, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 786.5291154131861, |
| "accept_length": 2.3811060693210626 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 1263.6658286467714, |
| "accept_length": 4.021472447253628 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 587.3767625807179, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 729.1280796475185, |
| "accept_length": 2.1641727527768047 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 1012.7228976076004, |
| "accept_length": 3.3166681444513406 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 587.3767625807179, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 801.9730196026575, |
| "accept_length": 2.4202165987905055 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 1399.195876342606, |
| "accept_length": 4.477737029876627 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 587.3767625807179, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 728.5917394731794, |
| "accept_length": 2.180077789251727 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 966.5149174357106, |
| "accept_length": 3.0996346930308336 |
| } |
| ] |
| } |
| ] |
| }, |
| "mtbench": { |
| "benchmark_name": "mtbench", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 529.8952857212083, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 642.7287443329789, |
| "accept_length": 1.8722335837366109 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 814.539845630713, |
| "accept_length": 2.3454133346915906 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 529.8952857212083, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 617.9738942581079, |
| "accept_length": 1.9436368219822697 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 779.531140147999, |
| "accept_length": 2.571956737666924 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 529.8952857212083, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 579.7478777831109, |
| "accept_length": 1.879637550849381 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 684.112380410899, |
| "accept_length": 2.3538604252889965 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 529.8952857212083, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 607.3644823224199, |
| "accept_length": 1.9674055586107704 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 789.9679697718769, |
| "accept_length": 2.6698328935795956 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 529.8952857212083, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 596.0590450290033, |
| "accept_length": 1.987328547838102 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 670.0058040199536, |
| "accept_length": 2.329033512672587 |
| } |
| ] |
| } |
| ] |
| }, |
| "humaneval": { |
| "benchmark_name": "humaneval", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 553.0503522362385, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 866.1813723921825, |
| "accept_length": 2.533027363039563 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 1068.373749600453, |
| "accept_length": 3.238804311590177 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 553.0503522362385, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 853.4917713020631, |
| "accept_length": 2.8369721532226433 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 1176.5192650014792, |
| "accept_length": 4.083723300745958 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 553.0503522362385, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 772.1684975661775, |
| "accept_length": 2.5123042505592843 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 1032.477913431608, |
| "accept_length": 3.6360244115082825 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 553.0503522362385, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 889.8951303902317, |
| "accept_length": 2.955997016746898 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 1267.5178598410528, |
| "accept_length": 4.4874762125186445 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 553.0503522362385, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 736.1010265214783, |
| "accept_length": 2.3861131594156686 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 983.9906558013464, |
| "accept_length": 3.412326127536581 |
| } |
| ] |
| } |
| ] |
| }, |
| "livecodebench": { |
| "benchmark_name": "livecodebench", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 598.1832041732818, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 803.7805606947842, |
| "accept_length": 2.090690935434212 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 1062.9796952555507, |
| "accept_length": 2.8172381425652917 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 598.1832041732818, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 759.6333115912107, |
| "accept_length": 2.2179516111790765 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 1093.1979234549972, |
| "accept_length": 3.268498808394456 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 598.1832041732818, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 708.4447966909656, |
| "accept_length": 2.077364507787014 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 874.062642276262, |
| "accept_length": 2.6670587896561795 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 598.1832041732818, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 767.8685797664081, |
| "accept_length": 2.2474642743536366 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 1155.6572987907093, |
| "accept_length": 3.490068495285106 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 598.1832041732818, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 711.4663371023372, |
| "accept_length": 2.129619842542645 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 835.6105646149398, |
| "accept_length": 2.590646146520392 |
| } |
| ] |
| } |
| ] |
| }, |
| "financeqa": { |
| "benchmark_name": "financeqa", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 539.5161023038148, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 689.4282413740445, |
| "accept_length": 1.941237358311274 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 872.4508905377182, |
| "accept_length": 2.556773924332344 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 539.5161023038148, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 636.4408069963314, |
| "accept_length": 2.027268079304664 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 885.529748337286, |
| "accept_length": 2.8442245393804413 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 539.5161023038148, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 642.4958901994291, |
| "accept_length": 2.0553746448296777 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 730.7331843587357, |
| "accept_length": 2.4330876223070512 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 539.5161023038148, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 641.1037073226237, |
| "accept_length": 2.0361251069493296 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 889.0304393086461, |
| "accept_length": 2.965008914078923 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 539.5161023038148, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 654.3422430101997, |
| "accept_length": 2.1356956699218137 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 742.3749721046132, |
| "accept_length": 2.5176210584474528 |
| } |
| ] |
| } |
| ] |
| }, |
| "gpqa": { |
| "benchmark_name": "gpqa", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 563.1619467852893, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 716.6967887897075, |
| "accept_length": 2.0240035915598344 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 823.4218898853592, |
| "accept_length": 2.356617214868455 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 563.1619467852893, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 680.2044274358036, |
| "accept_length": 2.14011469258975 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 808.934577824737, |
| "accept_length": 2.6032639643837037 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 563.1619467852893, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 630.9312870281678, |
| "accept_length": 1.9776516235921864 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 698.9315763256182, |
| "accept_length": 2.2587729126518172 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 563.1619467852893, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 685.8554308455039, |
| "accept_length": 2.1591340093176212 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 826.5168292170538, |
| "accept_length": 2.6672259363465063 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 563.1619467852893, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/Qwen3-235B-A22B-EAGLE3", |
| "output_throughput": 636.0480501999019, |
| "accept_length": 2.001480647431386 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-235B-A22B-Instruct-2507-SpecForge", |
| "output_throughput": 683.7427107159214, |
| "accept_length": 2.241436629482574 |
| } |
| ] |
| } |
| ] |
| } |
| }, |
| "Qwen3-Next-80B-A3B-Instruct-FP8": { |
| "gsm8k": { |
| "benchmark_name": "gsm8k", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 549.6362180919164, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 683.8795985073891, |
| "accept_length": 3.13391215089175 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 549.6362180919164, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 753.237074543623, |
| "accept_length": 3.9038018228889597 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 549.6362180919164, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 746.7222279174218, |
| "accept_length": 4.022678679117706 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 549.6362180919164, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 771.153101164556, |
| "accept_length": 4.345554699994077 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 549.6362180919164, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 773.4012327870145, |
| "accept_length": 4.607604467310829 |
| } |
| ] |
| } |
| ] |
| }, |
| "math500": { |
| "benchmark_name": "math500", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 863.7773324206034, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 1478.3001038430784, |
| "accept_length": 3.498551418454351 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 863.7773324206034, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 1764.2064514729698, |
| "accept_length": 4.677160426045899 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 863.7773324206034, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 1758.0166003158934, |
| "accept_length": 4.755809947207558 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 863.7773324206034, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 1912.6838622508392, |
| "accept_length": 5.554967332076544 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 863.7773324206034, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 1853.434631732593, |
| "accept_length": 5.756492370623537 |
| } |
| ] |
| } |
| ] |
| }, |
| "mtbench": { |
| "benchmark_name": "mtbench", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 803.4970369348379, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 1095.5102974622082, |
| "accept_length": 2.581125058112506 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 803.4970369348379, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 1157.636689246293, |
| "accept_length": 2.9156972910237133 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 803.4970369348379, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 1197.112468072539, |
| "accept_length": 3.1331585165547646 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 803.4970369348379, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 1127.4364940073876, |
| "accept_length": 3.0475279197966354 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 803.4970369348379, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 1198.9417562126052, |
| "accept_length": 3.4190589216409535 |
| } |
| ] |
| } |
| ] |
| }, |
| "humaneval": { |
| "benchmark_name": "humaneval", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 788.4509521573036, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 1245.6702060145312, |
| "accept_length": 3.4647713687985653 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 788.4509521573036, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 1527.7120587214345, |
| "accept_length": 4.612265133111893 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 788.4509521573036, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 1536.7723048769212, |
| "accept_length": 4.676180904522613 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 788.4509521573036, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 1628.1293604862747, |
| "accept_length": 5.4577785667790994 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 788.4509521573036, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 1629.7244930267507, |
| "accept_length": 5.621873496873497 |
| } |
| ] |
| } |
| ] |
| }, |
| "livecodebench": { |
| "benchmark_name": "livecodebench", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 916.0337036761792, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 1463.1234977160723, |
| "accept_length": 3.1058026902179443 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 916.0337036761792, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 1724.2207417984275, |
| "accept_length": 3.8462516284893944 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 916.0337036761792, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 1734.4894352951553, |
| "accept_length": 3.9821418050654955 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 916.0337036761792, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 1786.8774464735384, |
| "accept_length": 4.2761952310299485 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 916.0337036761792, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 1829.5532782765572, |
| "accept_length": 4.590307145700787 |
| } |
| ] |
| } |
| ] |
| }, |
| "financeqa": { |
| "benchmark_name": "financeqa", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 827.3050477430119, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 986.4282909200625, |
| "accept_length": 2.0752097090844193 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 827.3050477430119, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 981.0983772859984, |
| "accept_length": 2.1801329261720857 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 827.3050477430119, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 1057.6549922432027, |
| "accept_length": 2.439575219817722 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 827.3050477430119, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 956.6098887389447, |
| "accept_length": 2.2457481515800852 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 827.3050477430119, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 1041.5277102267419, |
| "accept_length": 2.606484877248997 |
| } |
| ] |
| } |
| ] |
| }, |
| "gpqa": { |
| "benchmark_name": "gpqa", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 909.8620481543201, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 1368.9499756838852, |
| "accept_length": 2.7362548025140208 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 909.8620481543201, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 1457.9918429280988, |
| "accept_length": 3.1803662497541225 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 909.8620481543201, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 1511.274616068283, |
| "accept_length": 3.3682366894832594 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 909.8620481543201, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 1463.9444559000415, |
| "accept_length": 3.380290412894046 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 909.8620481543201, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Next-80B-A3B-Instruct-FP8-SpecForge", |
| "output_throughput": 1541.4580844550508, |
| "accept_length": 3.7385501251645787 |
| } |
| ] |
| } |
| ] |
| } |
| }, |
| "Qwen3-Coder-30B-A3B-Instruct": { |
| "humaneval": { |
| "benchmark_name": "humaneval", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1296.1854608851213, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-30B-A3B-Instruct-SpecForge", |
| "output_throughput": 2621.7139434700584, |
| "accept_length": 3.394971072541166 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1296.1854608851213, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-30B-A3B-Instruct-SpecForge", |
| "output_throughput": 2966.4459091363574, |
| "accept_length": 4.5011526953450725 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1296.1854608851213, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-30B-A3B-Instruct-SpecForge", |
| "output_throughput": 2236.868611380527, |
| "accept_length": 3.9489230027326796 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1296.1854608851213, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-30B-A3B-Instruct-SpecForge", |
| "output_throughput": 3205.2025971977832, |
| "accept_length": 5.306789266712931 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1296.1854608851213, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-30B-A3B-Instruct-SpecForge", |
| "output_throughput": 2553.012134540716, |
| "accept_length": 4.221071958746777 |
| } |
| ] |
| } |
| ] |
| }, |
| "livecodebench": { |
| "benchmark_name": "livecodebench", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1506.2936922288973, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-30B-A3B-Instruct-SpecForge", |
| "output_throughput": 2992.02067556649, |
| "accept_length": 3.138553878632709 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1506.2936922288973, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-30B-A3B-Instruct-SpecForge", |
| "output_throughput": 3328.9058789398114, |
| "accept_length": 3.9449129401751835 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1506.2936922288973, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-30B-A3B-Instruct-SpecForge", |
| "output_throughput": 2541.3931549111803, |
| "accept_length": 3.336379596827288 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1506.2936922288973, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-30B-A3B-Instruct-SpecForge", |
| "output_throughput": 3472.3919294148427, |
| "accept_length": 4.477776008915068 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 1506.2936922288973, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-30B-A3B-Instruct-SpecForge", |
| "output_throughput": 2552.5518885328293, |
| "accept_length": 3.5865930607956185 |
| } |
| ] |
| } |
| ] |
| } |
| }, |
| "Qwen3-Coder-480B-A35B-Instruct": { |
| "humaneval": { |
| "benchmark_name": "humaneval", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 470.6571664751315, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-480B-A35B-Instruct-SpecForge-EigenAI", |
| "output_throughput": 867.5261370310272, |
| "accept_length": 3.4954686382065345 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 470.6571664751315, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-480B-A35B-Instruct-SpecForge-EigenAI", |
| "output_throughput": 1044.4475556194586, |
| "accept_length": 4.68614810868407 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 470.6571664751315, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-480B-A35B-Instruct-SpecForge-EigenAI", |
| "output_throughput": 945.2207076385645, |
| "accept_length": 4.2835241878943675 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 470.6571664751315, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-480B-A35B-Instruct-SpecForge-EigenAI", |
| "output_throughput": 1165.0727231905212, |
| "accept_length": 5.626203379024545 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 470.6571664751315, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-480B-A35B-Instruct-SpecForge-EigenAI", |
| "output_throughput": 956.5336674844815, |
| "accept_length": 4.574128043621322 |
| } |
| ] |
| } |
| ] |
| }, |
| "livecodebench": { |
| "benchmark_name": "livecodebench", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 500.99996954994094, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-480B-A35B-Instruct-SpecForge-EigenAI", |
| "output_throughput": 846.6405796214389, |
| "accept_length": 3.0936425388083757 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 500.99996954994094, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-480B-A35B-Instruct-SpecForge-EigenAI", |
| "output_throughput": 946.3806786937351, |
| "accept_length": 3.8547162126548313 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 500.99996954994094, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-480B-A35B-Instruct-SpecForge-EigenAI", |
| "output_throughput": 817.5432981932123, |
| "accept_length": 3.3539182909649066 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 500.99996954994094, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-480B-A35B-Instruct-SpecForge-EigenAI", |
| "output_throughput": 983.2554936551461, |
| "accept_length": 4.260473117512835 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 500.99996954994094, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Qwen3-Coder-480B-A35B-Instruct-SpecForge-EigenAI", |
| "output_throughput": 790.2818911646486, |
| "accept_length": 3.379611891844464 |
| } |
| ] |
| } |
| ] |
| } |
| }, |
| "Kimi-K2-Instruct": { |
| "gsm8k": { |
| "benchmark_name": "gsm8k", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 337.92445122816076, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 498.355967400969, |
| "accept_length": 3.271389121751566 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 337.92445122816076, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 538.7660861191819, |
| "accept_length": 4.120435815920245 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 337.92445122816076, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 476.5166831456105, |
| "accept_length": 3.5748305647840533 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 337.92445122816076, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 544.16588655688, |
| "accept_length": 4.655279611582661 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 337.92445122816076, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 459.1757114935756, |
| "accept_length": 3.4419677544677545 |
| } |
| ] |
| } |
| ] |
| }, |
| "math500": { |
| "benchmark_name": "math500", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 492.06079685961566, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 877.2113745892083, |
| "accept_length": 3.46806357521281 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 492.06079685961566, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 995.8769550545389, |
| "accept_length": 4.610169876195772 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 492.06079685961566, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 772.6100737625807, |
| "accept_length": 3.527844083399639 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 492.06079685961566, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 1022.7285831443611, |
| "accept_length": 5.383128673454291 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 492.06079685961566, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 649.083231514055, |
| "accept_length": 3.1435862587473253 |
| } |
| ] |
| } |
| ] |
| }, |
| "mtbench": { |
| "benchmark_name": "mtbench", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 430.9240376244664, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 533.8166177911393, |
| "accept_length": 2.3897198230461343 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 430.9240376244664, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 526.1187611377575, |
| "accept_length": 2.738876732312181 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 430.9240376244664, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 473.3129895327435, |
| "accept_length": 2.394141207153502 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 430.9240376244664, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 488.46384825810924, |
| "accept_length": 2.7821796546219706 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 430.9240376244664, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 451.126180366313, |
| "accept_length": 2.536454493323503 |
| } |
| ] |
| } |
| ] |
| }, |
| "humaneval": { |
| "benchmark_name": "humaneval", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 466.0584238730984, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 779.7838793636296, |
| "accept_length": 3.364936827816644 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 466.0584238730984, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 868.550857852841, |
| "accept_length": 4.423030465709301 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 466.0584238730984, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 729.1217213710999, |
| "accept_length": 3.7321711568938194 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 466.0584238730984, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 897.9039799990946, |
| "accept_length": 5.162398550153652 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 466.0584238730984, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 669.271164663664, |
| "accept_length": 3.7044178210408085 |
| } |
| ] |
| } |
| ] |
| }, |
| "livecodebench": { |
| "benchmark_name": "livecodebench", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 500.12137141510016, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 841.5023790421864, |
| "accept_length": 3.162685632492396 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 500.12137141510016, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 904.3910288246204, |
| "accept_length": 3.943605886942718 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 500.12137141510016, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 716.7319007181034, |
| "accept_length": 3.1374681580049573 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 500.12137141510016, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 896.7006322822839, |
| "accept_length": 4.400262176061309 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 500.12137141510016, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 650.4333056536461, |
| "accept_length": 3.0780193205478037 |
| } |
| ] |
| } |
| ] |
| }, |
| "financeqa": { |
| "benchmark_name": "financeqa", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 433.44658979995484, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 647.3644717982133, |
| "accept_length": 2.9848269628099175 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 433.44658979995484, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 660.0254297132984, |
| "accept_length": 3.594056395834917 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 433.44658979995484, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 523.0340443308603, |
| "accept_length": 2.8796471741261027 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 433.44658979995484, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 630.5425124127137, |
| "accept_length": 3.944647875329984 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 433.44658979995484, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 389.47080223360666, |
| "accept_length": 2.5096594789735582 |
| } |
| ] |
| } |
| ] |
| }, |
| "gpqa": { |
| "benchmark_name": "gpqa", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 505.3742994094499, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 783.436424568974, |
| "accept_length": 2.904452196823693 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 505.3742994094499, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 811.3642458480507, |
| "accept_length": 3.4622853609057755 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 505.3742994094499, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 699.8111934038128, |
| "accept_length": 3.0198274205132876 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 505.3742994094499, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 770.4892578818251, |
| "accept_length": 3.6995331477421103 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 505.3742994094499, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Kimi-K2-Instruct-eagle3", |
| "output_throughput": 596.3162033813331, |
| "accept_length": 2.7901899604967983 |
| } |
| ] |
| } |
| ] |
| } |
| }, |
| "Ling-flash-2.0": { |
| "gsm8k": { |
| "benchmark_name": "gsm8k", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 674.3464018618124, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 1144.7606179148752, |
| "accept_length": 3.4351661916604646 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 674.3464018618124, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 1253.4000030615975, |
| "accept_length": 4.487906489549112 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 674.3464018618124, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 1059.7381115819003, |
| "accept_length": 3.331830155824441 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 674.3464018618124, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 1323.0093663978187, |
| "accept_length": 5.148644964283767 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 674.3464018618124, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 1026.8025294413142, |
| "accept_length": 3.126593214481735 |
| } |
| ] |
| } |
| ] |
| }, |
| "math500": { |
| "benchmark_name": "math500", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 762.7113399535667, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 1434.6065070935829, |
| "accept_length": 3.4340471141971713 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 762.7113399535667, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 1607.3212268988339, |
| "accept_length": 4.493397164127635 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 762.7113399535667, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 1383.6720582197756, |
| "accept_length": 3.7931376508179415 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 762.7113399535667, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 1685.5692612687462, |
| "accept_length": 5.218245374511558 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 762.7113399535667, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 1330.1086623703009, |
| "accept_length": 3.793696144088135 |
| } |
| ] |
| } |
| ] |
| }, |
| "mtbench": { |
| "benchmark_name": "mtbench", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 728.5278345617202, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 1022.5890920470158, |
| "accept_length": 2.392568385378843 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 728.5278345617202, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 990.0430932236113, |
| "accept_length": 2.648161574313827 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 728.5278345617202, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 914.3899001110539, |
| "accept_length": 2.5161251562049407 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 728.5278345617202, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 942.3914903299366, |
| "accept_length": 2.771332137960131 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 728.5278345617202, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 968.0479918450316, |
| "accept_length": 2.8558805412179527 |
| } |
| ] |
| } |
| ] |
| }, |
| "humaneval": { |
| "benchmark_name": "humaneval", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 740.2477168580639, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 1271.2889448808319, |
| "accept_length": 3.1471241394625804 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 740.2477168580639, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 1353.1437889143726, |
| "accept_length": 3.9318483282257697 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 740.2477168580639, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 1175.4192382338058, |
| "accept_length": 3.29687986547923 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 740.2477168580639, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 1358.9726439538854, |
| "accept_length": 4.370163501574083 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 740.2477168580639, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 1141.7913416362687, |
| "accept_length": 3.3590013964490297 |
| } |
| ] |
| } |
| ] |
| }, |
| "livecodebench": { |
| "benchmark_name": "livecodebench", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 770.3957537752161, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 1305.1833791876973, |
| "accept_length": 2.9790301516097895 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 770.3957537752161, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 1366.417326281792, |
| "accept_length": 3.6103649876590875 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 770.3957537752161, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 1130.7868943433502, |
| "accept_length": 2.8933133857317164 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 770.3957537752161, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 1345.6741018953574, |
| "accept_length": 3.9330923185867093 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 770.3957537752161, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 1061.6897228931932, |
| "accept_length": 2.902182106883942 |
| } |
| ] |
| } |
| ] |
| }, |
| "financeqa": { |
| "benchmark_name": "financeqa", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 747.7098566179897, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 863.8565336005082, |
| "accept_length": 1.907102314310342 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 747.7098566179897, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 833.1235940586521, |
| "accept_length": 2.047546254809973 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 747.7098566179897, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 798.9811798480557, |
| "accept_length": 1.9372590117256243 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 747.7098566179897, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 763.2761511276084, |
| "accept_length": 2.0470985454359427 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 747.7098566179897, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 779.3060665006524, |
| "accept_length": 2.045476819601249 |
| } |
| ] |
| } |
| ] |
| }, |
| "gpqa": { |
| "benchmark_name": "gpqa", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 794.1289733679167, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 1185.7250147683403, |
| "accept_length": 2.562389392369937 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 794.1289733679167, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 1161.8732670284553, |
| "accept_length": 2.886871902842324 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 794.1289733679167, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 1052.640023467198, |
| "accept_length": 2.6017604302340236 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 794.1289733679167, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 1111.996259596397, |
| "accept_length": 3.0648124985786733 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 794.1289733679167, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "AQ-MedAI/Ling-Flash-2.0-eagle3", |
| "output_throughput": 1004.4992021266573, |
| "accept_length": 2.6709053367549105 |
| } |
| ] |
| } |
| ] |
| } |
| }, |
| "Llama-3.1-8B-Instruct": { |
| "gsm8k": { |
| "benchmark_name": "gsm8k", |
| "results": [ |
| { |
| "batch_size": 1, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 181.81151788749455, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 228.64232714994796, |
| "accept_length": 1.7165139181419709 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 321.2528041157779, |
| "accept_length": 2.5481878001819607 |
| } |
| ] |
| }, |
| { |
| "batch_size": 1, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 181.81151788749455, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 213.550264904667, |
| "accept_length": 1.7634936642258956 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 329.6873220645443, |
| "accept_length": 2.8537845395516377 |
| } |
| ] |
| }, |
| { |
| "batch_size": 1, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 181.81151788749455, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 195.13619448514442, |
| "accept_length": 1.7528912619638426 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 251.43922505539766, |
| "accept_length": 2.2820562939796716 |
| } |
| ] |
| }, |
| { |
| "batch_size": 1, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 181.81151788749455, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 197.901650893672, |
| "accept_length": 1.7742552127753433 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 317.61058794222197, |
| "accept_length": 2.9733251079580505 |
| } |
| ] |
| }, |
| { |
| "batch_size": 1, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 181.81151788749455, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 182.0257072155964, |
| "accept_length": 1.789228234172427 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 240.85801894998306, |
| "accept_length": 2.367398432594591 |
| } |
| ] |
| } |
| ] |
| }, |
| "math500": { |
| "benchmark_name": "math500", |
| "results": [ |
| { |
| "batch_size": 1, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 191.04076784280642, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 399.2995452070592, |
| "accept_length": 2.7825411590459592 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 492.28246574028134, |
| "accept_length": 3.4786948176583494 |
| } |
| ] |
| }, |
| { |
| "batch_size": 1, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 191.04076784280642, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 422.40466722576286, |
| "accept_length": 3.254684892147128 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 594.5033645961273, |
| "accept_length": 4.624857400180126 |
| } |
| ] |
| }, |
| { |
| "batch_size": 1, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 191.04076784280642, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 387.0489467031037, |
| "accept_length": 3.3070174292508296 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 480.43534296060534, |
| "accept_length": 4.116159164796923 |
| } |
| ] |
| }, |
| { |
| "batch_size": 1, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 191.04076784280642, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 413.57783551553456, |
| "accept_length": 3.489213277012106 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 638.0439777096752, |
| "accept_length": 5.402844266750837 |
| } |
| ] |
| }, |
| { |
| "batch_size": 1, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 191.04076784280642, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 326.8790406711244, |
| "accept_length": 3.072066504990206 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 453.306808098541, |
| "accept_length": 4.25573095185686 |
| } |
| ] |
| } |
| ] |
| }, |
| "mtbench": { |
| "benchmark_name": "mtbench", |
| "results": [ |
| { |
| "batch_size": 1, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 189.98120707576373, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 414.90616666264776, |
| "accept_length": 2.930670028119849 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 404.24667749722187, |
| "accept_length": 2.8980726819445777 |
| } |
| ] |
| }, |
| { |
| "batch_size": 1, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 189.98120707576373, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 453.73692243041774, |
| "accept_length": 3.554148008484563 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 446.6366476858434, |
| "accept_length": 3.5164393144456105 |
| } |
| ] |
| }, |
| { |
| "batch_size": 1, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 189.98120707576373, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 338.6308027570883, |
| "accept_length": 2.9393909722902185 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 346.46724606666106, |
| "accept_length": 3.0061221366256823 |
| } |
| ] |
| }, |
| { |
| "batch_size": 1, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 189.98120707576373, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 454.730035166582, |
| "accept_length": 3.906676145543851 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 450.03198538047087, |
| "accept_length": 3.855839765261211 |
| } |
| ] |
| }, |
| { |
| "batch_size": 1, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 189.98120707576373, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 305.1648971387325, |
| "accept_length": 2.9089536379397125 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 308.00561770283963, |
| "accept_length": 2.938163437236731 |
| } |
| ] |
| } |
| ] |
| }, |
| "humaneval": { |
| "benchmark_name": "humaneval", |
| "results": [ |
| { |
| "batch_size": 1, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 190.91017930680567, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 432.8677712430711, |
| "accept_length": 3.0469174293472796 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 465.1765542307934, |
| "accept_length": 3.3398192040568846 |
| } |
| ] |
| }, |
| { |
| "batch_size": 1, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 190.91017930680567, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 479.1212006261437, |
| "accept_length": 3.7445769729930163 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 548.9370103875078, |
| "accept_length": 4.318366474235621 |
| } |
| ] |
| }, |
| { |
| "batch_size": 1, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 190.91017930680567, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 340.2704451839945, |
| "accept_length": 2.9425913908717285 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 377.47349118830954, |
| "accept_length": 3.2519286521546853 |
| } |
| ] |
| }, |
| { |
| "batch_size": 1, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 190.91017930680567, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 480.3152659024827, |
| "accept_length": 4.0959237477185155 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 571.4886457684788, |
| "accept_length": 4.910129659643436 |
| } |
| ] |
| }, |
| { |
| "batch_size": 1, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 190.91017930680567, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 311.1051926955927, |
| "accept_length": 2.9338537387017256 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 330.15665770360005, |
| "accept_length": 3.126203604641593 |
| } |
| ] |
| } |
| ] |
| }, |
| "livecodebench": { |
| "benchmark_name": "livecodebench", |
| "results": [ |
| { |
| "batch_size": 1, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 189.70410640395912, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 380.6915537026263, |
| "accept_length": 2.6893540748536475 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 439.67672671912396, |
| "accept_length": 3.16861704188786 |
| } |
| ] |
| }, |
| { |
| "batch_size": 1, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 189.70410640395912, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 398.3738662742165, |
| "accept_length": 3.1199565043209523 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 506.22686693578754, |
| "accept_length": 3.9957244075250427 |
| } |
| ] |
| }, |
| { |
| "batch_size": 1, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 189.70410640395912, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 322.29847741557273, |
| "accept_length": 2.771756050751679 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 375.34956052924895, |
| "accept_length": 3.236171472299629 |
| } |
| ] |
| }, |
| { |
| "batch_size": 1, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 189.70410640395912, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 391.25705242634194, |
| "accept_length": 3.334862665932587 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 516.904537338255, |
| "accept_length": 4.466856034741759 |
| } |
| ] |
| }, |
| { |
| "batch_size": 1, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 189.70410640395912, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 287.68205157705233, |
| "accept_length": 2.7148899046029547 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 378.8468257829908, |
| "accept_length": 3.585376494197714 |
| } |
| ] |
| } |
| ] |
| }, |
| "financeqa": { |
| "benchmark_name": "financeqa", |
| "results": [ |
| { |
| "batch_size": 1, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 185.6534194378935, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 237.18050733350836, |
| "accept_length": 1.713236561734993 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 258.6437346257605, |
| "accept_length": 1.9050339301460721 |
| } |
| ] |
| }, |
| { |
| "batch_size": 1, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 185.6534194378935, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 226.67848476067016, |
| "accept_length": 1.8075300109130592 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 254.48969338840087, |
| "accept_length": 2.043805528134255 |
| } |
| ] |
| }, |
| { |
| "batch_size": 1, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 185.6534194378935, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 210.94791438286492, |
| "accept_length": 1.8654798891594593 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 251.07710462288492, |
| "accept_length": 2.2264818220398923 |
| } |
| ] |
| }, |
| { |
| "batch_size": 1, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 185.6534194378935, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 211.18454065719607, |
| "accept_length": 1.8434056761268782 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 240.6034453504167, |
| "accept_length": 2.1029710512950737 |
| } |
| ] |
| }, |
| { |
| "batch_size": 1, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 185.6534194378935, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 183.72672690273865, |
| "accept_length": 1.7817737292479987 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 229.82170237350869, |
| "accept_length": 2.250341575212658 |
| } |
| ] |
| } |
| ] |
| }, |
| "gpqa": { |
| "benchmark_name": "gpqa", |
| "results": [ |
| { |
| "batch_size": 1, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 190.4500188461883, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 409.86415544506445, |
| "accept_length": 2.8552892726009724 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 442.54523731909666, |
| "accept_length": 3.135712400558006 |
| } |
| ] |
| }, |
| { |
| "batch_size": 1, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 190.4500188461883, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 438.0519648397228, |
| "accept_length": 3.3792158666871135 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 507.1290934019136, |
| "accept_length": 3.936040126357265 |
| } |
| ] |
| }, |
| { |
| "batch_size": 1, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 190.4500188461883, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 352.1689105895484, |
| "accept_length": 3.026258098612226 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 413.1686528229548, |
| "accept_length": 3.5475168823860437 |
| } |
| ] |
| }, |
| { |
| "batch_size": 1, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 190.4500188461883, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 434.1788724748705, |
| "accept_length": 3.6819800875461333 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 514.2312383540044, |
| "accept_length": 4.357665531437638 |
| } |
| ] |
| }, |
| { |
| "batch_size": 1, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 190.4500188461883, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.1-Instruct-8B", |
| "output_throughput": 311.5910755177637, |
| "accept_length": 2.9283727399165507 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.1-8B-Instruct-SpecForge", |
| "output_throughput": 390.64506651929287, |
| "accept_length": 3.692280754414928 |
| } |
| ] |
| } |
| ] |
| } |
| }, |
| "Llama-3.3-70B-Instruct": { |
| "gsm8k": { |
| "benchmark_name": "gsm8k", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 453.2156138501392, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 521.4502791575164, |
| "accept_length": 1.2760798037239203 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 837.9426300003847, |
| "accept_length": 2.3179247901200304 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 453.2156138501392, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 500.5534332009228, |
| "accept_length": 1.2836005168205962 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 855.6400225608106, |
| "accept_length": 2.4851382017038057 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 453.2156138501392, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 500.33326156436937, |
| "accept_length": 1.3482255389718076 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 758.9001336688345, |
| "accept_length": 2.12511673151751 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 453.2156138501392, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 483.12653680688, |
| "accept_length": 1.2856745693167546 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 820.5175400063332, |
| "accept_length": 2.516910489405022 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 453.2156138501392, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 480.4218686725539, |
| "accept_length": 1.3936331604189096 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 739.405741336959, |
| "accept_length": 2.222061210294459 |
| } |
| ] |
| } |
| ] |
| }, |
| "math500": { |
| "benchmark_name": "math500", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 567.3739460148672, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 1088.844896763402, |
| "accept_length": 2.3720131878590123 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 1273.7733416283656, |
| "accept_length": 2.841736535013628 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 567.3739460148672, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 1122.2476729474943, |
| "accept_length": 2.5920045204124875 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 1382.9357431087456, |
| "accept_length": 3.243898689873717 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 567.3739460148672, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 1112.8479569335152, |
| "accept_length": 2.792588962605549 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 1274.2110431983278, |
| "accept_length": 3.2416170775479363 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 567.3739460148672, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 1079.9951811356827, |
| "accept_length": 2.6718376973892366 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 1327.6044700788502, |
| "accept_length": 3.3766338373668217 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 567.3739460148672, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 1090.3170854344964, |
| "accept_length": 2.966812280063099 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 1215.8347875575441, |
| "accept_length": 3.3641021480547684 |
| } |
| ] |
| } |
| ] |
| }, |
| "mtbench": { |
| "benchmark_name": "mtbench", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 540.4640557255416, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 1234.647877556777, |
| "accept_length": 2.9232673267326734 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 1238.4736758319698, |
| "accept_length": 2.9606951984177083 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 540.4640557255416, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 1377.8052334866013, |
| "accept_length": 3.5324281309061973 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 1409.5100765643524, |
| "accept_length": 3.6175162329362442 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 540.4640557255416, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 1129.6661036217977, |
| "accept_length": 3.143848893296669 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 1108.3072501756835, |
| "accept_length": 3.2248797608215263 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 540.4640557255416, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 1425.2993761886291, |
| "accept_length": 3.8789368991048736 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 1440.3671955624673, |
| "accept_length": 3.97791186891054 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 540.4640557255416, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 1069.4986663607351, |
| "accept_length": 3.1943331425300516 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 1033.773238205561, |
| "accept_length": 3.2422141262192974 |
| } |
| ] |
| } |
| ] |
| }, |
| "humaneval": { |
| "benchmark_name": "humaneval", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 560.9500728009846, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 1194.0875984832494, |
| "accept_length": 2.6663626344392504 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 1290.1122375104421, |
| "accept_length": 2.925804965875309 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 560.9500728009846, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 1282.7936401185236, |
| "accept_length": 3.0671719811813904 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 1426.372333907719, |
| "accept_length": 3.436568804650481 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 560.9500728009846, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 1090.1088508973057, |
| "accept_length": 2.8127895941495002 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 1174.0867819009864, |
| "accept_length": 3.0611013660766493 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 560.9500728009846, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 1267.8737053510965, |
| "accept_length": 3.1906793120660706 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 1407.8140138598972, |
| "accept_length": 3.6735002608242047 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 560.9500728009846, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 1013.2705272855593, |
| "accept_length": 2.7776112847805305 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 968.2027451202639, |
| "accept_length": 2.742653690956563 |
| } |
| ] |
| } |
| ] |
| }, |
| "livecodebench": { |
| "benchmark_name": "livecodebench", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 560.8834615148919, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 1210.6010917932015, |
| "accept_length": 2.723797958423008 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 1295.014267720614, |
| "accept_length": 2.952023346303502 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 560.8834615148919, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 1303.4195570335166, |
| "accept_length": 3.133414966360772 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 1423.2736941362525, |
| "accept_length": 3.4980468448438247 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 560.8834615148919, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 1070.711661408102, |
| "accept_length": 2.735034762087001 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 1154.785652335772, |
| "accept_length": 2.9811645516106386 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 560.8834615148919, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 1279.5345355421975, |
| "accept_length": 3.284394784770605 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 1399.3991191944933, |
| "accept_length": 3.716324359708698 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 560.8834615148919, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 1013.3765756840332, |
| "accept_length": 2.773990564681233 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 1035.4140338795994, |
| "accept_length": 2.933293078243183 |
| } |
| ] |
| } |
| ] |
| }, |
| "financeqa": { |
| "benchmark_name": "financeqa", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 512.5751663875466, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 704.0737829344649, |
| "accept_length": 1.645732050137249 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 936.4940018423655, |
| "accept_length": 2.2541347317466722 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 512.5751663875466, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 684.0195321200449, |
| "accept_length": 1.702027072988232 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 933.0572305312112, |
| "accept_length": 2.39442380929992 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 512.5751663875466, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 618.4946534541955, |
| "accept_length": 1.7860533893688224 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 700.886442439991, |
| "accept_length": 2.281622206910129 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 512.5751663875466, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 652.1412786559076, |
| "accept_length": 1.7116903633491312 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 887.7001871678323, |
| "accept_length": 2.452738257649581 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 512.5751663875466, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 635.2599880909434, |
| "accept_length": 1.9610333607746286 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 854.0347909075315, |
| "accept_length": 2.589833798374378 |
| } |
| ] |
| } |
| ] |
| }, |
| "gpqa": { |
| "benchmark_name": "gpqa", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 575.6879373469175, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 962.5545831639148, |
| "accept_length": 2.0451300999292217 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 1020.0538308626681, |
| "accept_length": 2.1911976817371235 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 575.6879373469175, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 963.8356757692138, |
| "accept_length": 2.1687507495755036 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 1039.643962895085, |
| "accept_length": 2.3552079123829617 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 575.6879373469175, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 890.1003387342033, |
| "accept_length": 2.226321240698847 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 960.5616523564485, |
| "accept_length": 2.4811411267352264 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 575.6879373469175, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 916.6826693888017, |
| "accept_length": 2.1849745643049188 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 984.4877550429275, |
| "accept_length": 2.4152394292465176 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 575.6879373469175, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-LLaMA3.3-Instruct-70B", |
| "output_throughput": 838.0962787179271, |
| "accept_length": 2.3145643059121785 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-3.3-70B-Instruct-Specforge", |
| "output_throughput": 924.0808096194634, |
| "accept_length": 2.573260793115575 |
| } |
| ] |
| } |
| ] |
| } |
| }, |
| "Llama-4-Scout-17B-16E-Instruct": { |
| "gsm8k": { |
| "benchmark_name": "gsm8k", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 455.9311905316165, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 816.6176343207234, |
| "accept_length": 2.435108707729916 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 908.8655650704263, |
| "accept_length": 3.1118742007294085 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 455.9311905316165, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 806.5328373116205, |
| "accept_length": 2.6234459324405357 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 971.8534490877095, |
| "accept_length": 3.8715801886792454 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 455.9311905316165, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 708.8133468064259, |
| "accept_length": 2.146746247607535 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 818.3072714693558, |
| "accept_length": 2.918526679710503 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 455.9311905316165, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 765.9810114809961, |
| "accept_length": 2.675257522087863 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 957.227019602509, |
| "accept_length": 4.307217442700466 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 455.9311905316165, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 675.0775309782273, |
| "accept_length": 2.144316290813106 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 814.5839518607636, |
| "accept_length": 2.627502101582583 |
| } |
| ] |
| } |
| ] |
| }, |
| "math500": { |
| "benchmark_name": "math500", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 561.835811548351, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 1478.9989946720648, |
| "accept_length": 2.366719134681358 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 1884.3462895109676, |
| "accept_length": 3.238557789111507 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 561.835811548351, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 1447.5513200323323, |
| "accept_length": 2.5898901840327406 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 2100.7682204066577, |
| "accept_length": 4.153214423200308 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 561.835811548351, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 1199.1485073659853, |
| "accept_length": 2.489558557182447 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 1457.2169829849418, |
| "accept_length": 3.2046972238757507 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 561.835811548351, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 1330.0337890073868, |
| "accept_length": 2.648556845221877 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 2110.3314050998847, |
| "accept_length": 4.7805795395081105 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 561.835811548351, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 1153.7706965189202, |
| "accept_length": 2.6314392278632304 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 1369.6607164745208, |
| "accept_length": 3.2076523352436657 |
| } |
| ] |
| } |
| ] |
| }, |
| "mtbench": { |
| "benchmark_name": "mtbench", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 502.10114738381606, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 1252.9681990096112, |
| "accept_length": 2.3541095408844828 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 1302.3829223511154, |
| "accept_length": 2.4913843888070693 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 502.10114738381606, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 1225.4607594389363, |
| "accept_length": 2.5648559607722956 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 1312.399917450856, |
| "accept_length": 2.836414637256152 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 502.10114738381606, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 953.148992300308, |
| "accept_length": 2.222710749523974 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 967.1281111811169, |
| "accept_length": 2.3256101583113455 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 502.10114738381606, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 1157.0433602013916, |
| "accept_length": 2.649528603387664 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 1276.9552963643773, |
| "accept_length": 3.0189181867437243 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 502.10114738381606, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 940.9893388280037, |
| "accept_length": 2.3959043407227965 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 1010.4098410869198, |
| "accept_length": 2.7008052625609618 |
| } |
| ] |
| } |
| ] |
| }, |
| "humaneval": { |
| "benchmark_name": "humaneval", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 631.8746804703884, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 1515.800628974162, |
| "accept_length": 2.664927494512612 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 1749.0012751674196, |
| "accept_length": 3.224152798137449 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 631.8746804703884, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 1556.515161340629, |
| "accept_length": 3.085438335809807 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 1921.2922045342316, |
| "accept_length": 4.140846637369973 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 631.8746804703884, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 1201.849883743592, |
| "accept_length": 2.6006220481511346 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 1393.1592557980014, |
| "accept_length": 3.1744799971652315 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 631.8746804703884, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 1456.346786965349, |
| "accept_length": 3.2582381225462083 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 1944.8214954525663, |
| "accept_length": 4.7947306331104995 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 631.8746804703884, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 1109.058302621911, |
| "accept_length": 2.6508010386556267 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 1234.7042057027743, |
| "accept_length": 3.0442784990549376 |
| } |
| ] |
| } |
| ] |
| }, |
| "livecodebench": { |
| "benchmark_name": "livecodebench", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 484.2501137181978, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 1598.2921930690502, |
| "accept_length": 2.487202280374381 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 1933.9962764283844, |
| "accept_length": 3.14740116583215 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 484.2501137181978, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 1601.2688464385185, |
| "accept_length": 2.8043640587405627 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 2144.3319751584095, |
| "accept_length": 3.983057732747085 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 484.2501137181978, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 1051.7266219288254, |
| "accept_length": 2.1138485934104656 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 1320.656674087923, |
| "accept_length": 2.7145795398417976 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 484.2501137181978, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 1501.558947290443, |
| "accept_length": 2.929916684169992 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 2170.188140733029, |
| "accept_length": 4.55060712303548 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 484.2501137181978, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 1009.5574686537159, |
| "accept_length": 2.2590065740745002 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 1249.8114756626915, |
| "accept_length": 2.8130523194007555 |
| } |
| ] |
| } |
| ] |
| }, |
| "financeqa": { |
| "benchmark_name": "financeqa", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 288.9007335547823, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 1022.713052476267, |
| "accept_length": 1.7952034022379475 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 1189.61672405822, |
| "accept_length": 2.2164571332464367 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 288.9007335547823, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 963.8209003406079, |
| "accept_length": 1.8240590609583607 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 1171.8275957081507, |
| "accept_length": 2.408275220827522 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 288.9007335547823, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 755.8055387643059, |
| "accept_length": 1.780077619663648 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 887.65933899505, |
| "accept_length": 2.1907344347752975 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 288.9007335547823, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 885.0003924094965, |
| "accept_length": 1.864155494076754 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 1084.5573704005851, |
| "accept_length": 2.459442783236034 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 288.9007335547823, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 773.7660016870891, |
| "accept_length": 2.05643096671835 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 838.3207906571789, |
| "accept_length": 2.1910908349096845 |
| } |
| ] |
| } |
| ] |
| }, |
| "gpqa": { |
| "benchmark_name": "gpqa", |
| "results": [ |
| { |
| "batch_size": 8, |
| "steps": 3, |
| "topk": 1, |
| "num_draft_tokens": 4, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 541.0010469896803, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 1320.0198779778916, |
| "accept_length": 2.0166714112874526 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 1482.2781495871964, |
| "accept_length": 2.3200242800296755 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 1, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 541.0010469896803, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 1258.0775283103167, |
| "accept_length": 2.135039169677331 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 1468.3432054658438, |
| "accept_length": 2.5528455284552845 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 5, |
| "topk": 3, |
| "num_draft_tokens": 6, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 541.0010469896803, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 1405.110892125768, |
| "accept_length": 2.8834021014937705 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 1502.213627081269, |
| "accept_length": 3.0623772161357583 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 1, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 541.0010469896803, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 1148.5409144989237, |
| "accept_length": 2.1684843736177633 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 1379.1223204247422, |
| "accept_length": 2.672381928590287 |
| } |
| ] |
| }, |
| { |
| "batch_size": 8, |
| "steps": 7, |
| "topk": 4, |
| "num_draft_tokens": 8, |
| "metrics": [ |
| { |
| "Name": "Wihtout EAGLE3", |
| "output_throughput": 541.0010469896803, |
| "accept_length": 1.0 |
| }, |
| { |
| "Name": "lmsys/sglang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-v1", |
| "output_throughput": 1345.7377508882935, |
| "accept_length": 3.044341630328194 |
| }, |
| { |
| "Name": "lmsys/SGLang-EAGLE3-Llama-4-Scout-17B-16E-Instruct-SpecForge", |
| "output_throughput": 1474.1967930541948, |
| "accept_length": 3.315005686664771 |
| } |
| ] |
| } |
| ] |
| } |
| } |
| } |
|
|