<!DOCTYPE html>
<html lang="en">
|
<head>
  <!-- Document metadata: encoding, responsive viewport, title and summary. -->
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>MCP Benchmark Leaderboard</title>
  <meta name="description" content="MCP-Bench leaderboard: benchmarking tool-using LLM agents with complex real-world tasks via MCP servers.">

  <!-- Open connections to the Google Fonts origins early. The font files are
       served from fonts.gstatic.com and fetched in CORS mode, hence crossorigin. -->
  <link rel="preconnect" href="https://fonts.googleapis.com">
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>

  <!-- Stylesheets, kept in their original order so the cascade is unchanged. -->
  <link rel="stylesheet" href="style.css">
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
  <!-- NOTE(review): this third-party stylesheet is loaded without Subresource
       Integrity. Consider adding the integrity hash published by the CDN for
       this exact version together with crossorigin="anonymous". -->
  <link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css" rel="stylesheet">
</head>
|
<body>
<div class="container">
|
|
|
<!-- Page header: paper title, author list and quick links. -->
<header class="paper-header">
  <h1 class="paper-title">MCP-Bench: Benchmarking Tool-Using LLM Agents with Complex Real-World Tasks via MCP Servers</h1>

  <div class="paper-authors">
    <p>Zhenting Wang, Qi Chang, Hemani Patel, Shashank Biju, Cheng-En Wu, Quan Liu, Aolin Ding, Alireza Rezazadeh, Ankit Shah, Yujia Bao, Eugene Siow</p>
    <p class="affiliation">Accenture, UC Berkeley</p>
  </div>

  <!-- The icons are purely decorative (each link already has visible text),
       so they are hidden from assistive technology with aria-hidden. -->
  <div class="paper-links">
    <a href="https://github.com/Accenture/mcp-bench" class="paper-link">
      <i class="fab fa-github" aria-hidden="true"></i> GitHub
    </a>
    <a href="https://arxiv.org/abs/2508.20453" class="paper-link">
      <i class="fas fa-file-pdf" aria-hidden="true"></i> Paper
    </a>
    <!-- In-page jump to the element with id="leaderboard". -->
    <a href="#leaderboard" class="paper-link">
      <i class="fas fa-trophy" aria-hidden="true"></i> Leaderboard
    </a>
  </div>
</header>
|
|
|
|
|
<!-- Overview: architecture diagram followed by a one-paragraph description.
     The section has no heading of its own, so aria-label gives it an
     accessible name. -->
<section class="diagram-section" aria-label="Benchmark overview">
  <img src="mcp-bench.png" alt="MCP-Bench Architecture Diagram" class="diagram-image" decoding="async">
  <p class="diagram-caption">
    MCP-Bench is a comprehensive evaluation framework designed to assess Large Language Models' (LLMs) capabilities in tool-use scenarios through the Model Context Protocol (MCP). This benchmark provides an end-to-end pipeline for evaluating how effectively different LLMs can discover, select, and utilize tools to solve real-world tasks.
  </p>
</section>
|
|
|
|
|
<!-- Ranking chart, rendered as a static image. -->
<section class="chart-section">
  <h2 class="section-title">Performance Ranking</h2>
  <!-- This image sits after the header and the overview diagram, so it is
       loaded lazily and decoded off the critical path.
       NOTE(review): add width/height once the intrinsic size of ranking.png
       is known, to reserve layout space. -->
  <img src="ranking.png" alt="MCP Benchmark Ranking Chart" class="ranking-chart" loading="lazy" decoding="async">
</section>
|
|
|
|
|
<!-- Detailed results table; this section is the target of the "#leaderboard" link. -->
<section class="leaderboard-section" id="leaderboard">
  <h2 class="section-title" id="leaderboardHeading">Detailed Results</h2>

  <div class="table-container">
    <!-- aria-labelledby names the table after the section heading, and
         scope="col" ties every header cell to its column for screen readers. -->
    <table class="leaderboard-table" id="leaderboardTable" aria-labelledby="leaderboardHeading">
      <thead>
        <tr>
          <th class="model-col" scope="col"><strong>Model</strong></th>
          <th class="score-col" scope="col"><strong>Overall Score</strong></th>
          <th class="metric-col" scope="col">Valid Tool<br>Name Rate</th>
          <th class="metric-col" scope="col">Schema<br>Compliance</th>
          <th class="metric-col" scope="col">Execution<br>Success</th>
          <th class="metric-col" scope="col">Task<br>Fulfillment</th>
          <th class="metric-col" scope="col">Information<br>Grounding</th>
          <th class="metric-col" scope="col">Tool<br>Appropriateness</th>
          <th class="metric-col" scope="col">Parameter<br>Accuracy</th>
          <th class="metric-col" scope="col">Dependency<br>Awareness</th>
          <th class="metric-col" scope="col">Parallelism<br>and Efficiency</th>
        </tr>
      </thead>
      <!-- NOTE(review): no rows are defined here, and the inline script in this
           file does not add any. Confirm where #tableBody is meant to be
           populated; otherwise the table renders with headers only. -->
      <tbody id="tableBody">
      </tbody>
    </table>
  </div>
</section>
|
|
|
|
|
<!-- BibTeX citation with a copy-to-clipboard button. -->
<section class="citation-section">
  <h2 class="section-title">Citation</h2>
  <div class="citation-box">
    <!-- The text inside <pre> is whitespace-sensitive and is what the copy
         button reads, so its lines are deliberately not indented to match the
         surrounding markup. The year follows the arXiv identifier
         (2508 = August 2025). -->
    <pre class="citation-text">@article{wang2025mcpbench,
  title={MCP-Bench: Benchmarking Tool-Using LLM Agents with Complex Real-World Tasks via MCP Servers},
  author={Wang, Zhenting and Chang, Qi and Patel, Hemani and Biju, Shashank and Wu, Cheng-En and Liu, Quan and Ding, Aolin and Rezazadeh, Alireza and Shah, Ankit and Bao, Yujia and Siow, Eugene},
  journal={arXiv preprint arXiv:2508.20453},
  year={2025}
}</pre>
    <!-- type="button" makes the in-page action explicit (the default type is
         "submit"); the icon is decorative because the button has visible text. -->
    <button type="button" class="copy-citation-btn" onclick="copyCitation()">
      <i class="fas fa-copy" aria-hidden="true"></i> Copy Citation
    </button>
  </div>
</section>
|
|
|
<!-- Page footer. The #lastUpdated span is empty in the markup and is filled in
     by the inline script once the DOM is ready. -->
<footer class="footer">
  <p>Last updated: <span id="lastUpdated"></span></p>
  <p>Data source: MCP-Bench Results (arXiv: 2508.20453)</p>
</footer>
|
</div>

<script>
|
|
|
/**
 * Copy the BibTeX entry shown in `.citation-text` to the clipboard and give
 * two seconds of visual feedback on `.copy-citation-btn`.
 *
 * Invoked from the button's inline `onclick` handler. Takes no arguments and
 * returns nothing; it only has DOM and clipboard side effects.
 *
 * Behaviour notes:
 * - The button's idle label is remembered once per feedback window, so clicking
 *   again while "Copied!" is showing cannot capture "Copied!" as the label to
 *   restore.
 * - Any pending reset timer is cancelled before a new one is started, so a
 *   stale timer cannot cut a newer feedback window short.
 * - When the async Clipboard API is unavailable (e.g. an insecure context) or
 *   the write is rejected, the citation text is selected instead so the user
 *   can copy it manually.
 */
function copyCitation() {
    const citation = document.querySelector('.citation-text');
    const button = document.querySelector('.copy-citation-btn');
    if (!citation) {
        return;
    }

    // Swap the button to its "Copied!" state and schedule the swap back.
    const showCopied = () => {
        if (!button) {
            return;
        }
        if (button.dataset.idleHtml === undefined) {
            button.dataset.idleHtml = button.innerHTML;
        }
        button.innerHTML = '<i class="fas fa-check"></i> Copied!';
        button.style.backgroundColor = '#4caf50';

        clearTimeout(copyCitation.resetTimer);
        copyCitation.resetTimer = setTimeout(() => {
            button.innerHTML = button.dataset.idleHtml;
            button.style.backgroundColor = '';
            delete button.dataset.idleHtml;
        }, 2000);
    };

    // Fallback: highlight the citation so it can be copied by hand.
    const selectForManualCopy = () => {
        const selection = window.getSelection();
        if (!selection) {
            return;
        }
        const range = document.createRange();
        range.selectNodeContents(citation);
        selection.removeAllRanges();
        selection.addRange(range);
    };

    const clipboard = navigator.clipboard;
    if (!clipboard || typeof clipboard.writeText !== 'function') {
        selectForManualCopy();
        return;
    }

    clipboard.writeText(citation.textContent).then(showCopied, selectForManualCopy);
}
|
|
|
|
|
// Fill the footer's "Last updated" placeholder once the document is parsed.
// NOTE(review): the date is hard-coded, and "December 2024" predates the
// arXiv identifier 2508.20453 cited on this page. Confirm the intended value.
document.addEventListener('DOMContentLoaded', () => {
    const stampEl = document.getElementById('lastUpdated');

    // Nothing to do when the placeholder is absent from the page.
    if (!stampEl) {
        return;
    }

    stampEl.textContent = 'December 2024';
});
|
</script>
</body>
</html>