<!--
mcp-bench / index.html
ztwang's picture
Upload 10 files
a399453 verified
raw
history blame
6.42 kB
-->
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>MCP Benchmark Leaderboard</title>
  <meta name="description" content="Leaderboard and detailed results for MCP-Bench, a benchmark of tool-using LLM agents on complex real-world tasks via MCP servers.">

  <!-- Open connections to the Google Fonts hosts early so the Inter font
       request below does not wait on DNS/TLS setup. -->
  <link rel="preconnect" href="https://fonts.googleapis.com">
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>

  <!-- Local page styles. -->
  <link rel="stylesheet" href="style.css">

  <!-- Inter web font; the ampersand in the query string is escaped so the
       attribute value is valid HTML (the resulting URL is unchanged). -->
  <link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&amp;display=swap">

  <!-- Font Awesome 6.0.0 icon font, used by the fab/fas icon classes in the body.
       NOTE(review): third-party stylesheet loaded without an integrity hash;
       consider adding SRI (integrity + crossorigin) using the hash published by the CDN. -->
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
</head>
<body>
<div class="container">
<!-- Paper Information: title, author list, affiliation and the three primary links -->
<header class="paper-header">
  <h1 class="paper-title">MCP-Bench: Benchmarking Tool-Using LLM Agents with Complex Real-World Tasks via MCP Servers</h1>
  <div class="paper-authors">
    <p>Zhenting Wang, Qi Chang, Hemani Patel, Shashank Biju, Cheng-En Wu, Quan Liu, Aolin Ding, Alireza Rezazadeh, Ankit Shah, Yujia Bao, Eugene Siow</p>
    <p class="affiliation">Accenture, UC Berkeley</p>
  </div>
  <!-- Each icon is purely decorative (the link text already names the target),
       so it is hidden from assistive technology with aria-hidden. -->
  <div class="paper-links">
    <a href="https://github.com/Accenture/mcp-bench" class="paper-link">
      <i class="fab fa-github" aria-hidden="true"></i> GitHub
    </a>
    <a href="https://arxiv.org/abs/2508.20453" class="paper-link">
      <i class="fas fa-file-pdf" aria-hidden="true"></i> Paper
    </a>
    <!-- In-page jump to the section carrying id="leaderboard". -->
    <a href="#leaderboard" class="paper-link">
      <i class="fas fa-trophy" aria-hidden="true"></i> Leaderboard
    </a>
  </div>
</header>
<!-- Overview: architecture diagram followed by a one-paragraph description of the benchmark -->
<section class="diagram-section">
  <img class="diagram-image" src="mcp-bench.png" alt="MCP-Bench Architecture Diagram">
  <p class="diagram-caption">MCP-Bench is a comprehensive evaluation framework designed to assess Large Language Models' (LLMs) capabilities in tool-use scenarios through the Model Context Protocol (MCP). This benchmark provides an end-to-end pipeline for evaluating how effectively different LLMs can discover, select, and utilize tools to solve real-world tasks.</p>
</section>
<!-- Performance ranking: a static chart image under its own heading -->
<section class="chart-section">
  <h2 class="section-title">Performance Ranking</h2>
  <img class="ranking-chart" src="ranking.png" alt="MCP Benchmark Ranking Chart">
</section>
<!-- Leaderboard: detailed per-model results table (target of the #leaderboard link) -->
<section class="leaderboard-section" id="leaderboard">
  <h2 class="section-title">Detailed Results</h2>
  <div class="table-container">
    <table class="leaderboard-table" id="leaderboardTable">
      <thead>
        <!-- Every header cell labels a column, so each carries scope="col"
             to let assistive technology associate data cells with it. -->
        <tr>
          <th class="model-col" scope="col"><strong>Model</strong></th>
          <th class="score-col" scope="col"><strong>Overall Score</strong></th>
          <th class="metric-col" scope="col">Valid Tool<br>Name Rate</th>
          <th class="metric-col" scope="col">Schema<br>Compliance</th>
          <th class="metric-col" scope="col">Execution<br>Success</th>
          <th class="metric-col" scope="col">Task<br>Fulfillment</th>
          <th class="metric-col" scope="col">Information<br>Grounding</th>
          <th class="metric-col" scope="col">Tool<br>Appropriateness</th>
          <th class="metric-col" scope="col">Parameter<br>Accuracy</th>
          <th class="metric-col" scope="col">Dependency<br>Awareness</th>
          <th class="metric-col" scope="col">Parallelism<br>and Efficiency</th>
        </tr>
      </thead>
      <tbody id="tableBody">
        <!-- Table rows are expected to be generated by JavaScript.
             NOTE(review): no script in this file references #tableBody and no
             external script is loaded, so the table appears to stay empty;
             confirm where the rows are supposed to come from. -->
      </tbody>
    </table>
  </div>
</section>
<!-- Citation Section: BibTeX entry plus a button that copies it to the clipboard -->
<section class="citation-section">
  <h2 class="section-title">Citation</h2>
  <div class="citation-box">
    <!-- Whitespace inside <pre> is literal and is exactly what gets copied, so the
         entry lines stay flush left. The year (and the year in the key) follow the
         arXiv identifier 2508.20453, which denotes an August 2025 submission. -->
    <pre class="citation-text">@article{wang2025mcpbench,
title={MCP-Bench: Benchmarking Tool-Using LLM Agents with Complex Real-World Tasks via MCP Servers},
author={Wang, Zhenting and Chang, Qi and Patel, Hemani and Biju, Shashank and Wu, Cheng-En and Liu, Quan and Ding, Aolin and Rezazadeh, Alireza and Shah, Ankit and Bao, Yujia and Siow, Eugene},
journal={arXiv preprint arXiv:2508.20453},
year={2025}
}</pre>
    <!-- type="button" keeps this an in-page action even if the markup is ever
         placed inside a form; the icon is decorative and hidden from assistive technology. -->
    <button class="copy-citation-btn" type="button" onclick="copyCitation()">
      <i class="fas fa-copy" aria-hidden="true"></i> Copy Citation
    </button>
  </div>
</section>
<!-- Page footer: the "Last updated" span is filled in by the inline script on DOMContentLoaded -->
<footer class="footer">
  <p>Last updated: <span id="lastUpdated"></span></p>
  <p>Data source: MCP-Bench Results (arXiv: 2508.20453)</p>
</footer>
</div>
<script>
// Copy citation function
/**
 * Copies the text of the `.citation-text` element to the clipboard and, on
 * success, shows a "Copied!" confirmation on `.copy-citation-btn` for two
 * seconds before restoring the button's normal label.
 *
 * Invoked from the button's inline `onclick`, so it must remain a global
 * function with this name. Takes no arguments and returns nothing.
 *
 * Failure handling: if either element is missing the call is a no-op; if the
 * Clipboard API is unavailable or the write is rejected, the problem is logged
 * to the console and the button is left unchanged.
 */
function copyCitation() {
  const citation = document.querySelector('.citation-text');
  const button = document.querySelector('.copy-citation-btn');
  if (!citation || !button) {
    return;
  }

  // navigator.clipboard only exists in secure contexts; without this guard a
  // page served over plain http would throw a TypeError from the click handler.
  if (!navigator.clipboard || typeof navigator.clipboard.writeText !== 'function') {
    console.warn('Clipboard API is not available; citation was not copied.');
    return;
  }

  navigator.clipboard.writeText(citation.textContent).then(() => {
    // Remember the idle label only once. Re-reading innerHTML on every click
    // would capture the "Copied!" markup when the user clicks again during
    // the confirmation window, leaving the button stuck in that state.
    if (copyCitation.idleMarkup === undefined) {
      copyCitation.idleMarkup = button.innerHTML;
    }

    // A repeat click restarts the two-second window instead of stacking timers.
    clearTimeout(copyCitation.resetTimer);

    button.innerHTML = '<i class="fas fa-check"></i> Copied!';
    button.style.backgroundColor = '#4caf50';

    copyCitation.resetTimer = setTimeout(() => {
      button.innerHTML = copyCitation.idleMarkup;
      button.style.backgroundColor = '';
    }, 2000);
  }).catch((error) => {
    // The write can be rejected (e.g. permission denied); report it rather
    // than leaving an unhandled promise rejection.
    console.error('Failed to copy citation:', error);
  });
}
// Fill in the footer's "Last updated" label once the DOM has been parsed.
// NOTE(review): the label is hard-coded, and "December 2024" predates the
// arXiv identifier cited on this page (2508.20453 looks like an August 2025
// submission) - confirm the intended date.
document.addEventListener('DOMContentLoaded', () => {
  const stamp = document.getElementById('lastUpdated');
  if (!stamp) {
    // No placeholder element on the page: nothing to update.
    return;
  }
  stamp.textContent = 'December 2024';
});
</script>
</body>
</html>