mcp-bench / index.html
ztwang's picture
Upload 22 files
4966301 verified
raw
history blame
8.03 kB
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>MCP Benchmark Leaderboard</title>
  <meta name="description" content="MCP-Bench leaderboard: benchmarking tool-using LLM agents with complex real-world tasks via MCP servers.">
  <!-- Open connections to the Google Fonts origins early: the Inter stylesheet
       below is served from fonts.googleapis.com and its font files from
       fonts.gstatic.com (fetched in CORS mode, hence crossorigin). -->
  <link rel="preconnect" href="https://fonts.googleapis.com">
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
  <!-- Stylesheet order is kept as-is so the cascade is unchanged. -->
  <link rel="stylesheet" href="style.css">
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
  <!-- NOTE(review): third-party CDN stylesheet loaded without an integrity (SRI)
       hash; consider adding integrity + crossorigin using the hash published
       by the CDN for this exact version. -->
  <link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css" rel="stylesheet">
</head>
<body>
<div class="container">
<!-- Paper information: title, authors/affiliation, and quick links.
     The Font Awesome <i> icons are decorative (each link has visible text),
     so they are hidden from assistive technology with aria-hidden. -->
<header class="paper-header">
  <h1 class="paper-title">MCP-Bench: Benchmarking Tool-Using LLM Agents with Complex Real-World Tasks via MCP Servers</h1>
  <div class="paper-authors">
    <p>Zhenting Wang, Qi Chang, Hemani Patel, Shashank Biju, Cheng-En Wu, Quan Liu, Aolin Ding, Alireza Rezazadeh, Ankit Shah, Yujia Bao, Eugene Siow</p>
    <p class="affiliation">Accenture, UC Berkeley</p>
  </div>
  <div class="paper-links">
    <a href="https://github.com/Accenture/mcp-bench" class="paper-link">
      <i class="fab fa-github" aria-hidden="true"></i> GitHub
    </a>
    <a href="https://arxiv.org/abs/2508.20453" class="paper-link">
      <i class="fas fa-file-pdf" aria-hidden="true"></i> Paper
    </a>
    <!-- In-page jump to the section with id="leaderboard". -->
    <a href="#leaderboard" class="paper-link">
      <i class="fas fa-trophy" aria-hidden="true"></i> Leaderboard
    </a>
  </div>
</header>
<!-- Architecture overview: the MCP-Bench diagram followed by a short summary paragraph. -->
<section class="diagram-section">
  <img class="diagram-image" src="mcp-bench.png" alt="MCP-Bench Architecture Diagram">
  <p class="diagram-caption">
    MCP-Bench is a comprehensive evaluation framework designed to assess Large Language Models' (LLMs) capabilities in tool-use scenarios through the Model Context Protocol (MCP). This benchmark provides an end-to-end pipeline for evaluating how effectively different LLMs can discover, select, and utilize tools to solve real-world tasks.
  </p>
</section>
<!-- Static ranking chart image shown under the "Performance Ranking" heading. -->
<section class="chart-section">
  <h2 class="section-title">Performance Ranking</h2>
  <img class="ranking-chart" src="ranking.png" alt="MCP Benchmark Ranking Chart">
</section>
<!-- Leaderboard Header -->
<section class="leaderboard-section" id="leaderboard">
<h2 class="section-title">Detailed Results</h2>
<!-- Leaderboard controls: free-text model search, sort-key picker, and a
     sort-direction toggle. Element ids are unchanged; they are presumably
     looked up by script.js. -->
<div class="controls">
  <div class="search-container">
    <i class="fas fa-search" aria-hidden="true"></i>
    <!-- A placeholder is not a label, so the input gets an explicit accessible name. -->
    <input type="text" id="searchInput" placeholder="Search models..." class="search-input" aria-label="Search models">
  </div>
  <div class="filter-container">
    <label for="sortSelect">Sort by:</label>
    <!-- NOTE(review): apart from overall_score, these option values do not match
         any data-column key used by the table headers in this file (e.g.
         valid_tool_name_rate, schema_compliance, execution_success). If script.js
         sorts rows by the selected value, most options may not correspond to a
         displayed column. Values are left unchanged here; confirm against
         script.js and the data keys. -->
    <select id="sortSelect" class="sort-select">
      <option value="overall_score">Overall Score</option>
      <option value="valid_tool_schema">Valid Tool Schema</option>
      <option value="compliance">Compliance</option>
      <option value="task_success">Task Success</option>
      <option value="schema_understanding">Schema Understanding</option>
      <option value="task_completion">Task Completion</option>
      <option value="tool_usage">Tool Usage</option>
      <option value="planning_effectiveness">Planning Effectiveness</option>
    </select>
    <!-- Icon-only button: explicit type (it never submits anything) and an
         aria-label so the name does not depend on the title tooltip. -->
    <button type="button" id="sortOrder" class="sort-btn" title="Toggle sort order" aria-label="Toggle sort order">
      <i class="fas fa-sort-amount-down" aria-hidden="true"></i>
    </button>
  </div>
</div>
<!-- Results table. Only the header row is static; the body is filled in at
     runtime. Each header carries a data-column key and a sort icon.
     NOTE(review): the "sortable" headers are plain <th> cells, so any click
     sorting attached by script.js is presumably not reachable by keyboard;
     consider wrapping the header content in a <button>. -->
<div class="table-container">
  <table class="leaderboard-table" id="leaderboardTable" aria-label="MCP-Bench leaderboard results">
    <thead>
      <tr>
        <th scope="col" class="model-col sortable" data-column="name">
          <strong>Model</strong>
          <i class="fas fa-sort sort-icon" aria-hidden="true"></i>
        </th>
        <th scope="col" class="score-col sortable" data-column="overall_score">
          <strong>Overall Score</strong>
          <i class="fas fa-sort sort-icon" aria-hidden="true"></i>
        </th>
        <th scope="col" class="metric-col sortable" data-column="valid_tool_name_rate">
          Valid Tool<br>Name Rate
          <i class="fas fa-sort sort-icon" aria-hidden="true"></i>
        </th>
        <th scope="col" class="metric-col sortable" data-column="schema_compliance">
          Schema<br>Compliance
          <i class="fas fa-sort sort-icon" aria-hidden="true"></i>
        </th>
        <th scope="col" class="metric-col sortable" data-column="execution_success">
          Execution<br>Success
          <i class="fas fa-sort sort-icon" aria-hidden="true"></i>
        </th>
        <th scope="col" class="metric-col sortable" data-column="task_fulfillment">
          Task<br>Fulfillment
          <i class="fas fa-sort sort-icon" aria-hidden="true"></i>
        </th>
        <th scope="col" class="metric-col sortable" data-column="information_grounding">
          Information<br>Grounding
          <i class="fas fa-sort sort-icon" aria-hidden="true"></i>
        </th>
        <th scope="col" class="metric-col sortable" data-column="tool_appropriateness">
          Tool<br>Appropriateness
          <i class="fas fa-sort sort-icon" aria-hidden="true"></i>
        </th>
        <th scope="col" class="metric-col sortable" data-column="parameter_accuracy">
          Parameter<br>Accuracy
          <i class="fas fa-sort sort-icon" aria-hidden="true"></i>
        </th>
        <th scope="col" class="metric-col sortable" data-column="dependency_awareness">
          Dependency<br>Awareness
          <i class="fas fa-sort sort-icon" aria-hidden="true"></i>
        </th>
        <th scope="col" class="metric-col sortable" data-column="parallelism_efficiency">
          Parallelism<br>and Efficiency
          <i class="fas fa-sort sort-icon" aria-hidden="true"></i>
        </th>
      </tr>
    </thead>
    <tbody id="tableBody">
      <!-- Table rows will be generated by JavaScript -->
    </tbody>
  </table>
</div>
<!-- Loading indicator shown while leaderboard data is fetched. role="status"
     makes it a polite live region so the message is announced; the spinner
     icon is decorative. Visibility is presumably toggled by script.js via the id. -->
<div class="loading" id="loading" role="status">
  <i class="fas fa-spinner fa-spin" aria-hidden="true"></i>
  Loading leaderboard data...
</div>
</section>
<!-- Citation: BibTeX entry plus a copy button.
     The entry year and key use 2025: arXiv identifier 2508.20453 denotes an
     August 2025 submission, so the previous year={2024} was inconsistent.
     The <pre> content is whitespace-significant and is kept flush-left. -->
<section class="citation-section">
  <h2 class="section-title">Citation</h2>
  <div class="citation-box">
    <pre class="citation-text">@article{wang2025mcpbench,
title={MCP-Bench: Benchmarking Tool-Using LLM Agents with Complex Real-World Tasks via MCP Servers},
author={Wang, Zhenting and Chang, Qi and Patel, Hemani and Biju, Shashank and Wu, Cheng-En and Liu, Quan and Ding, Aolin and Rezazadeh, Alireza and Shah, Ankit and Bao, Yujia and Siow, Eugene},
journal={arXiv preprint arXiv:2508.20453},
year={2025}
}</pre>
    <!-- copyCitation() is not defined in this file; presumably provided by
         script.js. The inline handler is kept so that contract is unchanged. -->
    <button type="button" class="copy-citation-btn" onclick="copyCitation()">
      <i class="fas fa-copy" aria-hidden="true"></i> Copy Citation
    </button>
  </div>
</section>
<!-- Page footer: the #lastUpdated span is empty in the markup (presumably
     filled in by script.js), followed by the data-source attribution. -->
<footer class="footer">
  <p>Last updated: <span id="lastUpdated"></span></p>
  <p>Data source: MCP-Bench Results (ArXiv: 2508.20453)</p>
</footer>
</div>
<!-- Page behaviour lives in script.js, loaded last so the elements above exist
     when it runs. This page expects it to generate the rows of #tableBody and
     to define the copyCitation() function called by the citation button;
     presumably it also drives #searchInput, #sortSelect, #sortOrder, #loading
     and #lastUpdated (verify against script.js). -->
<script src="script.js"></script>
</body>
</html>