<!--
NL2SQL / index.html
Lyyyy1818's picture
Upload 7 files
ad0d245 verified
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>NL2SQL Leaderboard</title>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
<style>
* {
margin: 0;
padding: 0;
box-sizing: border-box;
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, sans-serif;
}
body {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
min-height: 100vh;
padding: 20px;
}
.container {
max-width: 1400px;
margin: 0 auto;
background: #f8fafc;
border-radius: 20px;
box-shadow: 0 20px 60px rgba(0, 0, 0, 0.3);
overflow: hidden;
}
header {
background: linear-gradient(to right, #1e40af, #3b82f6);
color: white;
padding: 30px 40px;
text-align: center;
}
h1 {
font-size: 2.8rem;
margin-bottom: 10px;
font-weight: 800;
}
.subtitle {
font-size: 1.2rem;
opacity: 0.9;
margin-bottom: 20px;
}
.tabs {
display: flex;
background: rgba(255, 255, 255, 0.1);
border-radius: 12px;
padding: 5px;
margin: 20px auto 0;
max-width: 800px;
}
.tab {
flex: 1;
padding: 15px;
text-align: center;
font-weight: 600;
font-size: 1.1rem;
cursor: pointer;
border-radius: 8px;
transition: all 0.3s ease;
}
.tab:hover {
background: rgba(255, 255, 255, 0.2);
}
.tab.active {
background: white;
color: #3b82f6;
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
}
main {
padding: 30px;
}
.controls {
background: white;
padding: 30px;
border-radius: 12px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.05);
margin-bottom: 30px;
}
.dataset-selector {
margin-bottom: 30px;
padding-bottom: 30px;
border-bottom: 1px solid #e2e8f0;
}
.metric-selector {
margin-bottom: 20px;
}
.dataset-label, .metric-label {
font-weight: 600;
color: #475569;
font-size: 1.1rem;
margin-bottom: 15px;
display: flex;
align-items: center;
gap: 10px;
}
.dataset-label i, .metric-label i {
color: #3b82f6;
}
.dataset-buttons {
display: flex;
flex-wrap: wrap;
gap: 10px;
}
.dataset-btn {
padding: 12px 24px;
background: white;
border: 2px solid #cbd5e1;
border-radius: 8px;
font-weight: 600;
color: #475569;
cursor: pointer;
transition: all 0.2s ease;
}
.dataset-btn:hover {
background: #f1f5f9;
transform: translateY(-2px);
}
.dataset-btn.active {
background: #3b82f6;
color: white;
border-color: #3b82f6;
box-shadow: 0 4px 12px rgba(59, 130, 246, 0.3);
}
.metric-checkboxes {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(250px, 1fr));
gap: 15px;
margin-top: 10px;
}
.metric-checkbox {
display: flex;
align-items: center;
gap: 12px;
padding: 10px 15px;
background: #f8fafc;
border-radius: 8px;
transition: all 0.2s ease;
border: 1px solid #e2e8f0;
}
.metric-checkbox:hover {
background: #f1f5f9;
transform: translateY(-2px);
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
}
.metric-checkbox input {
width: 18px;
height: 18px;
cursor: pointer;
accent-color: #3b82f6;
}
.metric-checkbox label {
font-weight: 500;
color: #475569;
cursor: pointer;
font-size: 0.95rem;
flex: 1;
}
.metric-hint {
font-size: 0.85rem;
color: #64748b;
margin-top: 5px;
font-style: italic;
}
.table-container {
overflow-x: auto;
border-radius: 12px;
box-shadow: 0 10px 25px rgba(0, 0, 0, 0.1);
background: white;
margin-bottom: 30px;
}
table {
width: 100%;
border-collapse: collapse;
min-width: 1200px;
}
th {
background: linear-gradient(to right, #1e40af, #3b82f6);
color: white;
text-align: left;
padding: 18px 20px;
font-weight: 600;
font-size: 1.1rem;
position: sticky;
top: 0;
}
th.algorithm-header {
min-width: 200px;
}
td {
padding: 16px 20px;
border-bottom: 1px solid #e2e8f0;
font-size: 1rem;
}
tr:hover {
background: #f8fafc;
}
tr:nth-child(even) {
background: #f8fafc;
}
tr:nth-child(even):hover {
background: #f1f5f9;
}
.algorithm-name {
font-weight: 600;
color: #1e293b;
font-size: 1.1rem;
}
.metric-value {
font-weight: 600;
color: #0f172a;
text-align: center;
}
.highlight {
background: linear-gradient(120deg, #f0f9ff 0%, #e0f2fe 100%);
border-left: 4px solid #0ea5e9;
}
.best-value {
background: linear-gradient(120deg, #dcfce7 0%, #bbf7d0 100%);
font-weight: 700;
color: #166534;
position: relative;
}
.best-value::after {
content: "🏆";
position: absolute;
right: 5px;
top: 50%;
transform: translateY(-50%);
font-size: 0.9rem;
}
.no-data {
text-align: center;
padding: 60px 20px;
color: #64748b;
font-size: 1.2rem;
}
.no-data i {
font-size: 3rem;
margin-bottom: 20px;
opacity: 0.5;
}
/* Submit Data Styles */
.submit-container {
padding: 40px;
text-align: center;
background: white;
border-radius: 12px;
box-shadow: 0 10px 25px rgba(0, 0, 0, 0.1);
margin-bottom: 30px;
}
.submit-icon {
font-size: 5rem;
color: #cbd5e1;
margin-bottom: 20px;
}
.submit-title {
font-size: 2.5rem;
color: #64748b;
margin-bottom: 15px;
}
.submit-description {
font-size: 1.2rem;
color: #94a3b8;
max-width: 600px;
margin: 0 auto 30px;
line-height: 1.6;
}
.coming-soon {
display: inline-block;
background: #f1f5f9;
color: #64748b;
padding: 10px 25px;
border-radius: 30px;
font-weight: 600;
font-size: 1.1rem;
}
.submit-form {
max-width: 600px;
margin: 30px auto;
background: #f8fafc;
padding: 25px;
border-radius: 12px;
text-align: left;
}
.form-group {
margin-bottom: 20px;
}
.form-group label {
display: block;
font-weight: 600;
color: #475569;
margin-bottom: 8px;
}
.form-group input,
.form-group select,
.form-group textarea {
width: 100%;
padding: 12px;
border: 2px solid #cbd5e1;
border-radius: 8px;
font-size: 1rem;
transition: border-color 0.2s ease;
}
.form-group input:focus,
.form-group select:focus,
.form-group textarea:focus {
outline: none;
border-color: #3b82f6;
}
.submit-btn {
background: #3b82f6;
color: white;
border: none;
padding: 12px 30px;
border-radius: 8px;
font-weight: 600;
font-size: 1.1rem;
cursor: pointer;
transition: all 0.2s ease;
}
.submit-btn:hover {
background: #2563eb;
transform: translateY(-2px);
box-shadow: 0 4px 12px rgba(37, 99, 235, 0.3);
}
/* Footer */
footer {
text-align: center;
padding: 20px;
color: #94a3b8;
font-size: 0.9rem;
border-top: 1px solid #e2e8f0;
margin-top: 30px;
}
.controls-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 25px;
padding-bottom: 15px;
border-bottom: 1px solid #e2e8f0;
}
.controls-title {
font-size: 1.4rem;
color: #1e293b;
font-weight: 700;
}
.reset-btn {
background: #f1f5f9;
color: #475569;
border: 1px solid #cbd5e1;
padding: 8px 16px;
border-radius: 6px;
font-weight: 500;
cursor: pointer;
transition: all 0.2s ease;
}
.reset-btn:hover {
background: #e2e8f0;
}
/* Responsive */
@media (max-width: 768px) {
.container {
border-radius: 10px;
}
h1 {
font-size: 2rem;
}
.tab {
padding: 12px 15px;
font-size: 1rem;
}
.controls {
padding: 20px;
}
.metric-checkboxes {
grid-template-columns: 1fr;
}
th, td {
padding: 12px 15px;
}
.submit-container {
padding: 20px;
}
.submit-title {
font-size: 1.8rem;
}
.controls-header {
flex-direction: column;
align-items: flex-start;
gap: 10px;
}
.reset-btn {
align-self: flex-end;
}
}
</style>
</head>
<body>
<div class="container">
<header>
<h1>NL2SQL Leaderboard</h1>
<div class="subtitle">Benchmarking Natural Language to SQL Conversion Models and Agents</div>
<!--
  Top-level view switcher. Each tab calls switchTab() with the view name.
  The tabs are styled <div>s, so role="button" + tabindex="0" expose them to
  keyboard and assistive-technology users, and the keydown handler activates
  them on Enter/Space the way a native button would.
-->
<div class="tabs">
  <div class="tab active" role="button" tabindex="0" onclick="switchTab('leaderboard')" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); switchTab('leaderboard'); }">Leaderboard</div>
  <div class="tab" role="button" tabindex="0" onclick="switchTab('submit')" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); switchTab('submit'); }">Submit Results</div>
  <div class="tab" role="button" tabindex="0" onclick="switchTab('about')" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); switchTab('about'); }">About</div>
</div>
</header>
<main id="leaderboard-content">
<div class="controls">
<div class="controls-header">
<div class="controls-title">
<i class="fas fa-sliders-h"></i> Filter Controls
</div>
<button class="reset-btn" onclick="resetFilters()">
<i class="fas fa-redo"></i> Reset Filters
</button>
</div>
<div class="dataset-selector">
<div class="dataset-label">
<i class="fas fa-database"></i>
Select Dataset
</div>
<div class="dataset-buttons" id="dataset-buttons">
<!-- Dataset buttons are generated dynamically -->
</div>
</div>
<div class="metric-selector">
<div class="metric-label">
<i class="fas fa-chart-bar"></i>
Select Metrics to Display
</div>
<div class="metric-checkboxes" id="metric-checkboxes">
<!-- Metric checkboxes are generated dynamically -->
</div>
<div class="metric-hint">
Tip: Click on metrics to show/hide them in the table. The best value for each metric is highlighted with a 🏆. Precision, Recall, and F1 are calculated only over the generated SQL queries that are executable.
</div>
</div>
</div>
<div class="table-container">
<table id="leaderboard-table">
<thead>
<tr id="table-header">
<!-- Table header cells are generated dynamically -->
</tr>
</thead>
<tbody id="table-body">
<!-- Table rows are generated dynamically -->
</tbody>
</table>
</div>
<div class="no-data" id="no-data-message" style="display: none;">
<i class="fas fa-database"></i>
<h3>No Data Available</h3>
<p>This dataset currently has no results. Be the first to submit!</p>
<button class="submit-btn" onclick="switchTab('submit')" style="margin-top: 20px;">
<i class="fas fa-cloud-upload-alt"></i> Submit Results
</button>
</div>
</main>
<div id="submit-content" style="display: none;">
<div class="submit-container">
<div class="submit-icon">
<i class="fas fa-cloud-upload-alt"></i>
</div>
<h2 class="submit-title">Submit Your Results</h2>
<p class="submit-description">
Submit your NL2SQL model results to be included in the leaderboard.
Please ensure your results follow the required format.
</p>
<div class="coming-soon">
<i class="fas fa-tools"></i> Submission Portal Under Development
</div>
<div class="submit-form">
<div class="form-group">
<label for="dataset-select"><i class="fas fa-database"></i> Select Dataset</label>
<select id="dataset-select" class="dataset-select">
<option value="birddev">BIRD Dev</option>
<option value="birdtrain">BIRD Train</option>
<option value="spider">Spider</option>
</select>
</div>
<div class="form-group">
<label for="algorithm-name"><i class="fas fa-robot"></i> Algorithm/Model Name</label>
<input type="text" id="algorithm-name" placeholder="Enter your algorithm name">
</div>
<div class="form-group">
<label for="results-json"><i class="fas fa-code"></i> Results (JSON format)</label>
<textarea id="results-json" rows="6" placeholder='{
"execution_ability": 0.85,
"correct_rate": 0.78,
"execution_efficiency_on_executable_sql": 0.92,
"table_precision_on_executable_sql": 0.87,
"table_recall_on_executable_sql": 0.85,
"table_f1_on_executable_sql": 0.86,
"column_precision_on_executable_sql": 0.83,
"column_recall_on_executable_sql": 0.80,
"column_f1_on_executable_sql": 0.815
}'></textarea>
</div>
<div style="text-align: center;">
<button class="submit-btn" onclick="submitResults()">
<i class="fas fa-paper-plane"></i> Submit Results (Demo)
</button>
</div>
</div>
</div>
</div>
<div id="about-content" style="display: none;">
<div class="submit-container">
<h2 class="submit-title">About NL2SQL Leaderboard</h2>
<p class="submit-description">
This leaderboard tracks the performance of various Natural Language to SQL (NL2SQL) models
across multiple benchmarks and metrics.
</p>
<div style="text-align: left; max-width: 800px; margin: 40px auto;">
<h3 style="color: #475569; margin-bottom: 15px;">Metrics Explained:</h3>
<ul style="list-style-type: none; padding: 0;">
<li style="margin-bottom: 10px; padding: 10px; background: #f1f5f9; border-radius: 6px;">
<strong>Execution Ability:</strong> Ability to generate executable SQL queries
</li>
<li style="margin-bottom: 10px; padding: 10px; background: #f1f5f9; border-radius: 6px;">
<strong>Correct Rate:</strong> Percentage of correctly generated SQL queries
</li>
<li style="margin-bottom: 10px; padding: 10px; background: #f1f5f9; border-radius: 6px;">
<strong>Execution Efficiency:</strong> Performance on executable SQL queries
</li>
<li style="margin-bottom: 10px; padding: 10px; background: #f1f5f9; border-radius: 6px;">
<strong>Table/Column Precision/Recall/F1:</strong> Accuracy in selecting the correct tables and columns, measured only on generated SQL that is executable
</li>
</ul>
<h3 style="color: #475569; margin-top: 30px; margin-bottom: 15px;">Supported Datasets:</h3>
<ul style="list-style-type: none; padding: 0;">
<li style="margin-bottom: 10px; padding: 10px; background: #f1f5f9; border-radius: 6px;">
<strong>BIRD Dev:</strong> Benchmark for large-scale database tasks in the real world
</li>
<li style="margin-bottom: 10px; padding: 10px; background: #f1f5f9; border-radius: 6px;">
<strong>BIRD Train:</strong> Training set for BIRD benchmark
</li>
<li style="margin-bottom: 10px; padding: 10px; background: #f1f5f9; border-radius: 6px;">
<strong>Spider:</strong> Large-scale complex and cross-domain semantic parsing and text-to-SQL dataset
</li>
</ul>
</div>
</div>
</div>
<footer>
<p>NL2SQL Leaderboard • Last Updated: <span id="last-updated">Loading...</span></p>
<p style="margin-top: 5px; font-size: 0.8rem;">
<i class="fas fa-code"></i> Built with FastAPI &bull;
<i class="fas fa-server"></i> Docker Ready &bull;
<i class="fas fa-chart-line"></i> Real-time Updates
</p>
</footer>
</div>
<script>
// Key of the dataset currently shown in the leaderboard view.
let currentDataset = 'birddev';
// Dataset payloads keyed by dataset key; populated by loadLeaderboardData().
let datasets = {};
// Names of the metrics whose columns are currently visible.
let selectedMetrics = new Set();
// Metric list per dataset key; populated by loadLeaderboardData().
let datasetMetrics = {};

// Once the DOM is parsed, load the leaderboard first and then fill in the
// footer timestamp.
document.addEventListener('DOMContentLoaded', async () => {
    await loadLeaderboardData();
    updateLastUpdated();
});
// Fetch the leaderboard payload from /api/data and render the initial view.
//
// Side effects: replaces the module-level `datasets` and `datasetMetrics`,
// may change `currentDataset` (see below), rebuilds the dataset buttons and
// shows the current dataset. Any failure is logged and reported with an alert;
// the function itself never rejects.
async function loadLeaderboardData() {
    try {
        const response = await fetch('/api/data');
        // fetch() resolves on HTTP errors too, so surface them explicitly
        // instead of failing later with an obscure JSON/shape error.
        if (!response.ok) {
            throw new Error(`Request for /api/data failed with HTTP ${response.status}`);
        }
        const data = await response.json();
        datasets = (data && data.datasets) || {};

        // Remember the metric list of every dataset.
        datasetMetrics = {};
        for (const [key, dataset] of Object.entries(datasets)) {
            datasetMetrics[key] = dataset.metrics || [];
        }

        // If the default dataset key is not part of the payload, fall back to
        // the first available one; otherwise showDataset() would return
        // silently and the page would stay empty.
        const keys = Object.keys(datasets);
        if (!keys.includes(currentDataset) && keys.length > 0) {
            currentDataset = keys[0];
        }

        // Build the dataset selector, then render the selected dataset.
        initDatasetButtons();
        showDataset(currentDataset);
    } catch (error) {
        console.error('Error loading data:', error);
        alert('Failed to load leaderboard data');
    }
}
// Refresh the "Last Updated" text in the footer from the `last_updated` field
// of /api/data.
//
// On any failure (network error, HTTP error, missing or unparsable timestamp)
// the error is logged and the footer keeps whatever text it showed before,
// rather than displaying "Invalid Date".
async function updateLastUpdated() {
    try {
        const response = await fetch('/api/data');
        // fetch() does not reject on HTTP error statuses; check explicitly.
        if (!response.ok) {
            throw new Error(`Request for /api/data failed with HTTP ${response.status}`);
        }
        const data = await response.json();
        const date = new Date(data.last_updated);
        // new Date(undefined) / new Date('garbage') yields an invalid date
        // whose getTime() is NaN.
        if (Number.isNaN(date.getTime())) {
            throw new Error(`Invalid last_updated value: ${data.last_updated}`);
        }
        document.getElementById('last-updated').textContent = date.toLocaleString();
    } catch (error) {
        console.error('Error updating timestamp:', error);
    }
}
// Rebuild the dataset selector: one button per entry in `datasets`, with the
// button for `currentDataset` marked active. Clicking a button makes its
// dataset current, renders it and refreshes the active marker.
function initDatasetButtons() {
    const buttonBar = document.getElementById('dataset-buttons');
    buttonBar.innerHTML = '';

    Object.keys(datasets).forEach(datasetKey => {
        const btn = document.createElement('button');
        const activeSuffix = datasetKey === currentDataset ? 'active' : '';
        btn.className = `dataset-btn ${activeSuffix}`;
        // Plain text label only (no icon).
        btn.textContent = datasets[datasetKey].name;
        btn.onclick = () => {
            currentDataset = datasetKey;
            showDataset(datasetKey);
            updateDatasetButtons();
        };
        buttonBar.appendChild(btn);
    });
}
// Mark the button of `currentDataset` as active and clear the marker on all
// other dataset buttons.
//
// initDatasetButtons() creates exactly one button per entry of `datasets`, in
// Object.entries() order, so the n-th button belongs to the n-th key. Mapping
// by position avoids guessing the key from the button's display text, which
// picked the wrong dataset whenever one name was a substring of another (or a
// name was empty).
function updateDatasetButtons() {
    const keys = Object.keys(datasets);
    const buttons = document.querySelectorAll('#dataset-buttons .dataset-btn');
    buttons.forEach((button, index) => {
        button.classList.toggle('active', keys[index] === currentDataset);
    });
}
// Render the dataset stored under `datasetKey`: metric checkboxes, table, and
// the "no data" placeholder. Does nothing when the key is unknown.
//
// Metric selection is (re)initialised to "all metrics" only when a different
// dataset than the previously shown one is displayed AND none of its metrics
// are currently selected. Re-rendering the same dataset (e.g. after a checkbox
// toggle) never resets the selection, so the user can uncheck every metric
// without all of them being re-checked.
function showDataset(datasetKey) {
    const dataset = datasets[datasetKey];
    if (!dataset) return;

    // Tolerate payloads that omit these fields.
    const metrics = dataset.metrics || [];
    const algorithms = dataset.algorithms || [];

    // Remember which dataset was rendered last on the function object itself,
    // so no additional module-level state is required.
    const isDatasetSwitch = showDataset.lastShownKey !== datasetKey;
    showDataset.lastShownKey = datasetKey;

    const hasSelectedMetric = metrics.some(metric => selectedMetrics.has(metric));
    if (isDatasetSwitch && !hasSelectedMetric) {
        // Start with every metric of this dataset selected.
        selectedMetrics = new Set(metrics);
    }

    // Refresh the metric checkboxes.
    updateMetricCheckboxes(datasetKey);

    // Refresh the table; pass normalised fields so the renderers never see
    // undefined metric/algorithm lists.
    updateTable(Object.assign({}, dataset, { metrics, algorithms }));

    // Show either the table or the "no data" placeholder.
    const noDataMsg = document.getElementById('no-data-message');
    const tableContainer = document.querySelector('.table-container');
    const hasRows = algorithms.length > 0;
    noDataMsg.style.display = hasRows ? 'none' : 'block';
    tableContainer.style.display = hasRows ? 'block' : 'none';
}
// Rebuild the metric checkbox list for the dataset stored under `datasetKey`.
//
// One checkbox + label is created per metric in `datasetMetrics[datasetKey]`
// (nothing is rendered when the key has no metric list). A checkbox is checked
// when its metric is in `selectedMetrics`; changing it calls toggleMetric().
//
// Elements are built with the DOM API rather than an HTML string so that a
// metric name containing quotes or markup can neither break the change handler
// nor inject HTML.
function updateMetricCheckboxes(datasetKey) {
    const container = document.getElementById('metric-checkboxes');
    container.innerHTML = '';
    const metrics = datasetMetrics[datasetKey] || [];

    metrics.forEach(metric => {
        const wrapper = document.createElement('div');
        wrapper.className = 'metric-checkbox';

        const inputId = `metric-${metric}`;

        const input = document.createElement('input');
        input.type = 'checkbox';
        input.id = inputId;
        input.checked = selectedMetrics.has(metric);
        input.addEventListener('change', () => toggleMetric(metric, input.checked));

        // Plain text label (no icon), tied to the checkbox via for/id.
        const label = document.createElement('label');
        label.htmlFor = inputId;
        label.textContent = formatMetricName(metric);

        wrapper.appendChild(input);
        wrapper.appendChild(label);
        container.appendChild(wrapper);
    });
}
// Add `metric` to (isChecked truthy) or remove it from (isChecked falsy) the
// set of visible metrics, then re-render the current dataset so the table
// reflects the change.
function toggleMetric(metric, isChecked) {
    const operation = isChecked ? 'add' : 'delete';
    selectedMetrics[operation](metric);

    // Re-showing the current dataset refreshes both checkboxes and table.
    showDataset(currentDataset);
}
// Turn a metric key into a display label:
//   1. drop every "_on_executable_sql" occurrence,
//   2. replace underscores with spaces,
//   3. upper-case the first character of each word.
// Example: "table_f1_on_executable_sql" -> "Table F1".
function formatMetricName(metric) {
    return metric
        .replace(/_on_executable_sql/g, '')
        .replace(/_/g, ' ')
        .replace(/\b\w/g, firstChar => firstChar.toUpperCase());
}
// Restore the default view: select the default dataset ('birddev') with all of
// its metrics visible, then re-render the table and the dataset buttons.
//
// If no data has been loaded this is a no-op, and if 'birddev' is not among
// the loaded datasets the first available dataset is used instead; previously
// both cases threw a TypeError from the click handler.
function resetFilters() {
    const keys = Object.keys(datasets);
    if (keys.length === 0) return;

    // Back to the default dataset (or the first one if the default is absent).
    currentDataset = keys.includes('birddev') ? 'birddev' : keys[0];

    // Select every metric of that dataset.
    selectedMetrics = new Set(datasets[currentDataset].metrics || []);

    // Re-render.
    showDataset(currentDataset);
    updateDatasetButtons();
}
// Switch the visible view. `tabName` is one of 'leaderboard', 'submit' or
// 'about'. The tab whose (lower-cased) label contains `tabName` becomes the
// active one, and only the matching content panel stays visible.
function switchTab(tabName) {
    // Highlight the matching tab, un-highlight the others.
    for (const tab of document.querySelectorAll('.tab')) {
        const isCurrent = tab.textContent.toLowerCase().includes(tabName);
        tab.classList.toggle('active', isCurrent);
    }

    // Show the panel that belongs to the tab and hide the rest.
    const panelIds = {
        'leaderboard': 'leaderboard-content',
        'submit': 'submit-content',
        'about': 'about-content'
    };
    for (const [name, panelId] of Object.entries(panelIds)) {
        const panel = document.getElementById(panelId);
        panel.style.display = tabName === name ? 'block' : 'none';
    }
}
// Re-render the leaderboard table (header row, then body rows) for `dataset`.
// A missing metric list is treated as empty.
function updateTable(dataset) {
    const rows = dataset.algorithms;
    const columns = dataset.metrics ? dataset.metrics : [];

    updateTableHeader(columns);
    updateTableBody(rows, columns);
}
// Rebuild the table header row: an "Algorithm" column followed by one column
// per metric in `metrics` that is currently in `selectedMetrics`.
//
// Cells are created with the DOM API and filled via textContent, so metric
// names are never parsed as HTML and the row is not re-parsed on every
// appended cell (as happened with `innerHTML +=`). Each header cell gets
// scope="col" so assistive technology can associate it with its column.
function updateTableHeader(metrics) {
    const header = document.getElementById('table-header');
    header.innerHTML = '';

    const algorithmCell = document.createElement('th');
    algorithmCell.className = 'algorithm-header';
    algorithmCell.setAttribute('scope', 'col');
    algorithmCell.textContent = 'Algorithm';
    header.appendChild(algorithmCell);

    // Only the selected metrics get a column.
    metrics
        .filter(metric => selectedMetrics.has(metric))
        .forEach(metric => {
            const cell = document.createElement('th');
            cell.setAttribute('scope', 'col');
            cell.textContent = formatMetricName(metric);
            header.appendChild(cell);
        });
}
// Rebuild the table body: one row per entry of `algorithms`, with the
// algorithm name followed by one cell per metric in `metrics` that is
// currently in `selectedMetrics`.
//
// - A cell shows the value with three decimals, or "N/A" when the value is
//   missing or not a finite number (previously null/string values made
//   toFixed() throw and aborted rendering).
// - The highest numeric value of each visible metric gets the `best-value`
//   class; missing values are ignored when computing it instead of being
//   counted as 0.
// - All text is inserted via textContent, so algorithm names coming from the
//   leaderboard data are never interpreted as HTML.
function updateTableBody(algorithms, metrics) {
    const tbody = document.getElementById('table-body');
    tbody.innerHTML = '';
    if (!algorithms || algorithms.length === 0) return;

    const visibleMetrics = metrics.filter(metric => selectedMetrics.has(metric));
    const isNumber = value => typeof value === 'number' && Number.isFinite(value);

    // Best (maximum) value per visible metric; absent when no row has a value.
    const bestValues = {};
    visibleMetrics.forEach(metric => {
        const values = algorithms.map(a => a[metric]).filter(isNumber);
        if (values.length > 0) {
            bestValues[metric] = Math.max(...values);
        }
    });

    // One row per algorithm.
    algorithms.forEach(algorithm => {
        const row = document.createElement('tr');

        const nameCell = document.createElement('td');
        nameCell.className = 'algorithm-name';
        nameCell.textContent = algorithm.name;
        row.appendChild(nameCell);

        visibleMetrics.forEach(metric => {
            const value = algorithm[metric];
            const hasValue = isNumber(value);
            const isBest = hasValue && value === bestValues[metric];

            const cell = document.createElement('td');
            cell.className = isBest ? 'metric-value best-value' : 'metric-value';
            cell.textContent = hasValue ? value.toFixed(3) : 'N/A';
            row.appendChild(cell);
        });

        tbody.appendChild(row);
    });
}
// Handler for the demo "Submit Results" button on the Submit tab.
//
// The markup calls submitResults(), but the function was not defined anywhere
// in this script, so clicking the button raised a ReferenceError. No submission
// endpoint is visible in this file, so this handler only validates the form
// locally and reports the outcome; nothing is sent to the server.
function submitResults() {
    const datasetKey = document.getElementById('dataset-select').value;
    const algorithmName = document.getElementById('algorithm-name').value.trim();
    const rawResults = document.getElementById('results-json').value.trim();

    if (!algorithmName) {
        alert('Please enter an algorithm name.');
        return;
    }

    let results;
    try {
        results = JSON.parse(rawResults);
    } catch (error) {
        alert('Results must be valid JSON.');
        return;
    }
    // The expected shape is an object mapping metric names to values.
    if (results === null || typeof results !== 'object' || Array.isArray(results)) {
        alert('Results must be a JSON object mapping metric names to values.');
        return;
    }

    console.log('Demo submission (not sent):', {
        dataset: datasetKey,
        algorithm: algorithmName,
        results: results
    });
    alert('The submission portal is under development. Your input was validated but not submitted.');
}

// Refresh the footer timestamp every 30 seconds. Only the "Last Updated" text
// is refreshed here; the leaderboard table itself is not reloaded.
setInterval(updateLastUpdated, 30000);
</script>
</body>
</html>