/**
 * Leaderboard dataset embedded directly in the script.
 *
 * - "lastUpdated": the string "2025-09-05".
 * - "models": one record per model, each with a "name" plus ten numeric
 *   metric fields. In the records below, valid_tool_name_rate,
 *   schema_compliance and execution_success fall between 85 and 100, while
 *   the remaining metrics (including overall_score) fall between 0 and 1.
 *   NOTE(review): presumably percentages vs. 0-1 scores; confirm with the
 *   data source before formatting them the same way.
 *
 * Records are listed in ascending overall_score order. LeaderboardApp works
 * on its own copy of the array and re-sorts that copy, so this order is not
 * what ends up on screen.
 */
const LEADERBOARD_DATA = {
  "lastUpdated": "2025-09-05",
  "models": [
    { "name": "llama-3-1-8b-instruct", "valid_tool_name_rate": 96.1, "schema_compliance": 89.4, "execution_success": 90.9, "task_fulfillment": 0.261, "information_grounding": 0.295, "tool_appropriateness": 0.352, "parameter_accuracy": 0.310, "dependency_awareness": 0.221, "parallelism_efficiency": 0.141, "overall_score": 0.428 },
    { "name": "llama-3-2-90b-vision-instruct", "valid_tool_name_rate": 99.6, "schema_compliance": 85.0, "execution_success": 90.9, "task_fulfillment": 0.293, "information_grounding": 0.444, "tool_appropriateness": 0.515, "parameter_accuracy": 0.427, "dependency_awareness": 0.267, "parallelism_efficiency": 0.173, "overall_score": 0.495 },
    { "name": "nova-micro-v1", "valid_tool_name_rate": 96.0, "schema_compliance": 93.1, "execution_success": 87.8, "task_fulfillment": 0.339, "information_grounding": 0.419, "tool_appropriateness": 0.504, "parameter_accuracy": 0.428, "dependency_awareness": 0.315, "parallelism_efficiency": 0.212, "overall_score": 0.508 },
    { "name": "llama-3-1-70b-instruct", "valid_tool_name_rate": 99.2, "schema_compliance": 90.5, "execution_success": 92.5, "task_fulfillment": 0.314, "information_grounding": 0.432, "tool_appropriateness": 0.523, "parameter_accuracy": 0.451, "dependency_awareness": 0.287, "parallelism_efficiency": 0.191, "overall_score": 0.510 },
    { "name": "mistral-small-2503", "valid_tool_name_rate": 96.4, "schema_compliance": 95.6, "execution_success": 86.2, "task_fulfillment": 0.373, "information_grounding": 0.445, "tool_appropriateness": 0.537, "parameter_accuracy": 0.446, "dependency_awareness": 0.349, "parallelism_efficiency": 0.232, "overall_score": 0.530 },
    { "name": "gpt-4o-mini", "valid_tool_name_rate": 97.5, "schema_compliance": 98.1, "execution_success": 93.9, "task_fulfillment": 0.374, "information_grounding": 0.500, "tool_appropriateness": 0.555, "parameter_accuracy": 0.544, "dependency_awareness": 0.352, "parallelism_efficiency": 0.201, "overall_score": 0.557 },
    { "name": "llama-3-3-70b-instruct", "valid_tool_name_rate": 99.5, "schema_compliance": 93.8, "execution_success": 91.6, "task_fulfillment": 0.349, "information_grounding": 0.493, "tool_appropriateness": 0.583, "parameter_accuracy": 0.525, "dependency_awareness": 0.355, "parallelism_efficiency": 0.262, "overall_score": 0.558 },
    { "name": "gemma-3-27b-it", "valid_tool_name_rate": 98.8, "schema_compliance": 97.6, "execution_success": 94.4, "task_fulfillment": 0.378, "information_grounding": 0.530, "tool_appropriateness": 0.608, "parameter_accuracy": 0.572, "dependency_awareness": 0.383, "parallelism_efficiency": 0.249, "overall_score": 0.582 },
    { "name": "gpt-4o", "valid_tool_name_rate": 98.9, "schema_compliance": 98.3, "execution_success": 92.8, "task_fulfillment": 0.394, "information_grounding": 0.542, "tool_appropriateness": 0.627, "parameter_accuracy": 0.587, "dependency_awareness": 0.405, "parallelism_efficiency": 0.272, "overall_score": 0.595 },
    { "name": "gemini-2.5-flash-lite", "valid_tool_name_rate": 99.4, "schema_compliance": 97.8, "execution_success": 94.3, "task_fulfillment": 0.412, "information_grounding": 0.577, "tool_appropriateness": 0.627, "parameter_accuracy": 0.597, "dependency_awareness": 0.404, "parallelism_efficiency": 0.226, "overall_score": 0.598 },
    { "name": "qwen3-30b-a3b-instruct-2507", "valid_tool_name_rate": 99.0, "schema_compliance": 98.4, "execution_success": 92.3, "task_fulfillment": 0.481, "information_grounding": 0.530, "tool_appropriateness": 0.658, "parameter_accuracy": 0.638, "dependency_awareness": 0.473, "parallelism_efficiency": 0.303, "overall_score": 0.627 },
    { "name": "kimi-k2", "valid_tool_name_rate": 98.8, "schema_compliance": 98.1, "execution_success": 94.5, "task_fulfillment": 0.502, "information_grounding": 0.577, "tool_appropriateness": 0.631, "parameter_accuracy": 0.623, "dependency_awareness": 0.448, "parallelism_efficiency": 0.307, "overall_score": 0.629 },
    { "name": "gpt-oss-20b", "valid_tool_name_rate": 98.8, "schema_compliance": 99.1, "execution_success": 93.6, "task_fulfillment": 0.547, "information_grounding": 0.623, "tool_appropriateness": 0.661, "parameter_accuracy": 0.638, "dependency_awareness": 0.509, "parallelism_efficiency": 0.309, "overall_score": 0.654 },
    { "name": "glm-4.5", "valid_tool_name_rate": 99.7, "schema_compliance": 99.7, "execution_success": 97.4, "task_fulfillment": 0.525, "information_grounding": 0.682, "tool_appropriateness": 0.680, "parameter_accuracy": 0.661, "dependency_awareness": 0.523, "parallelism_efficiency": 0.297, "overall_score": 0.668 },
    { "name": "qwen3-235b-a22b-2507", "valid_tool_name_rate": 99.1, "schema_compliance": 99.3, "execution_success": 94.8, "task_fulfillment": 0.549, "information_grounding": 0.625, "tool_appropriateness": 0.688, "parameter_accuracy": 0.712, "dependency_awareness": 0.542, "parallelism_efficiency": 0.355, "overall_score": 0.678 },
    { "name": "claude-sonnet-4", "valid_tool_name_rate": 100.0, "schema_compliance": 99.8, "execution_success": 98.8, "task_fulfillment": 0.554, "information_grounding": 0.676, "tool_appropriateness": 0.689, "parameter_accuracy": 0.671, "dependency_awareness": 0.541, "parallelism_efficiency": 0.328, "overall_score": 0.681 },
    { "name": "gemini-2.5-pro", "valid_tool_name_rate": 99.4, "schema_compliance": 99.6, "execution_success": 96.9, "task_fulfillment": 0.562, "information_grounding": 0.725, "tool_appropriateness": 0.717, "parameter_accuracy": 0.670, "dependency_awareness": 0.541, "parallelism_efficiency": 0.329, "overall_score": 0.690 },
    { "name": "gpt-oss-120b", "valid_tool_name_rate": 97.7, "schema_compliance": 98.8, "execution_success": 94.0, "task_fulfillment": 0.636, "information_grounding": 0.705, "tool_appropriateness": 0.691, "parameter_accuracy": 0.661, "dependency_awareness": 0.576, "parallelism_efficiency": 0.329, "overall_score": 0.692 },
    { "name": "o3", "valid_tool_name_rate": 99.3, "schema_compliance": 99.9, "execution_success": 97.1, "task_fulfillment": 0.641, "information_grounding": 0.706, "tool_appropriateness": 0.724, "parameter_accuracy": 0.726, "dependency_awareness": 0.592, "parallelism_efficiency": 0.359, "overall_score": 0.715 },
    { "name": "gpt-5", "valid_tool_name_rate": 100.0, "schema_compliance": 99.3, "execution_success": 99.1, "task_fulfillment": 0.677, "information_grounding": 0.828, "tool_appropriateness": 0.767, "parameter_accuracy": 0.749, "dependency_awareness": 0.649, "parallelism_efficiency": 0.339, "overall_score": 0.749 }
  ]
};

/**
 * Controller for the leaderboard table.
 *
 * State:
 * - this.data:         reference to LEADERBOARD_DATA once loaded, else null.
 * - this.filteredData: array of model records currently shown (search
 *                      filter applied, sorted in place), else null.
 * - this.currentSort:  { column, ascending } describing the active sort.
 */
class LeaderboardApp {
  /**
   * Sets the initial state (no data yet, sort by overall_score descending)
   * and starts initialization immediately.
   *
   * NOTE(review): init() is async and the promise it returns is neither
   * awaited nor given a rejection handler here.
   */
  constructor() {
    this.data = null;
    this.filteredData = null;
    this.currentSort = { column: 'overall_score', ascending: false };
    this.init();
  }

  /**
   * Runs the startup sequence in order: loadData(), setupEventListeners(),
   * renderTable(), updateLastUpdated().
   *
   * Declared async although nothing in the body is awaited, so every step
   * runs synchronously inside the constructor call. An exception from any
   * step is logged and then reported through showError().
   *
   * @returns {Promise<void>}
   */
  async init() {
    try {
      console.log('Starting LeaderboardApp initialization...');
      this.loadData();
      this.setupEventListeners();
      this.renderTable();
      this.updateLastUpdated();
      console.log('LeaderboardApp initialization complete');
    } catch (error) {
      console.error('Failed to initialize app:', error);
      this.showError('Failed to load leaderboard data');
    }
  }

  /**
   * Points this.data at LEADERBOARD_DATA, builds this.filteredData as a
   * shallow copy of its models array, and applies the current sort.
   *
   * While this runs, the element with id "loading" (if it exists) carries
   * the "active" class; the class is removed again in the finally block.
   *
   * Errors are logged and not rethrown, so a failure here is invisible to
   * the caller and this.filteredData may still be null afterwards.
   */
  loadData() {
    const loading = document.getElementById('loading');
    if (loading) {
      loading.classList.add('active');
    }
    try {
      console.log('Loading LEADERBOARD_DATA...');
      this.data = LEADERBOARD_DATA;
      console.log('Data loaded, models count:', this.data.models ? this.data.models.length : 0);
      // Copy the array so the in-place sort in sortData() never reorders
      // LEADERBOARD_DATA.models itself (the records are still shared).
      this.filteredData = [...this.data.models];
      console.log('Filtered data initialized with', this.filteredData.length, 'models');
      this.sortData();
      console.log('Data sorted');
    } catch (error) {
      console.error('Error loading data:', error);
    } finally {
      if (loading) {
        loading.classList.remove('active');
      }
    }
  }

  /**
   * Wires DOM events to the handlers of this instance:
   * - #searchInput 'input'  -> handleSearch(current value)
   * - #sortSelect  'change' -> handleSortChange(selected value)
   * - #sortOrder   'click'  -> toggleSortOrder()
   * - every .sortable element 'click' -> handleColumnSort(data-column value)
   *
   * NOTE(review): the three getElementById results are used without a null
   * check; if one of the elements is missing, the addEventListener call
   * throws a TypeError and the remaining listeners are never attached.
   */
  setupEventListeners() {
    const searchInput = document.getElementById('searchInput');
    const sortSelect = document.getElementById('sortSelect');
    const sortOrder = document.getElementById('sortOrder');
    const tableHeaders = document.querySelectorAll('.sortable');
    searchInput.addEventListener('input', (e) => this.handleSearch(e.target.value));
    sortSelect.addEventListener('change', (e) => this.handleSortChange(e.target.value));
    sortOrder.addEventListener('click', () => this.toggleSortOrder());
    tableHeaders.forEach(header => {
      header.addEventListener('click', () => {
        // The sort key is read from the header's data-column attribute.
        const column = header.dataset.column;
        this.handleColumnSort(column);
      });
    });
  }

  /**
   * Filters the models by name and re-renders.
   *
   * The query is lower-cased and trimmed; a model matches when its
   * lower-cased name contains the resulting term. An empty term restores
   * the full list. The current sort is re-applied in both cases.
   *
   * @param {string} query - Raw text from the search input.
   */
  handleSearch(query) {
    const searchTerm = query.toLowerCase().trim();
    if (searchTerm === '') {
      this.filteredData = [...this.data.models];
    } else {
      this.filteredData = this.data.models.filter(model =>
        model.name.toLowerCase().includes(searchTerm)
      );
    }
    this.sortData();
    this.renderTable();
  }

  /**
   * Switches the sort column while keeping the current direction, then
   * re-sorts, re-renders and calls updateSortIndicators().
   *
   * @param {string} column - Property name of the model records to sort by.
   */
  handleSortChange(column) {
    this.currentSort.column = column;
    this.sortData();
    this.renderTable();
    this.updateSortIndicators();
  }

  /**
   * Handles a click on a sortable header.
   *
   * Clicking the column that is already active flips the direction; any
   * other column becomes the active one and starts out descending. The
   * value of #sortSelect is set to the same column so both controls agree.
   *
   * NOTE(review): #sortSelect is dereferenced without a null check.
   *
   * @param {string} column - Property name of the model records to sort by.
   */
  handleColumnSort(column) {
    if (this.currentSort.column === column) {
      this.currentSort.ascending = !this.currentSort.ascending;
    } else {
      this.currentSort.column = column;
      this.currentSort.ascending = false;
    }
    document.getElementById('sortSelect').value = column;
    this.sortData();
    this.renderTable();
    this.updateSortIndicators();
    this.updateSortOrderButton();
  }

  /**
   * Flips the sort direction for the current column, then re-sorts,
   * re-renders and calls updateSortOrderButton().
   *
   * NOTE(review): unlike handleColumnSort(), this does not call
   * updateSortIndicators(); confirm that is intentional.
   */
  toggleSortOrder() {
    this.currentSort.ascending = !this.currentSort.ascending;
    this.sortData();
    this.renderTable();
    this.updateSortOrderButton();
  }

  /**
   * Sorts this.filteredData in place according to this.currentSort.
   *
   * String values are compared case-insensitively; everything else is
   * compared with the plain < and > operators.
   */
  sortData() {
    const { column, ascending } = this.currentSort;
    this.filteredData.sort((a, b) => {
      let aValue = a[column];
      let bValue = b[column];
      // Only aValue's type is inspected; bValue is assumed to be a string
      // whenever aValue is one.
      if (typeof aValue === 'string') {
        aValue = aValue.toLowerCase();
        bValue = bValue.toLowerCase();
      }
      // Stays 0 when the values are equal or when neither comparison holds
      // (for example when the column does not exist and both are undefined).
      let comparison = 0;
      if (aValue > bValue) comparison = 1;
      if (aValue < bValue) comparison = -1;
      // The comparison is computed for ascending order; negate for descending.
      return ascending ? comparison : -comparison;
    });
  }

  /**
   * Updates the element with id "tableBody".
   *
   * Logs an error and returns early when that element does not exist. When
   * this.filteredData is null or empty, logs a message and replaces the
   * element's innerHTML with the markup of the template literal that
   * follows.
   */
  renderTable() {
    const tableBody = document.getElementById('tableBody');
    if (!tableBody) {
      console.error('tableBody element not found!');
      return;
    }
    if (!this.filteredData || this.filteredData.length === 0) {
      console.log('No filtered data to display');
      tableBody.innerHTML = `