Commit fae4e5b (parent: 97decc0), committed by Mandark-droid

Initial TraceMind-AI setup with MCP client integration
- MCP client module for connecting to TraceMind-mcp-server
- Data loader for HuggingFace datasets
- Minimal Gradio app with leaderboard and cost estimator
- Clean, professional README for hackathon submission
- Auth and navigation utilities from MockTraceMind
- Project structure ready for screen migration
- .env.example +19 -0
- README.md +225 -4
- app.py +215 -0
- data_loader.py +255 -0
- mcp_client/__init__.py +8 -0
- mcp_client/client.py +351 -0
- mcp_client/sync_wrapper.py +131 -0
- requirements.txt +20 -0
- styles/__init__.py +8 -0
- styles/tracemind_theme.py +204 -0
- utils/__init__.py +1 -0
- utils/auth.py +193 -0
- utils/navigation.py +158 -0
.env.example ADDED
@@ -0,0 +1,19 @@
+# HuggingFace Configuration
+HF_TOKEN=your_huggingface_token_here
+
+# TraceMind MCP Server Configuration
+# Use the deployed TraceMind-mcp-server endpoint
+MCP_SERVER_URL=https://kshitijthakkar-tracemind-mcp-server.hf.space/gradio_api/mcp/
+
+# After hackathon submission, use:
+# MCP_SERVER_URL=https://mcp-1st-birthday-tracemind-mcp-server.hf.space/gradio_api/mcp/
+
+# Dataset Configuration
+LEADERBOARD_REPO=kshitijthakkar/smoltrace-leaderboard
+# Example results/traces repos (will be loaded dynamically from leaderboard)
+# RESULTS_REPO=kshitijthakkar/agent-results-gpt4-20251116
+# TRACES_REPO=kshitijthakkar/agent-traces-gpt4-20251116
+# METRICS_REPO=kshitijthakkar/agent-metrics-gpt4-20251116
+
+# Development Mode (skip authentication for local testing)
+DEV_MODE=true
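The application reads this file at startup via python-dotenv (see `app.py` later in this commit). A minimal sketch, assuming a `.env` copied from the template above; the boolean parsing of `DEV_MODE` is illustrative, since the actual parsing lives in `utils/auth.py`, which is not shown in this diff:

```python
# Minimal sketch: load .env and read the variables defined above.
# Assumes python-dotenv is installed (it is pinned in requirements.txt).
import os
from dotenv import load_dotenv

load_dotenv()  # reads .env from the working directory

mcp_url = os.getenv("MCP_SERVER_URL")                        # MCP endpoint
leaderboard = os.getenv("LEADERBOARD_REPO")                  # HF dataset repo
dev_mode = os.getenv("DEV_MODE", "false").lower() == "true"  # string -> bool
```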
README.md CHANGED
@@ -1,10 +1,231 @@
 ---
 title: TraceMind AI
-emoji:
+emoji: 🔍
 colorFrom: indigo
-colorTo:
+colorTo: purple
-sdk:
+sdk: gradio
+sdk_version: "6.0.0"
+app_file: app.py
 pinned: false
+tags:
+- mcp-in-action-track-enterprise
+- agent-evaluation
+- mcp-client
 ---
 
-
+# 🔍 TraceMind-AI
+
+Agent Evaluation Platform with MCP-Powered Intelligence
+
+## Overview
+
+TraceMind-AI is a comprehensive platform for evaluating AI agent performance across different models, providers, and configurations. It provides real-time insights, cost analysis, and detailed trace visualization powered by the Model Context Protocol (MCP).
+
+## Features
+
+- **📊 Real-time Leaderboard**: Live evaluation data from HuggingFace datasets
+- **🤖 MCP Integration**: AI-powered analysis using remote MCP servers
+- **💰 Cost Estimation**: Calculate evaluation costs for different models and configurations
+- **🔍 Trace Visualization**: Detailed OpenTelemetry trace analysis
+- **📈 Performance Metrics**: GPU utilization, CO2 emissions, token usage tracking
+
+## MCP Integration
+
+TraceMind-AI demonstrates enterprise MCP client usage by connecting to [TraceMind-mcp-server](https://huggingface.co/spaces/kshitijthakkar/TraceMind-mcp-server) via the Model Context Protocol.
+
+**MCP Tools Used:**
+- `analyze_leaderboard` - AI-generated insights about evaluation trends
+- `estimate_cost` - Cost estimation with hardware recommendations
+- `debug_trace` - Interactive trace analysis and debugging
+- `compare_runs` - Side-by-side run comparison
+- `analyze_results` - Test case analysis with optimization recommendations
+
+## Quick Start
+
+### Prerequisites
+- Python 3.10+
+- HuggingFace account (for authentication)
+- HuggingFace token (optional, for private datasets)
+
+### Installation
+
+1. Clone the repository:
+```bash
+git clone https://github.com/Mandark-droid/TraceMind-AI.git
+cd TraceMind-AI
+```
+
+2. Install dependencies:
+```bash
+pip install -r requirements.txt
+```
+
+3. Configure environment:
+```bash
+cp .env.example .env
+# Edit .env with your configuration
+```
+
+4. Run the application:
+```bash
+python app.py
+```
+
+Visit http://localhost:7860
+
+## Configuration
+
+Create a `.env` file with the following variables:
+
+```env
+# HuggingFace Configuration
+HF_TOKEN=your_token_here
+
+# MCP Server URL
+MCP_SERVER_URL=https://kshitijthakkar-tracemind-mcp-server.hf.space/gradio_api/mcp/
+
+# Dataset Configuration
+LEADERBOARD_REPO=kshitijthakkar/smoltrace-leaderboard
+
+# Development Mode (optional)
+DEV_MODE=true
+```
+
+## Data Sources
+
+TraceMind-AI loads evaluation data from HuggingFace datasets:
+
+- **Leaderboard**: Aggregate statistics for all evaluation runs
+- **Results**: Individual test case results
+- **Traces**: OpenTelemetry trace data
+- **Metrics**: GPU metrics and performance data
+
+## Architecture
+
+### Project Structure
+
+```
+TraceMind-AI/
+├── app.py              # Main Gradio application
+├── data_loader.py      # HuggingFace dataset integration
+├── mcp_client/         # MCP client implementation
+│   ├── client.py       # Async MCP client
+│   └── sync_wrapper.py # Synchronous wrapper
+├── utils/              # Utilities
+│   ├── auth.py         # HuggingFace OAuth
+│   └── navigation.py   # Screen navigation
+├── screens/            # UI screens
+├── components/         # Reusable components
+└── styles/             # Custom CSS
+```
+
+### MCP Client Integration
+
+TraceMind-AI uses the MCP Python SDK to connect to remote MCP servers:
+
+```python
+from mcp_client.sync_wrapper import get_sync_mcp_client
+
+# Initialize MCP client
+mcp_client = get_sync_mcp_client()
+mcp_client.initialize()
+
+# Call MCP tools
+insights = mcp_client.analyze_leaderboard(
+    metric_focus="overall",
+    time_range="last_week",
+    top_n=5
+)
+```
+
+## Usage
+
+### Viewing the Leaderboard
+
+1. Log in with your HuggingFace account
+2. Navigate to the "Leaderboard" tab
+3. Click "Load Leaderboard" to fetch the latest data
+4. View AI-powered insights generated by the MCP server
+
+### Estimating Costs
+
+1. Navigate to the "Cost Estimator" tab
+2. Enter the model name (e.g., `openai/gpt-4`)
+3. Select agent type and number of tests
+4. Click "Estimate Cost" for AI-powered analysis
+
+### Viewing Trace Details
+
+1. Select an evaluation run from the leaderboard
+2. Click on a specific test case
+3. View detailed OpenTelemetry trace visualization
+4. Ask questions about the trace using MCP-powered analysis
+
+## Technology Stack
+
+- **UI Framework**: Gradio 6.0
+- **MCP Protocol**: MCP Python SDK 1.21.0+
+- **Data**: HuggingFace Datasets API
+- **Authentication**: HuggingFace OAuth
+- **AI**: Google Gemini 2.5 Flash (via MCP server)
+
+## Development
+
+### Running Locally
+
+```bash
+# Install dependencies
+pip install -r requirements.txt
+
+# Set development mode
+export DEV_MODE=true
+
+# Run the app
+python app.py
+```
+
+### Running on HuggingFace Spaces
+
+This application is configured for deployment on HuggingFace Spaces using the Gradio SDK. The `app.py` file serves as the entry point.
+
+## Documentation
+
+For detailed implementation documentation, see:
+- [Data Loader API](data_loader.py) - Dataset loading and caching
+- [MCP Client API](mcp_client/client.py) - MCP protocol integration
+- [Authentication](utils/auth.py) - HuggingFace OAuth integration
+
+## Demo Video
+
+[Link to demo video showing the application in action]
+
+## Social Media
+
+[Link to social media post about this project]
+
+## License
+
+MIT License - See LICENSE file for details
+
+## Contributing
+
+Contributions are welcome! Please open an issue or submit a pull request.
+
+## Acknowledgments
+
+- **MCP Team** - For the Model Context Protocol specification
+- **Gradio Team** - For Gradio 6 with MCP integration
+- **HuggingFace** - For Spaces hosting and dataset infrastructure
+- **Google** - For Gemini API access
+
+## Links
+
+- **Live Demo**: https://huggingface.co/spaces/kshitijthakkar/TraceMind-AI
+- **MCP Server**: https://huggingface.co/spaces/kshitijthakkar/TraceMind-mcp-server
+- **GitHub**: https://github.com/Mandark-droid/TraceMind-AI
+- **MCP Specification**: https://modelcontextprotocol.io
+
+---
+
+**MCP's 1st Birthday Hackathon Submission**
+*Track: MCP in Action - Enterprise*
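As a companion to the leaderboard example in the README above, a minimal sketch of the cost-estimator flow using the same synchronous client; argument values are illustrative, and the signature matches `mcp_client/sync_wrapper.py` later in this commit:

```python
from mcp_client.sync_wrapper import get_sync_mcp_client

client = get_sync_mcp_client()
client.initialize()

# Ask the MCP server to estimate the cost of a 100-test evaluation run
estimate = client.estimate_cost(
    model="openai/gpt-4",  # or e.g. meta-llama/Llama-3.1-8B
    agent_type="both",     # one of: tool, code, both
    num_tests=100,
)
print(estimate)
```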
app.py ADDED
@@ -0,0 +1,215 @@
+"""
+TraceMind-AI - Agent Evaluation Platform
+MCP Client consuming TraceMind-mcp-server for intelligent analysis
+"""
+
+import os
+import gradio as gr
+from dotenv import load_dotenv
+import pandas as pd
+
+# Load environment variables
+load_dotenv()
+
+# Import utilities
+from utils.auth import is_authenticated, get_user_info, create_login_button, create_user_info_display, DEV_MODE
+from utils.navigation import Navigator, Screen
+from data_loader import create_data_loader_from_env
+from styles.tracemind_theme import get_tracemind_css
+from mcp_client.sync_wrapper import get_sync_mcp_client
+
+# Initialize
+data_loader = create_data_loader_from_env()
+navigator = Navigator()
+mcp_client = get_sync_mcp_client()
+
+# Global state
+current_selected_run = None
+
+
+def load_leaderboard_view(token, profile):
+    """Load and display the leaderboard with MCP-powered insights"""
+    if not is_authenticated(token, profile):
+        return "Please log in to view the leaderboard", ""
+
+    try:
+        # Load real data from HuggingFace
+        leaderboard_df = data_loader.load_leaderboard()
+
+        if leaderboard_df.empty:
+            return "No evaluation runs found in the leaderboard", ""
+
+        # Format dataframe for display
+        display_df = leaderboard_df[[
+            'model', 'agent_type', 'success_rate', 'total_tests',
+            'avg_duration_ms', 'total_cost_usd', 'co2_emissions_g'
+        ]].copy()
+
+        # Round numeric columns
+        display_df['success_rate'] = display_df['success_rate'].round(1)
+        display_df['avg_duration_ms'] = display_df['avg_duration_ms'].round(0)
+        display_df['total_cost_usd'] = display_df['total_cost_usd'].round(4)
+        display_df['co2_emissions_g'] = display_df['co2_emissions_g'].round(2)
+
+        # Get MCP-powered insights
+        try:
+            insights = mcp_client.analyze_leaderboard(
+                metric_focus="overall",
+                time_range="all_time",
+                top_n=5,
+                hf_token=os.getenv('HF_TOKEN'),
+                gemini_api_key=os.getenv('GEMINI_API_KEY')
+            )
+        except Exception as e:
+            insights = f"⚠️ MCP analysis unavailable: {str(e)}\n\n(Server may need initialization)"
+
+        return display_df, insights
+
+    except Exception as e:
+        return f"Error loading leaderboard: {e}", ""
+
+
+def estimate_evaluation_cost(model, agent_type, num_tests):
+    """Estimate cost for a new evaluation using MCP server"""
+    try:
+        cost_estimate = mcp_client.estimate_cost(
+            model=model,
+            agent_type=agent_type,
+            num_tests=int(num_tests),
+            hf_token=os.getenv('HF_TOKEN'),
+            gemini_api_key=os.getenv('GEMINI_API_KEY')
+        )
+        return cost_estimate
+    except Exception as e:
+        return f"❌ Error estimating cost: {str(e)}"
+
+
+def build_ui():
+    """Build the Gradio UI"""
+
+    with gr.Blocks(css=get_tracemind_css(), title="TraceMind-AI") as demo:
+        # Header
+        gr.Markdown("""
+        # 🔍 TraceMind-AI
+        ### Agent Evaluation Platform with MCP-Powered Intelligence
+
+        **Powered by:**
+        - 📊 Real data from HuggingFace datasets
+        - 🤖 MCP Server for AI-powered insights ([TraceMind-mcp-server](https://huggingface.co/spaces/kshitijthakkar/TraceMind-mcp-server))
+        - 🧠 Google Gemini 2.5 Flash for analysis
+        """)
+
+        # Authentication
+        with gr.Row():
+            with gr.Column(scale=2):
+                user_display = gr.HTML(create_user_info_display(None))
+            with gr.Column(scale=1):
+                login_btn = create_login_button()
+
+        # Main content (shown when authenticated)
+        with gr.Column(visible=DEV_MODE) as main_content:
+            with gr.Tabs() as tabs:
+                # Tab 1: Leaderboard
+                with gr.Tab("📊 Leaderboard"):
+                    gr.Markdown("### Agent Evaluation Leaderboard")
+                    gr.Markdown("Real-time data from `kshitijthakkar/smoltrace-leaderboard`")
+
+                    load_leaderboard_btn = gr.Button("🔄 Load Leaderboard", variant="primary")
+
+                    with gr.Row():
+                        with gr.Column(scale=2):
+                            leaderboard_table = gr.Dataframe(
+                                headers=["Model", "Agent Type", "Success Rate %", "Total Tests", "Avg Duration (ms)", "Cost ($)", "CO2 (g)"],
+                                label="Evaluation Runs",
+                                interactive=False
+                            )
+                        with gr.Column(scale=1):
+                            leaderboard_insights = gr.Markdown("**MCP Analysis:**\n\nClick 'Load Leaderboard' to see AI-powered insights")
+
+                # Tab 2: Cost Estimator
+                with gr.Tab("💰 Cost Estimator"):
+                    gr.Markdown("### Estimate Evaluation Costs")
+                    gr.Markdown("Uses MCP server to calculate costs for different models and configurations")
+
+                    with gr.Row():
+                        model_input = gr.Textbox(
+                            label="Model",
+                            placeholder="openai/gpt-4 or meta-llama/Llama-3.1-8B",
+                            value="openai/gpt-4"
+                        )
+                        agent_type_input = gr.Dropdown(
+                            ["tool", "code", "both"],
+                            label="Agent Type",
+                            value="both"
+                        )
+                        num_tests_input = gr.Number(
+                            label="Number of Tests",
+                            value=100
+                        )
+
+                    estimate_btn = gr.Button("💵 Estimate Cost", variant="primary")
+                    cost_output = gr.Markdown("**Cost Estimate:**\n\nEnter details and click 'Estimate Cost'")
+
+                # Tab 3: MCP Server Status
+                with gr.Tab("🔧 MCP Status"):
+                    gr.Markdown("### TraceMind MCP Server Connection")
+
+                    mcp_url_display = gr.Textbox(
+                        label="MCP Server URL",
+                        value=os.getenv('MCP_SERVER_URL', 'https://kshitijthakkar-tracemind-mcp-server.hf.space/gradio_api/mcp/'),
+                        interactive=False
+                    )
+
+                    test_mcp_btn = gr.Button("🧪 Test MCP Connection", variant="secondary")
+                    mcp_status = gr.Markdown("**Status:** Not tested yet")
+
+        # Event handlers
+        def handle_login(token, profile):
+            user = get_user_info(token, profile)
+            return create_user_info_display(user), gr.update(visible=True)
+
+        login_btn.click(
+            fn=handle_login,
+            inputs=[login_btn, login_btn],  # Gradio provides token/profile automatically
+            outputs=[user_display, main_content]
+        )
+
+        load_leaderboard_btn.click(
+            fn=load_leaderboard_view,
+            inputs=[login_btn, login_btn],
+            outputs=[leaderboard_table, leaderboard_insights]
+        )
+
+        estimate_btn.click(
+            fn=estimate_evaluation_cost,
+            inputs=[model_input, agent_type_input, num_tests_input],
+            outputs=[cost_output]
+        )
+
+        def test_mcp_connection():
+            try:
+                mcp_client.initialize()
+                return "✅ **Connected Successfully!**\n\nMCP server is online and ready"
+            except Exception as e:
+                return f"❌ **Connection Failed**\n\nError: {str(e)}"
+
+        test_mcp_btn.click(
+            fn=test_mcp_connection,
+            outputs=[mcp_status]
+        )
+
+    return demo
+
+
+if __name__ == "__main__":
+    print("🚀 Starting TraceMind-AI...")
+    print(f"📊 Leaderboard: {os.getenv('LEADERBOARD_REPO', 'kshitijthakkar/smoltrace-leaderboard')}")
+    print(f"🤖 MCP Server: {os.getenv('MCP_SERVER_URL', 'https://kshitijthakkar-tracemind-mcp-server.hf.space/gradio_api/mcp/')}")
+    print(f"🛠️ Dev Mode: {DEV_MODE}")
+
+    demo = build_ui()
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False
+    )
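One detail worth flagging in `app.py` above: the click handlers pass `login_btn` twice as `inputs` to supply the OAuth token and profile. Gradio's documented OAuth pattern on Spaces instead injects these via `gr.OAuthToken` / `gr.OAuthProfile` type hints, without listing them in `inputs`. A hedged alternative sketch (the helpers come from `utils/auth.py` in this commit, which this diff does not show, so treat the wiring as an assumption):

```python
import gradio as gr
from utils.auth import get_user_info, create_user_info_display

# Gradio fills OAuth-typed parameters automatically on Spaces; they are
# not listed in the event's `inputs`.
def handle_login(profile: gr.OAuthProfile | None, token: gr.OAuthToken | None):
    user = get_user_info(token, profile)
    return create_user_info_display(user), gr.update(visible=True)
```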
data_loader.py ADDED
@@ -0,0 +1,255 @@
+"""
+Data Loader for TraceMind-AI
+Loads real data from HuggingFace datasets (not mock data)
+"""
+
+import os
+from typing import Optional, Dict, Any, List
+import pandas as pd
+from datasets import load_dataset
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv()
+
+
+class TraceMindDataLoader:
+    """Loads evaluation data from HuggingFace datasets"""
+
+    def __init__(
+        self,
+        leaderboard_repo: Optional[str] = None,
+        hf_token: Optional[str] = None
+    ):
+        """
+        Initialize data loader
+
+        Args:
+            leaderboard_repo: HuggingFace dataset repo for leaderboard
+            hf_token: HuggingFace API token for private datasets
+        """
+        self.leaderboard_repo = leaderboard_repo or os.getenv(
+            'LEADERBOARD_REPO',
+            'kshitijthakkar/smoltrace-leaderboard'
+        )
+        self.hf_token = hf_token or os.getenv('HF_TOKEN')
+
+        # Cache for loaded datasets
+        self._leaderboard_df: Optional[pd.DataFrame] = None
+        self._results_cache: Dict[str, pd.DataFrame] = {}
+        self._traces_cache: Dict[str, List[Dict]] = {}
+        self._metrics_cache: Dict[str, Dict] = {}
+
+    def load_leaderboard(self, force_refresh: bool = False) -> pd.DataFrame:
+        """
+        Load leaderboard dataset from HuggingFace
+
+        Args:
+            force_refresh: Force reload from HF (ignore cache)
+
+        Returns:
+            DataFrame with leaderboard data
+        """
+        if self._leaderboard_df is not None and not force_refresh:
+            return self._leaderboard_df
+
+        try:
+            print(f"📊 Loading leaderboard from {self.leaderboard_repo}...")
+
+            # Load dataset from HuggingFace
+            dataset = load_dataset(
+                self.leaderboard_repo,
+                split='train',
+                token=self.hf_token
+            )
+
+            # Convert to DataFrame
+            self._leaderboard_df = pd.DataFrame(dataset)
+
+            print(f"✅ Loaded {len(self._leaderboard_df)} evaluation runs")
+            return self._leaderboard_df
+
+        except Exception as e:
+            print(f"❌ Error loading leaderboard: {e}")
+            # Return empty DataFrame with expected columns
+            return pd.DataFrame(columns=[
+                'run_id', 'model', 'agent_type', 'provider',
+                'success_rate', 'total_tests', 'successful_tests', 'failed_tests',
+                'avg_steps', 'avg_duration_ms', 'total_duration_ms',
+                'total_tokens', 'avg_tokens_per_test', 'total_cost_usd', 'avg_cost_per_test_usd',
+                'co2_emissions_g', 'gpu_utilization_avg', 'gpu_memory_max_mib',
+                'results_dataset', 'traces_dataset', 'metrics_dataset',
+                'timestamp', 'submitted_by', 'hf_job_id', 'job_type',
+                'dataset_used', 'smoltrace_version'
+            ])
+
+    def load_results(self, results_repo: str, force_refresh: bool = False) -> pd.DataFrame:
+        """
+        Load results dataset for a specific run
+
+        Args:
+            results_repo: HuggingFace dataset repo for results (e.g., 'user/agent-results-gpt4')
+            force_refresh: Force reload from HF
+
+        Returns:
+            DataFrame with test case results
+        """
+        if results_repo in self._results_cache and not force_refresh:
+            return self._results_cache[results_repo]
+
+        try:
+            print(f"📊 Loading results from {results_repo}...")
+
+            dataset = load_dataset(
+                results_repo,
+                split='train',
+                token=self.hf_token
+            )
+
+            df = pd.DataFrame(dataset)
+            self._results_cache[results_repo] = df
+
+            print(f"✅ Loaded {len(df)} test cases")
+            return df
+
+        except Exception as e:
+            print(f"❌ Error loading results: {e}")
+            return pd.DataFrame(columns=[
+                'run_id', 'task_id', 'test_index',
+                'prompt', 'expected_tool', 'difficulty', 'category',
+                'success', 'response', 'tool_called', 'tool_correct',
+                'expected_keywords', 'keywords_matched',
+                'execution_time_ms', 'total_tokens', 'prompt_tokens', 'completion_tokens', 'cost_usd',
+                'trace_id', 'start_time', 'end_time', 'start_time_unix_nano', 'end_time_unix_nano',
+                'error', 'error_type'
+            ])
+
+    def load_traces(self, traces_repo: str, force_refresh: bool = False) -> List[Dict[str, Any]]:
+        """
+        Load traces dataset for a specific run
+
+        Args:
+            traces_repo: HuggingFace dataset repo for traces
+            force_refresh: Force reload from HF
+
+        Returns:
+            List of trace dictionaries (OpenTelemetry format)
+        """
+        if traces_repo in self._traces_cache and not force_refresh:
+            return self._traces_cache[traces_repo]
+
+        try:
+            print(f"🔍 Loading traces from {traces_repo}...")
+
+            dataset = load_dataset(
+                traces_repo,
+                split='train',
+                token=self.hf_token
+            )
+
+            # Convert to list of dicts
+            traces = [dict(item) for item in dataset]
+            self._traces_cache[traces_repo] = traces
+
+            print(f"✅ Loaded {len(traces)} traces")
+            return traces
+
+        except Exception as e:
+            print(f"❌ Error loading traces: {e}")
+            return []
+
+    def load_metrics(self, metrics_repo: str, force_refresh: bool = False) -> Dict[str, Any]:
+        """
+        Load GPU metrics dataset for a specific run
+
+        Args:
+            metrics_repo: HuggingFace dataset repo for metrics
+            force_refresh: Force reload from HF
+
+        Returns:
+            Metrics data (OpenTelemetry metrics format)
+        """
+        if metrics_repo in self._metrics_cache and not force_refresh:
+            return self._metrics_cache[metrics_repo]
+
+        try:
+            print(f"📈 Loading metrics from {metrics_repo}...")
+
+            dataset = load_dataset(
+                metrics_repo,
+                split='train',
+                token=self.hf_token
+            )
+
+            # Assume metrics dataset has one row with all metrics
+            if len(dataset) > 0:
+                metrics = dict(dataset[0])
+                self._metrics_cache[metrics_repo] = metrics
+                print(f"✅ Loaded metrics data")
+                return metrics
+            else:
+                print(f"⚠️ No metrics data found")
+                return {}
+
+        except Exception as e:
+            print(f"❌ Error loading metrics: {e}")
+            return {}
+
+    def get_run_by_id(self, run_id: str) -> Optional[Dict[str, Any]]:
+        """
+        Get a specific run from the leaderboard by run_id
+
+        Args:
+            run_id: Run ID to fetch
+
+        Returns:
+            Run data as dict, or None if not found
+        """
+        leaderboard_df = self.load_leaderboard()
+
+        run_rows = leaderboard_df[leaderboard_df['run_id'] == run_id]
+
+        if len(run_rows) > 0:
+            return run_rows.iloc[0].to_dict()
+        else:
+            return None
+
+    def get_trace_by_id(self, traces_repo: str, trace_id: str) -> Optional[Dict[str, Any]]:
+        """
+        Get a specific trace by trace_id
+
+        Args:
+            traces_repo: HuggingFace dataset repo for traces
+            trace_id: Trace ID to fetch
+
+        Returns:
+            Trace data as dict, or None if not found
+        """
+        traces = self.load_traces(traces_repo)
+
+        for trace in traces:
+            if trace.get('trace_id') == trace_id or trace.get('traceId') == trace_id:
+                return trace
+
+        return None
+
+    def clear_cache(self):
+        """Clear all cached data"""
+        self._leaderboard_df = None
+        self._results_cache.clear()
+        self._traces_cache.clear()
+        self._metrics_cache.clear()
+        print("🧹 Cache cleared")
+
+
+def create_data_loader_from_env() -> TraceMindDataLoader:
+    """
+    Create a data loader using environment variables
+
+    Returns:
+        TraceMindDataLoader instance
+    """
+    return TraceMindDataLoader(
+        leaderboard_repo=os.getenv('LEADERBOARD_REPO'),
+        hf_token=os.getenv('HF_TOKEN')
+    )
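A short usage sketch for the loader above, tying together the env-driven constructor and the cached accessors (repo names fall back to the `.env.example` defaults; the column names come from the empty-DataFrame schema in `load_leaderboard`):

```python
from data_loader import create_data_loader_from_env

loader = create_data_loader_from_env()

# First call hits HuggingFace; subsequent calls are served from cache
df = loader.load_leaderboard()
print(df[["model", "success_rate", "total_cost_usd"]].head())

# Look up one run, then drill into its per-test results
if not df.empty:
    run = loader.get_run_by_id(df.iloc[0]["run_id"])
    if run:
        results = loader.load_results(run["results_dataset"])
        print(f"{len(results)} test cases for {run['model']}")
```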
mcp_client/__init__.py ADDED
@@ -0,0 +1,8 @@
+"""
+MCP Client for TraceMind-AI
+Connects to the TraceMind-mcp-server to use real MCP tools
+"""
+
+from .client import MCPClient
+
+__all__ = ['MCPClient']
mcp_client/client.py ADDED
@@ -0,0 +1,351 @@
+"""
+MCP Client for connecting to TraceMind-mcp-server
+Uses MCP protocol over HTTP to call remote MCP tools
+"""
+
+import os
+import asyncio
+from typing import Optional, Dict, Any, List
+from mcp import ClientSession, StdioServerParameters
+from mcp.client.sse import sse_client
+import aiohttp
+
+
+class MCPClient:
+    """Client for interacting with TraceMind MCP Server"""
+
+    def __init__(self, server_url: Optional[str] = None):
+        """
+        Initialize MCP Client
+
+        Args:
+            server_url: URL of the TraceMind-mcp-server endpoint
+                If None, uses MCP_SERVER_URL from environment
+        """
+        self.server_url = server_url or os.getenv(
+            'MCP_SERVER_URL',
+            'https://kshitijthakkar-tracemind-mcp-server.hf.space/gradio_api/mcp/'
+        )
+        self.session: Optional[ClientSession] = None
+        self._initialized = False
+
+    async def initialize(self):
+        """Initialize connection to MCP server"""
+        if self._initialized:
+            return
+
+        try:
+            # Connect to SSE endpoint
+            async with sse_client(self.server_url) as (read, write):
+                async with ClientSession(read, write) as session:
+                    self.session = session
+                    await session.initialize()
+                    self._initialized = True
+
+                    # List available tools for verification
+                    tools_result = await session.list_tools()
+                    print(f"✅ Connected to TraceMind MCP Server at {self.server_url}")
+                    print(f"📊 Available tools: {len(tools_result.tools)}")
+                    for tool in tools_result.tools:
+                        print(f"  - {tool.name}: {tool.description}")
+
+        except Exception as e:
+            print(f"❌ Failed to connect to MCP server: {e}")
+            raise
+
+    async def analyze_leaderboard(
+        self,
+        leaderboard_repo: str = "kshitijthakkar/smoltrace-leaderboard",
+        metric_focus: str = "overall",
+        time_range: str = "last_week",
+        top_n: int = 5,
+        hf_token: Optional[str] = None,
+        gemini_api_key: Optional[str] = None
+    ) -> str:
+        """
+        Call the analyze_leaderboard tool on MCP server
+
+        Args:
+            leaderboard_repo: HuggingFace dataset repo for leaderboard
+            metric_focus: Focus metric (overall, accuracy, cost, latency, co2)
+            time_range: Time range filter (last_week, last_month, all_time)
+            top_n: Number of top models to highlight
+            hf_token: HuggingFace API token (optional if public dataset)
+            gemini_api_key: Google Gemini API key (optional, server may have it)
+
+        Returns:
+            AI-generated analysis of the leaderboard
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        try:
+            # Build arguments
+            args = {
+                "leaderboard_repo": leaderboard_repo,
+                "metric_focus": metric_focus,
+                "time_range": time_range,
+                "top_n": top_n
+            }
+
+            # Add optional tokens if provided
+            if hf_token:
+                args["hf_token"] = hf_token
+            if gemini_api_key:
+                args["gemini_api_key"] = gemini_api_key
+
+            # Call MCP tool
+            result = await self.session.call_tool("analyze_leaderboard", arguments=args)
+
+            # Extract text from result
+            if result.content and len(result.content) > 0:
+                return result.content[0].text
+            else:
+                return "No analysis generated"
+
+        except Exception as e:
+            return f"❌ Error calling analyze_leaderboard: {str(e)}"
+
+    async def debug_trace(
+        self,
+        trace_data: Dict[str, Any],
+        question: str,
+        metrics_data: Optional[Dict[str, Any]] = None,
+        hf_token: Optional[str] = None,
+        gemini_api_key: Optional[str] = None
+    ) -> str:
+        """
+        Call the debug_trace tool on MCP server
+
+        Args:
+            trace_data: OpenTelemetry trace data (dict with spans)
+            question: User question about the trace
+            metrics_data: Optional GPU metrics data
+            hf_token: HuggingFace API token
+            gemini_api_key: Google Gemini API key
+
+        Returns:
+            AI-generated answer to the trace question
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        try:
+            args = {
+                "trace_data": trace_data,
+                "question": question
+            }
+
+            if metrics_data:
+                args["metrics_data"] = metrics_data
+            if hf_token:
+                args["hf_token"] = hf_token
+            if gemini_api_key:
+                args["gemini_api_key"] = gemini_api_key
+
+            result = await self.session.call_tool("debug_trace", arguments=args)
+
+            if result.content and len(result.content) > 0:
+                return result.content[0].text
+            else:
+                return "No answer generated"
+
+        except Exception as e:
+            return f"❌ Error calling debug_trace: {str(e)}"
+
+    async def estimate_cost(
+        self,
+        model: str,
+        agent_type: str = "both",
+        num_tests: int = 100,
+        hardware: Optional[str] = None,
+        hf_token: Optional[str] = None,
+        gemini_api_key: Optional[str] = None
+    ) -> str:
+        """
+        Call the estimate_cost tool on MCP server
+
+        Args:
+            model: Model name (e.g., 'openai/gpt-4', 'meta-llama/Llama-3.1-8B')
+            agent_type: Agent type (tool, code, both)
+            num_tests: Number of tests to run
+            hardware: Hardware type (cpu, gpu_a10, gpu_h200)
+            hf_token: HuggingFace API token
+            gemini_api_key: Google Gemini API key
+
+        Returns:
+            Cost estimation with breakdown
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        try:
+            args = {
+                "model": model,
+                "agent_type": agent_type,
+                "num_tests": num_tests
+            }
+
+            if hardware:
+                args["hardware"] = hardware
+            if hf_token:
+                args["hf_token"] = hf_token
+            if gemini_api_key:
+                args["gemini_api_key"] = gemini_api_key
+
+            result = await self.session.call_tool("estimate_cost", arguments=args)
+
+            if result.content and len(result.content) > 0:
+                return result.content[0].text
+            else:
+                return "No estimation generated"
+
+        except Exception as e:
+            return f"❌ Error calling estimate_cost: {str(e)}"
+
+    async def compare_runs(
+        self,
+        run_data_list: List[Dict[str, Any]],
+        focus_metrics: Optional[List[str]] = None,
+        hf_token: Optional[str] = None,
+        gemini_api_key: Optional[str] = None
+    ) -> str:
+        """
+        Call the compare_runs tool on MCP server
+
+        Args:
+            run_data_list: List of run data dicts from leaderboard
+            focus_metrics: List of metrics to focus on
+            hf_token: HuggingFace API token
+            gemini_api_key: Google Gemini API key
+
+        Returns:
+            AI-generated comparison analysis
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        try:
+            args = {
+                "run_data_list": run_data_list
+            }
+
+            if focus_metrics:
+                args["focus_metrics"] = focus_metrics
+            if hf_token:
+                args["hf_token"] = hf_token
+            if gemini_api_key:
+                args["gemini_api_key"] = gemini_api_key
+
+            result = await self.session.call_tool("compare_runs", arguments=args)
+
+            if result.content and len(result.content) > 0:
+                return result.content[0].text
+            else:
+                return "No comparison generated"
+
+        except Exception as e:
+            return f"❌ Error calling compare_runs: {str(e)}"
+
+    async def analyze_results(
+        self,
+        results_data: List[Dict[str, Any]],
+        analysis_focus: str = "optimization",
+        hf_token: Optional[str] = None,
+        gemini_api_key: Optional[str] = None
+    ) -> str:
+        """
+        Call the analyze_results tool on MCP server
+
+        Args:
+            results_data: List of test case results
+            analysis_focus: Focus area (optimization, failures, performance, cost)
+            hf_token: HuggingFace API token
+            gemini_api_key: Google Gemini API key
+
+        Returns:
+            AI-generated results analysis with recommendations
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        try:
+            args = {
+                "results_data": results_data,
+                "analysis_focus": analysis_focus
+            }
+
+            if hf_token:
+                args["hf_token"] = hf_token
+            if gemini_api_key:
+                args["gemini_api_key"] = gemini_api_key
+
+            result = await self.session.call_tool("analyze_results", arguments=args)
+
+            if result.content and len(result.content) > 0:
+                return result.content[0].text
+            else:
+                return "No analysis generated"
+
+        except Exception as e:
+            return f"❌ Error calling analyze_results: {str(e)}"
+
+    async def get_dataset_info(
+        self,
+        dataset_repo: str,
+        hf_token: Optional[str] = None,
+        gemini_api_key: Optional[str] = None
+    ) -> str:
+        """
+        Call the get_dataset tool on MCP server (resource)
+
+        Args:
+            dataset_repo: HuggingFace dataset repo
+            hf_token: HuggingFace API token
+            gemini_api_key: Google Gemini API key
+
+        Returns:
+            Dataset information and structure
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        try:
+            args = {
+                "dataset_repo": dataset_repo
+            }
+
+            if hf_token:
+                args["hf_token"] = hf_token
+            if gemini_api_key:
+                args["gemini_api_key"] = gemini_api_key
+
+            result = await self.session.call_tool("get_dataset", arguments=args)
+
+            if result.content and len(result.content) > 0:
+                return result.content[0].text
+            else:
+                return "No dataset info generated"
+
+        except Exception as e:
+            return f"❌ Error calling get_dataset: {str(e)}"
+
+    async def close(self):
+        """Close the MCP client session"""
+        if self.session:
+            # Note: ClientSession doesn't have an explicit close method
+            # The context manager handles cleanup
+            self.session = None
+            self._initialized = False
+
+
+# Singleton instance for use across the app
+_mcp_client_instance: Optional[MCPClient] = None
+
+
+def get_mcp_client() -> MCPClient:
+    """Get or create the global MCP client instance"""
+    global _mcp_client_instance
+    if _mcp_client_instance is None:
+        _mcp_client_instance = MCPClient()
+    return _mcp_client_instance
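One caveat in `client.py` above: `initialize()` opens the SSE transport and the `ClientSession` inside `async with` blocks, so both are torn down again by the time `self.session` is used by the tool methods. A minimal per-call sketch, using the same `mcp` SDK imports, that keeps the connection open for the duration of the call:

```python
from mcp import ClientSession
from mcp.client.sse import sse_client

async def call_tool_once(server_url: str, tool_name: str, arguments: dict) -> str:
    # Open transport + session, make one tool call, then let the
    # context managers tear the connection down cleanly.
    async with sse_client(server_url) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            result = await session.call_tool(tool_name, arguments=arguments)
            return result.content[0].text if result.content else ""
```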
mcp_client/sync_wrapper.py ADDED
@@ -0,0 +1,131 @@
+"""
+Synchronous wrapper for MCP Client
+Provides sync interface for Gradio event handlers
+"""
+
+import asyncio
+from typing import Optional, Dict, Any, List
+from .client import MCPClient
+
+
+class SyncMCPClient:
+    """Synchronous wrapper for MCPClient to use in Gradio event handlers"""
+
+    def __init__(self, server_url: Optional[str] = None):
+        self.client = MCPClient(server_url)
+        self._loop = None
+
+    def _get_or_create_event_loop(self):
+        """Get or create an event loop for async operations"""
+        try:
+            loop = asyncio.get_event_loop()
+            if loop.is_closed():
+                loop = asyncio.new_event_loop()
+                asyncio.set_event_loop(loop)
+        except RuntimeError:
+            loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(loop)
+        return loop
+
+    def _run_async(self, coro):
+        """Run an async coroutine and return the result"""
+        loop = self._get_or_create_event_loop()
+        return loop.run_until_complete(coro)
+
+    def initialize(self):
+        """Initialize connection to MCP server (sync)"""
+        return self._run_async(self.client.initialize())
+
+    def analyze_leaderboard(
+        self,
+        leaderboard_repo: str = "kshitijthakkar/smoltrace-leaderboard",
+        metric_focus: str = "overall",
+        time_range: str = "last_week",
+        top_n: int = 5,
+        hf_token: Optional[str] = None,
+        gemini_api_key: Optional[str] = None
+    ) -> str:
+        """Analyze leaderboard (sync wrapper)"""
+        return self._run_async(
+            self.client.analyze_leaderboard(
+                leaderboard_repo, metric_focus, time_range, top_n, hf_token, gemini_api_key
+            )
+        )
+
+    def debug_trace(
+        self,
+        trace_data: Dict[str, Any],
+        question: str,
+        metrics_data: Optional[Dict[str, Any]] = None,
+        hf_token: Optional[str] = None,
+        gemini_api_key: Optional[str] = None
+    ) -> str:
+        """Debug trace (sync wrapper)"""
+        return self._run_async(
+            self.client.debug_trace(trace_data, question, metrics_data, hf_token, gemini_api_key)
+        )
+
+    def estimate_cost(
+        self,
+        model: str,
+        agent_type: str = "both",
+        num_tests: int = 100,
+        hardware: Optional[str] = None,
+        hf_token: Optional[str] = None,
+        gemini_api_key: Optional[str] = None
+    ) -> str:
+        """Estimate cost (sync wrapper)"""
+        return self._run_async(
+            self.client.estimate_cost(model, agent_type, num_tests, hardware, hf_token, gemini_api_key)
+        )
+
+    def compare_runs(
+        self,
+        run_data_list: List[Dict[str, Any]],
+        focus_metrics: Optional[List[str]] = None,
+        hf_token: Optional[str] = None,
+        gemini_api_key: Optional[str] = None
+    ) -> str:
+        """Compare runs (sync wrapper)"""
+        return self._run_async(
+            self.client.compare_runs(run_data_list, focus_metrics, hf_token, gemini_api_key)
+        )
+
+    def analyze_results(
+        self,
+        results_data: List[Dict[str, Any]],
+        analysis_focus: str = "optimization",
+        hf_token: Optional[str] = None,
+        gemini_api_key: Optional[str] = None
+    ) -> str:
+        """Analyze results (sync wrapper)"""
+        return self._run_async(
+            self.client.analyze_results(results_data, analysis_focus, hf_token, gemini_api_key)
+        )
+
+    def get_dataset_info(
+        self,
+        dataset_repo: str,
+        hf_token: Optional[str] = None,
+        gemini_api_key: Optional[str] = None
+    ) -> str:
+        """Get dataset info (sync wrapper)"""
+        return self._run_async(
+            self.client.get_dataset_info(dataset_repo, hf_token, gemini_api_key)
+        )
+
+    def close(self):
+        """Close the MCP client (sync wrapper)"""
+        return self._run_async(self.client.close())
+
+
+# Global instance
+_sync_mcp_client: Optional[SyncMCPClient] = None
+
+
+def get_sync_mcp_client() -> SyncMCPClient:
+    """Get or create the global synchronous MCP client"""
+    global _sync_mcp_client
+    if _sync_mcp_client is None:
+        _sync_mcp_client = SyncMCPClient()
+    return _sync_mcp_client
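A small note on the event-loop handling above: `asyncio.get_event_loop()` emits a DeprecationWarning on Python 3.10+ when no loop is running. A hedged, simpler alternative sketch, at the cost of a fresh loop per call:

```python
import asyncio

def run_async(coro):
    # asyncio.run() creates a new event loop, runs the coroutine to
    # completion, and closes the loop: no get/set_event_loop juggling.
    return asyncio.run(coro)
```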
requirements.txt ADDED
@@ -0,0 +1,20 @@
+# Gradio for UI
+gradio>=6.0.0
+
+# MCP Client for connecting to TraceMind-mcp-server
+mcp>=1.21.0
+
+# HuggingFace for dataset loading
+datasets>=2.14.0
+huggingface-hub>=0.20.0
+
+# Data processing
+pandas>=2.0.0
+numpy>=1.24.0
+
+# Utilities
+python-dotenv>=1.0.0
+aiohttp>=3.9.0
+
+# Optional: For enhanced visualizations
+plotly>=5.18.0
styles/__init__.py ADDED
@@ -0,0 +1,8 @@
+"""
+Styles package for TraceMind
+Contains CSS themes and styling utilities
+"""
+
+from .tracemind_theme import get_tracemind_css
+
+__all__ = ['get_tracemind_css']
styles/tracemind_theme.py
ADDED
@@ -0,0 +1,204 @@
"""
TraceMind CSS Theme
Central CSS variables and global styling for consistent theming
"""

def get_tracemind_css():
    """
    Return the complete CSS for TraceMind with CSS variables

    Features:
    - Dark theme optimized
    - CSS variables for easy theming
    - Responsive design support
    - Smooth transitions
    """
    return """
    <style>
    /* Import fonts */
    @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');

    /* TraceMind CSS Variables */
    :root {
        /* Primary Brand Colors */
        --tm-primary: #4F46E5;      /* Indigo 600 - Main brand */
        --tm-secondary: #06B6D4;    /* Cyan 500 - Accents */

        /* Semantic Colors */
        --tm-success: #10B981;      /* Green 500 - High scores, success */
        --tm-warning: #F59E0B;      /* Amber 500 - Medium scores, warnings */
        --tm-danger: #EF4444;       /* Red 500 - Low scores, errors */
        --tm-info: #3B82F6;         /* Blue 500 - Info, API badge */

        /* Background Colors (Dark Theme) */
        --tm-bg-dark: #0F172A;      /* Slate 900 - App background */
        --tm-bg-card: #1E293B;      /* Slate 800 - Card background */
        --tm-bg-secondary: #334155; /* Slate 700 - Secondary elements */
        --tm-bg-hover: rgba(79, 70, 229, 0.15);  /* Hover overlay */
        --tm-bg-stripe: rgba(30, 41, 59, 0.5);   /* Table row stripe */

        /* Text Colors */
        --tm-text-primary: #F1F5F9;    /* Slate 100 - Primary text */
        --tm-text-secondary: #94A3B8;  /* Slate 400 - Secondary text */
        --tm-text-muted: #64748B;      /* Slate 500 - Muted text */

        /* Border Colors */
        --tm-border-subtle: rgba(148, 163, 184, 0.1);
        --tm-border-default: rgba(148, 163, 184, 0.2);
        --tm-border-strong: rgba(148, 163, 184, 0.4);

        /* Badge Colors */
        --tm-badge-tool: #8B5CF6;  /* Purple 500 - Tool agent */
        --tm-badge-code: #F59E0B;  /* Amber 500 - Code agent */
        --tm-badge-both: #06B6D4;  /* Cyan 500 - Both agent */
        --tm-badge-api: #3B82F6;   /* Blue 500 - API provider */
        --tm-badge-gpu: #10B981;   /* Green 500 - GPU provider */

        /* Gradient Definitions */
        --tm-gradient-success: linear-gradient(90deg, #10B981, #06B6D4);
        --tm-gradient-warning: linear-gradient(90deg, #F59E0B, #FBBF24);
        --tm-gradient-danger: linear-gradient(90deg, #EF4444, #F59E0B);
        --tm-gradient-gold: linear-gradient(145deg, #ffd700, #ffc400);
        --tm-gradient-silver: linear-gradient(145deg, #9ca3af, #787C7E);
        --tm-gradient-bronze: linear-gradient(145deg, #CD7F32, #b36a1d);

        /* Shadows */
        --tm-shadow-sm: 0 1px 2px rgba(0, 0, 0, 0.05);
        --tm-shadow-md: 0 4px 6px rgba(0, 0, 0, 0.1);
        --tm-shadow-lg: 0 10px 15px rgba(0, 0, 0, 0.1);
        --tm-shadow-glow: 0 0 20px rgba(79, 70, 229, 0.3);
    }

    /* Global Styles */
    .gradio-container {
        font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif !important;
        background: var(--tm-bg-dark) !important;
        color: var(--tm-text-primary) !important;
    }

    /* Headers */
    h1, h2, h3, h4, h5, h6 {
        color: var(--tm-text-primary) !important;
        font-weight: 600 !important;
        font-family: 'Inter', sans-serif !important;
    }

    /* Links */
    a {
        color: var(--tm-secondary) !important;
        text-decoration: none !important;
        transition: color 0.2s ease;
    }

    a:hover {
        color: var(--tm-primary) !important;
    }

    /* Buttons */
    button {
        transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
    }

    button:hover {
        transform: translateY(-2px) !important;
        box-shadow: var(--tm-shadow-lg) !important;
    }

    /* Smooth transitions for all interactive elements */
    * {
        transition: background-color 0.2s ease, color 0.2s ease, border-color 0.2s ease;
    }

    /* Custom scrollbar */
    ::-webkit-scrollbar {
        width: 8px;
        height: 8px;
    }

    ::-webkit-scrollbar-track {
        background: var(--tm-bg-secondary);
        border-radius: 4px;
    }

    ::-webkit-scrollbar-thumb {
        background: var(--tm-secondary);
        border-radius: 4px;
    }

    ::-webkit-scrollbar-thumb:hover {
        background: var(--tm-primary);
    }

    /* Responsive breakpoints */
    @media (max-width: 768px) {
        .gradio-container {
            padding: 8px !important;
        }

        h1 {
            font-size: 1.5rem !important;
        }

        h2 {
            font-size: 1.25rem !important;
        }

        h3 {
            font-size: 1.1rem !important;
        }
    }

    @media (max-width: 480px) {
        .gradio-container {
            padding: 4px !important;
        }
    }

    /* Modal Dialog Styles */
    .modal-dialog {
        position: fixed !important;
        top: 50% !important;
        left: 50% !important;
        transform: translate(-50%, -50%) !important;
        z-index: 9999 !important;
        background: var(--tm-bg-card) !important;
        border: 2px solid var(--tm-border-strong) !important;
        border-radius: 12px !important;
        padding: 24px !important;
        box-shadow: 0 25px 50px rgba(0, 0, 0, 0.5) !important;
        max-width: 800px !important;
        width: 90% !important;
        max-height: 90vh !important;
        overflow-y: auto !important;
    }

    /* Modal backdrop */
    .modal-dialog::before {
        content: '' !important;
        position: fixed !important;
        top: 0 !important;
        left: 0 !important;
        right: 0 !important;
        bottom: 0 !important;
        background: rgba(0, 0, 0, 0.7) !important;
        z-index: -1 !important;
    }

    /* Specific dialog IDs for additional customization */
    #new-eval-dialog,
    #export-dialog {
        animation: modalFadeIn 0.3s ease-out !important;
    }

    @keyframes modalFadeIn {
        from {
            opacity: 0;
            transform: translate(-50%, -55%);
        }
        to {
            opacity: 1;
            transform: translate(-50%, -50%);
        }
    }
    </style>
    """
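For orientation, here is a minimal sketch of how this theme would be wired into the app. Note that `get_tracemind_css()` returns a full `<style>...</style>` string, so it is injected as raw HTML rather than passed to `gr.Blocks(css=...)`, which expects bare CSS without the wrapper. The app structure below is an assumption; app.py is not shown in this excerpt.

```python
import gradio as gr
from styles import get_tracemind_css

# Sketch only: the exact injection point in app.py is assumed, not shown here.
with gr.Blocks() as demo:
    gr.HTML(get_tracemind_css())  # theme variables now apply to the whole page
    gr.Markdown("# TraceMind AI")

if __name__ == "__main__":
    demo.launch()
```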
utils/__init__.py
ADDED
@@ -0,0 +1 @@
# Utils package for MockTraceMind
utils/auth.py
ADDED
@@ -0,0 +1,193 @@
"""
HuggingFace Authentication for MockTraceMind
Using Gradio's built-in OAuth support (simpler than manual OAuth)
"""

import os
import gradio as gr
from typing import Optional
from dataclasses import dataclass

# Development mode flag - set DISABLE_OAUTH=true to skip OAuth for local dev
DEV_MODE = os.getenv("DISABLE_OAUTH", "false").lower() in ("true", "1", "yes")


@dataclass
class User:
    """Authenticated user information"""
    username: str
    name: str
    avatar_url: str
    token: str

    @classmethod
    def from_oauth(cls, token: gr.OAuthToken, profile: gr.OAuthProfile) -> "User":
        """Create User from Gradio OAuth objects"""
        return cls(
            username=profile.username,
            name=profile.name,
            avatar_url=profile.picture,
            # OAuthToken.token holds the raw access token; str(token) would
            # yield the dataclass repr instead of a usable credential.
            token=token.token
        )

    @classmethod
    def create_dev_user(cls) -> "User":
        """Create a mock user for development mode"""
        return cls(
            username=os.getenv("DEV_USERNAME", "dev_user"),
            name=os.getenv("DEV_NAME", "Development User"),
            avatar_url="https://huggingface.co/avatars/default-avatar.png",
            token="dev_token_12345"
        )


def is_authenticated(token: gr.OAuthToken | None, profile: gr.OAuthProfile | None) -> bool:
    """
    Check if user is authenticated

    Args:
        token: OAuth token from Gradio
        profile: OAuth profile from Gradio

    Returns:
        True if both token and profile are valid, or if in dev mode
    """
    # In dev mode, always consider authenticated
    if DEV_MODE:
        return True

    return token is not None and profile is not None


def get_user_info(token: gr.OAuthToken | None, profile: gr.OAuthProfile | None) -> Optional[User]:
    """
    Get user information from OAuth objects

    Args:
        token: OAuth token from Gradio
        profile: OAuth profile from Gradio

    Returns:
        User object if authenticated, None otherwise
    """
    if not is_authenticated(token, profile):
        return None

    # In dev mode, return mock user
    if DEV_MODE:
        return User.create_dev_user()

    return User.from_oauth(token, profile)


def create_login_handler(on_login_success=None, on_login_failure=None):
    """
    Create a login handler function for Gradio LoginButton

    Args:
        on_login_success: Callback function called when login succeeds
        on_login_failure: Callback function called when login fails

    Returns:
        Handler function compatible with Gradio LoginButton.click()
    """
    def handle_login(token: gr.OAuthToken | None, profile: gr.OAuthProfile | None):
        if is_authenticated(token, profile):
            user = get_user_info(token, profile)
            if on_login_success:
                return on_login_success(user)
            return user
        else:
            if on_login_failure:
                return on_login_failure()
            return None

    return handle_login


def require_auth(func):
    """
    Decorator to require authentication for a function

    Usage:
        @require_auth
        def my_function(user: User, other_args...):
            # user is guaranteed to be a valid User object
            pass
    """
    def wrapper(token: gr.OAuthToken | None, profile: gr.OAuthProfile | None, *args, **kwargs):
        if not is_authenticated(token, profile):
            gr.Warning("Please log in to Hugging Face to access this feature!")
            return None

        user = get_user_info(token, profile)
        return func(user, *args, **kwargs)

    return wrapper


# UI component helpers
def create_login_button(visible: bool = True) -> gr.LoginButton:
    """
    Create a styled HuggingFace login button
    Automatically hidden in dev mode
    """
    # Hide login button in dev mode
    if DEV_MODE:
        visible = False

    return gr.LoginButton(visible=visible)


def create_user_info_display(user: Optional[User]) -> str:
    """
    Create HTML for user info display

    Args:
        user: User object or None

    Returns:
        HTML string for display
    """
    if user is None:
        # In dev mode, don't show login prompt
        if DEV_MODE:
            return """
            <div style="text-align: center; padding: 10px; border: 2px solid #ffa500; border-radius: 10px; background-color: #fff4e6;">
                <strong>🛠️ Development Mode</strong>
                <p style="margin: 5px 0 0 0; font-size: 0.9em;">OAuth disabled for local testing</p>
            </div>
            """

        return """
        <div style="text-align: center; padding: 20px; border: 2px dashed #ccc; border-radius: 10px;">
            <h3>🔒 Login Required</h3>
            <p>Please log in with your Hugging Face account to access TraceMind</p>
        </div>
        """

    # Add dev mode badge if in dev mode
    dev_badge = ""
    if DEV_MODE:
        dev_badge = '<span style="background: #ffa500; color: white; padding: 2px 8px; border-radius: 4px; font-size: 0.8em; margin-left: 10px;">DEV</span>'

    return f"""
    <div style="display: flex; align-items: center; padding: 10px; border: 1px solid #e0e0e0; border-radius: 8px;">
        <img src="{user.avatar_url}" alt="{user.name}"
             style="width: 48px; height: 48px; border-radius: 50%; margin-right: 15px;">
        <div>
            <strong>{user.name}</strong>{dev_badge}<br>
            <small style="color: #666;">@{user.username}</small>
        </div>
    </div>
    """


def create_auth_warning(message: str = "Please login first") -> str:
    """Create a warning message for unauthenticated users"""
    return f"""
    <div style="text-align: center; padding: 20px; border: 2px solid #ff6b6b; border-radius: 10px; background-color: #ffe0e0;">
        <h3>⚠️ Authentication Required</h3>
        <p>{message}</p>
    </div>
    """
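A minimal sketch of how these helpers fit together in a Gradio app. Gradio injects `gr.OAuthToken` / `gr.OAuthProfile` into any event handler whose signature annotates them, which is exactly what `require_auth`'s wrapper does, so the decorated function can be wired to an event with no explicit inputs. The component layout below is assumed; the real app.py wiring is not part of this commit excerpt.

```python
import gradio as gr
from utils.auth import User, require_auth, create_login_button, create_user_info_display

# Hypothetical handler: Gradio fills in the OAuth token/profile arguments of
# the require_auth wrapper automatically, based on its type annotations.
@require_auth
def show_profile(user: User) -> str:
    return create_user_info_display(user)

with gr.Blocks() as demo:
    create_login_button()  # rendered normally; hidden when DISABLE_OAUTH=true
    profile_html = gr.HTML()
    gr.Button("Show my profile").click(show_profile, inputs=None, outputs=profile_html)

demo.launch()
```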
utils/navigation.py
ADDED
@@ -0,0 +1,158 @@
"""
Navigation utilities for MockTraceMind screen flow
"""

import gradio as gr
from enum import Enum
from typing import Any, Dict, Optional, Tuple


class Screen(Enum):
    """Available screens in MockTraceMind"""
    LEADERBOARD = "leaderboard"
    COMPARE = "compare"
    RUN_DETAIL = "run_detail"
    TRACE_DETAIL = "trace_detail"


class Navigator:
    """
    Manages screen navigation and state

    Screen Flow:
    - Leaderboard (Screen 1)
        - Click row → Run Detail (Screen 3)
        - Select 2+ rows + Compare → Compare View (Screen 2)
            - Click either run → Run Detail (Screen 3)
    - Run Detail (Screen 3)
        - Click test case row → Trace Detail (Screen 4)
    - Trace Detail (Screen 4)
        - Back → Run Detail (Screen 3)
    """

    def __init__(self):
        self.current_screen = Screen.LEADERBOARD
        self.navigation_stack = [Screen.LEADERBOARD]
        self.screen_context: Dict[str, Any] = {}

    def navigate_to(
        self,
        screen: Screen,
        context: Optional[Dict[str, Any]] = None,
        add_to_stack: bool = True
    ) -> Tuple[Screen, Dict[str, Any]]:
        """
        Navigate to a screen with optional context

        Args:
            screen: Target screen
            context: Data to pass to the screen
            add_to_stack: Whether to add to navigation stack

        Returns:
            Tuple of (screen, context)
        """
        self.current_screen = screen

        if context:
            self.screen_context.update(context)

        if add_to_stack:
            self.navigation_stack.append(screen)

        return screen, self.screen_context

    def back(self) -> Tuple[Screen, Dict[str, Any]]:
        """
        Navigate back in the navigation stack

        Returns:
            Tuple of (previous_screen, context)
        """
        if len(self.navigation_stack) > 1:
            self.navigation_stack.pop()  # Remove current
            previous = self.navigation_stack[-1]
            self.current_screen = previous
            return previous, self.screen_context

        # Already at root
        return self.current_screen, self.screen_context

    def get_current_screen(self) -> Screen:
        """Get the current active screen"""
        return self.current_screen

    def get_context(self, key: str, default: Any = None) -> Any:
        """Get value from screen context"""
        return self.screen_context.get(key, default)

    def set_context(self, key: str, value: Any) -> None:
        """Set value in screen context"""
        self.screen_context[key] = value

    def clear_context(self) -> None:
        """Clear all screen context"""
        self.screen_context.clear()

    def reset(self) -> None:
        """Reset navigation to initial state"""
        self.current_screen = Screen.LEADERBOARD
        self.navigation_stack = [Screen.LEADERBOARD]
        self.screen_context.clear()


# Gradio visibility update helpers
def show_screen(screen: Screen) -> Dict[str, Any]:
    """
    Generate Gradio updates to show a specific screen

    Returns:
        Dictionary mapping container names to gr.update(visible=...) results
    """
    return {
        "leaderboard_container": gr.update(visible=(screen == Screen.LEADERBOARD)),
        "compare_container": gr.update(visible=(screen == Screen.COMPARE)),
        "run_detail_container": gr.update(visible=(screen == Screen.RUN_DETAIL)),
        "trace_detail_container": gr.update(visible=(screen == Screen.TRACE_DETAIL)),
    }


def create_back_button(visible: bool = True) -> gr.Button:
    """Create a consistent back button"""
    return gr.Button("⬅️ Back", visible=visible, variant="secondary", size="sm")


def create_breadcrumb(navigation_stack: list) -> str:
    """
    Create breadcrumb navigation HTML

    Args:
        navigation_stack: List of Screen enums

    Returns:
        HTML string for breadcrumb
    """
    breadcrumb_names = {
        Screen.LEADERBOARD: "Leaderboard",
        Screen.COMPARE: "Compare",
        Screen.RUN_DETAIL: "Run Detail",
        Screen.TRACE_DETAIL: "Trace Detail"
    }

    breadcrumb_items = []
    for i, screen in enumerate(navigation_stack):
        name = breadcrumb_names.get(screen, screen.value)
        if i < len(navigation_stack) - 1:
            # Earlier items in the trail - render as muted text
            breadcrumb_items.append(f'<span style="color: #666;">{name}</span>')
        else:
            # Last item - current screen
            breadcrumb_items.append(f'<strong>{name}</strong>')

    breadcrumb_html = " > ".join(breadcrumb_items)

    return f"""
    <div style="padding: 10px; background-color: #f5f5f5; border-radius: 5px; margin-bottom: 10px;">
        {breadcrumb_html}
    </div>
    """
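To make the screen-flow docstring concrete, here is a minimal sketch of how `Navigator` and `show_screen` could drive container visibility in a two-screen app. The container names and button wiring are illustrative assumptions; the real screens are slated for migration in a later commit.

```python
import gradio as gr
from utils.navigation import Navigator, Screen, show_screen, create_back_button

# Hypothetical wiring: only two of the four screens are stubbed out here.
nav = Navigator()

with gr.Blocks() as demo:
    back = create_back_button()
    with gr.Column(visible=True) as leaderboard_container:
        gr.Markdown("## Leaderboard")
    with gr.Column(visible=False) as run_detail_container:
        gr.Markdown("## Run Detail")

    containers = [leaderboard_container, run_detail_container]

    def open_run_detail():
        # navigate_to pushes the screen and merges the context dict
        screen, _ = nav.navigate_to(Screen.RUN_DETAIL, context={"run_id": "demo"})
        updates = show_screen(screen)
        return [updates["leaderboard_container"], updates["run_detail_container"]]

    def go_back():
        # back() pops the stack and returns the previous screen
        screen, _ = nav.back()
        updates = show_screen(screen)
        return [updates["leaderboard_container"], updates["run_detail_container"]]

    gr.Button("Open run").click(open_run_detail, outputs=containers)
    back.click(go_back, outputs=containers)

demo.launch()
```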