Spaces:
Sleeping
Sleeping
# Metrics Configuration
# NOTE(review): nesting was reconstructed from a flattened copy of this file;
# confirm that `thresholds` and `formatting` belong under `metrics`.
metrics:
  # Scoring weights for composite score calculation.
  # The six weights below sum to 1.00.
  weights:
    correctness_exact: 0.40
    exec_success: 0.25
    result_match_f1: 0.15
    dialect_ok: 0.10
    readability: 0.05
    latency: 0.05

  # Human-readable description of each metric
  descriptions:
    correctness_exact: "Binary score (0/1) for exact result match"
    exec_success: "Binary score (0/1) for successful SQL execution"
    result_match_f1: "F1 score for partial result matching"
    latency: "Response time in milliseconds"
    readability: "Score based on SQL structure and formatting"
    dialect_ok: "Binary score (0/1) for successful SQL transpilation"

  # Thresholds and limits
  thresholds:
    max_latency_ms: 30000  # 30-second timeout
    min_score: 0.0
    max_score: 1.0

  # Display formatting (Python str.format-style specs)
  # NOTE(review): the key here is `latency_ms`, while `weights` and
  # `descriptions` use `latency` — confirm the consumer expects both spellings.
  formatting:
    composite_score: "{:.4f}"
    correctness_exact: "{:.2f}"
    exec_success: "{:.2f}"
    result_match_f1: "{:.4f}"
    latency_ms: "{:.1f}ms"
    dialect_ok: "{:.2f}"
    readability: "{:.2f}"

# Mock SQL Generation Patterns
# NOTE(review): nesting was reconstructed from a flattened copy of this file;
# confirm against the loader that reads `mock_sql`.
mock_sql:
  # Keyword lists grouped by query category.
  # Presumably matched against the incoming question text — verify in the caller.
  patterns:
    count_queries:
      - "how many"
      - "count"
    average_queries:
      - "average"
      - "avg"
    total_queries:
      - "total"
      - "amount"
    passenger_queries:
      - "passenger"

  # Canned SQL statements, all against the `trips` table.
  templates:
    count_trips: "SELECT COUNT(*) as total_trips FROM trips"
    count_generic: "SELECT COUNT(*) FROM trips"
    avg_fare: "SELECT AVG(fare_amount) as avg_fare FROM trips"
    avg_generic: "SELECT AVG(total_amount) FROM trips"
    total_amount: "SELECT SUM(total_amount) as total_collected FROM trips"
    passenger_count: "SELECT passenger_count, COUNT(*) as trip_count FROM trips GROUP BY passenger_count"
    # Fallback template
    default: "SELECT * FROM trips LIMIT 10"