# Source: DataEngEval / config/metrics.yaml
# Commit acd8e16 by uparekh01151: "Initial commit for DataEngEval"
# Metrics Configuration
# NOTE(review): the nesting below was reconstructed from the section comments;
# confirm against the loader that weights, descriptions, thresholds and
# formatting are all read from under `metrics`.
metrics:
  # Scoring weights for composite score calculation.
  # The six weights below sum to 1.00.
  weights:
    correctness_exact: 0.40
    exec_success: 0.25
    result_match_f1: 0.15
    dialect_ok: 0.10
    readability: 0.05
    latency: 0.05

  # Metric descriptions: one human-readable entry per key in `weights`.
  descriptions:
    correctness_exact: "Binary score (0/1) for exact result match"
    exec_success: "Binary score (0/1) for successful SQL execution"
    result_match_f1: "F1 score for partial result matching"
    latency: "Response time in milliseconds"
    readability: "Score based on SQL structure and formatting"
    dialect_ok: "Binary score (0/1) for successful SQL transpilation"

  # Thresholds and limits
  # NOTE(review): min_score/max_score are presumably the bounds scores are
  # clamped or validated against - confirm with the consumer.
  thresholds:
    max_latency_ms: 30000  # 30 seconds timeout
    min_score: 0.0
    max_score: 1.0

  # Display formatting
  # Values must stay quoted: an unquoted scalar starting with `{` would be
  # parsed as a YAML flow mapping.
  # NOTE(review): these look like Python str.format specs - confirm.
  # NOTE(review): this section uses `latency_ms` while `weights` and
  # `descriptions` use `latency`; verify the consumer expects both spellings.
  formatting:
    composite_score: "{:.4f}"
    correctness_exact: "{:.2f}"
    exec_success: "{:.2f}"
    result_match_f1: "{:.4f}"
    latency_ms: "{:.1f}ms"
    dialect_ok: "{:.2f}"
    readability: "{:.2f}"
# Mock SQL Generation Patterns
mock_sql:
  # Keyword lists grouped by query category.
  # NOTE(review): presumably matched against the incoming question text to
  # pick one of the templates below; matching rules (case sensitivity,
  # precedence between categories) are not defined here - confirm in the
  # consumer.
  patterns:
    count_queries:
      - "how many"
      - "count"
    average_queries:
      - "average"
      - "avg"
    total_queries:
      - "total"
      - "amount"
    passenger_queries:
      - "passenger"

  # Canned SQL statements; every template reads from the `trips` table.
  # `default` is a plain 10-row sample query.
  templates:
    count_trips: "SELECT COUNT(*) as total_trips FROM trips"
    count_generic: "SELECT COUNT(*) FROM trips"
    avg_fare: "SELECT AVG(fare_amount) as avg_fare FROM trips"
    avg_generic: "SELECT AVG(total_amount) FROM trips"
    total_amount: "SELECT SUM(total_amount) as total_collected FROM trips"
    passenger_count: "SELECT passenger_count, COUNT(*) as trip_count FROM trips GROUP BY passenger_count"
    default: "SELECT * FROM trips LIMIT 10"