# File: DataEngEval/config/use_cases.yaml
# Commit: acd8e16 ("Initial commit for DataEngEval"), user uparekh01151
# Use Cases Configuration
#
# One entry per evaluation use case, keyed by its identifier
# (sql_generation, code_generation, documentation). Every entry carries:
#   name / description      - human-readable labels
#   input_type / output_type - identifiers for what goes in and comes out
#   metrics                  - names of the metrics reported for the use case
#   weights                  - one numeric weight per metric listed in `metrics`
#   datasets                 - dataset identifiers used for the use case
# plus one use-case-specific list (dialects, languages, or formats).
#
# Within each use case the weights add up to 1.00 and every metric has
# exactly one weight; keep both lists in sync when adding or removing a metric.
# NOTE(review): presumably the weights are combined into a single composite
# score - confirm against the code that consumes this file.
use_cases:
  sql_generation:
    name: "SQL Generation"
    description: "Natural language to SQL query generation"
    input_type: "natural_language"
    output_type: "sql_query"
    metrics:
      - correctness_exact
      - exec_success
      - result_match_f1
      - dialect_ok
      - readability
      - latency
    weights:
      correctness_exact: 0.40
      exec_success: 0.25
      result_match_f1: 0.15
      dialect_ok: 0.10
      readability: 0.05
      latency: 0.05
    datasets:
      - nyc_taxi_small
    # SQL dialects covered by this use case.
    dialects:
      - presto
      - bigquery
      - snowflake

  code_generation:
    name: "Code Generation"
    description: "Natural language to source code generation"
    input_type: "natural_language"
    output_type: "source_code"
    metrics:
      - syntax_correctness
      - compilation_success
      - execution_success
      - code_quality
      - performance
      - latency
    weights:
      syntax_correctness: 0.30
      compilation_success: 0.25
      execution_success: 0.20
      code_quality: 0.15
      performance: 0.05
      latency: 0.05
    # Target programming languages for this use case.
    languages:
      - python
      - go
      - javascript
      - java
    # NOTE(review): four languages are listed above but only two datasets
    # here; judging by their names they cover python and go only - confirm
    # whether javascript/java datasets are intentionally absent.
    datasets:
      - python_algorithms
      - go_algorithms

  documentation:
    name: "Documentation Generation"
    description: "Natural language to technical documentation"
    input_type: "natural_language"
    output_type: "documentation"
    metrics:
      - accuracy
      - completeness
      - clarity
      - format_compliance
      - technical_correctness
      - latency
    weights:
      accuracy: 0.25
      completeness: 0.25
      clarity: 0.20
      format_compliance: 0.15
      technical_correctness: 0.10
      latency: 0.05
    # Output formats covered by this use case.
    formats:
      - markdown
      - html
      - json
      - yaml
    datasets:
      - technical_docs
      - api_documentation
# Evaluation frameworks for each use case
#
# Keyed by the same use-case identifiers that appear under `use_cases`.
# Each entry names three components: an executor, a metrics computer, and
# a validator.
# NOTE(review): the values look like class names resolved by the evaluation
# harness - confirm how they are looked up before renaming any of them.
evaluation_frameworks:
  sql_generation:
    executor: "SQLExecutor"
    metrics_computer: "SQLMetricsComputer"
    validator: "SQLValidator"

  code_generation:
    executor: "CodeExecutor"
    metrics_computer: "CodeMetricsComputer"
    validator: "CodeValidator"

  documentation:
    executor: "DocProcessor"
    metrics_computer: "DocMetricsComputer"
    validator: "DocValidator"
# Model configurations for each use case
#
# Keyed by the same use-case identifiers that appear under `use_cases`.
# Each entry holds a `models` list of model names; a model may be listed
# under more than one use case (e.g. "GPT-4" appears in all three).
# NOTE(review): presumably these names must match identifiers defined in a
# separate model registry/config - confirm against the loader.
model_configs:
  sql_generation:
    models:
      - "SQLCoder-7B"
      - "SQLCoder2-7B"
      - "CodeT5-Base"
      - "GPT-4"

  code_generation:
    models:
      - "CodeT5-Base"
      - "CodeGen-6B"
      - "GPT-4"
      - "Claude-3"

  documentation:
    models:
      - "GPT-4"
      - "Claude-3"
      - "Llama-2"
      - "PaLM-2"