openenv2 / server /data.py
hissterical's picture
Upload 10 files
ebf4715 verified
from __future__ import annotations
from dataclasses import dataclass
from typing import Any
@dataclass(frozen=True)
class TaskSpec:
    """Immutable description of one configuration-repair task.

    Instances are frozen, so attributes cannot be rebound after
    construction. The ``dict``/``list`` fields are still mutable objects,
    so callers should treat them as read-only.
    """

    # Identifier of the task; the registry in this module uses the same
    # string as its dictionary key.
    task_id: str
    # Short human-readable title.
    name: str
    # Prose summary of what has to be fixed.
    description: str
    # Difficulty label; the specs in this module use "easy", "medium", "hard".
    difficulty: str
    # Integer step limit for the task.
    # NOTE(review): presumably the per-episode step budget -- confirm with
    # the consumer of this field.
    max_steps: int
    # YAML text of the misconfigured starting document.
    broken: str
    # Parsed form of the expected, repaired document.
    target: dict[str, Any]
    # Dotted paths into ``target`` (list positions appear as numeric
    # segments, e.g. ``containers.0.image``) mapped to a float.
    # NOTE(review): the floats look like per-path scoring weights -- confirm.
    required_paths: dict[str, float]
    # Human-readable statements of the constraints the repaired document
    # is expected to satisfy.
    logic_checks: list[str]
# Registry of every available repair task, keyed by ``TaskSpec.task_id``.
#
# Each ``broken`` value is the YAML text of the misconfigured document and
# each ``target`` is the parsed form of the expected repair. The YAML is
# written with conventional 2-space nesting so that its structure lines up
# with ``target`` and with the dotted keys used in ``required_paths``.
TASK_REGISTRY: dict[str, TaskSpec] = {
    "easy_docker": TaskSpec(
        task_id="easy_docker",
        name="Docker Compose Repair",
        description=(
            "Fix docker-compose config: invalid port entry, environment format, "
            "image tags, and full DB port mapping"
        ),
        difficulty="easy",
        max_steps=15,
        # Differences from ``target``: untagged web image, a non-port entry
        # ("abcdef") in web.ports, list-style environment with a bare
        # API_KEY, and a db port without a host side.
        broken="""version: "3.8"
services:
  web:
    image: nginx
    ports:
      - "80:80"
      - abcdef
    environment:
      - DEBUG=true
      - API_KEY
  db:
    image: postgres:15
    ports:
      - "5432"
volumes:
  db_data:
""",
        target={
            "version": "3.8",
            "services": {
                "web": {
                    "image": "nginx:latest",
                    "ports": ["80:80"],
                    "environment": {
                        "DEBUG": "true",
                        "API_KEY": "placeholder",
                    },
                },
                "db": {
                    "image": "postgres:15",
                    "ports": ["5432:5432"],
                },
            },
            # An empty ``db_data:`` entry parses to None.
            "volumes": {"db_data": None},
        },
        required_paths={
            "services.web.image": 1.0,
            "services.web.ports": 1.3,
            "services.web.environment.DEBUG": 1.0,
            "services.web.environment.API_KEY": 1.0,
            "services.db.ports": 1.1,
            "volumes.db_data": 0.6,
        },
        logic_checks=[
            "web port must be host:container",
            "db port must be full mapping",
            "environment should be key-value map",
        ],
    ),
    "medium_k8s": TaskSpec(
        task_id="medium_k8s",
        name="Kubernetes Deployment Repair",
        description=(
            "Fix deployment manifest types and required fields: replicas type, "
            "namespace, memory units, cpu request format, and containerPort"
        ),
        difficulty="medium",
        max_steps=18,
        # Differences from ``target``: replicas is a quoted string, no
        # namespace, untagged image, unit-less memory limit, a memory
        # request of 1Gi where the target has 256Mi, and no ports section.
        # NOTE(review): requests.cpu is already "500m" here, identical to
        # ``target``, although the description lists "cpu request format"
        # as something to fix -- confirm this is intended.
        broken="""apiVersion: apps/v1
kind: Deployment
metadata:
  name: web-app
spec:
  replicas: "3"
  selector:
    matchLabels:
      app: web
  template:
    metadata:
      labels:
        app: web
    spec:
      containers:
        - name: nginx
          image: nginx
          resources:
            limits:
              memory: 512
              cpu: "1"
            requests:
              memory: 1Gi
              cpu: 500m
""",
        target={
            "apiVersion": "apps/v1",
            "kind": "Deployment",
            "metadata": {"name": "web-app", "namespace": "default"},
            "spec": {
                "replicas": 3,
                "selector": {"matchLabels": {"app": "web"}},
                "template": {
                    "metadata": {"labels": {"app": "web"}},
                    "spec": {
                        "containers": [
                            {
                                "name": "nginx",
                                "image": "nginx:latest",
                                "resources": {
                                    "limits": {"memory": "512Mi", "cpu": "1"},
                                    "requests": {"memory": "256Mi", "cpu": "500m"},
                                },
                                "ports": [{"containerPort": 80}],
                            }
                        ]
                    },
                },
            },
        },
        required_paths={
            "metadata.namespace": 1.0,
            "spec.replicas": 1.0,
            "spec.template.spec.containers.0.image": 0.8,
            "spec.template.spec.containers.0.resources.limits.memory": 1.1,
            "spec.template.spec.containers.0.resources.requests.memory": 1.1,
            "spec.template.spec.containers.0.resources.requests.cpu": 1.0,
            "spec.template.spec.containers.0.ports.0.containerPort": 1.0,
        },
        logic_checks=[
            "replicas should be integer",
            "memory values should be strings with unit",
            "cpu request should be millicores string",
        ],
    ),
    "hard_ml_config": TaskSpec(
        task_id="hard_ml_config",
        name="ML Training Config Stabilization",
        description=(
            "Fix interdependent training and hardware constraints: warmup < max, "
            "GPU consistency, optimizer choice, and logging frequency"
        ),
        difficulty="hard",
        max_steps=22,
        # Differences from ``target``: warmup_steps exceeds max_steps,
        # use_cuda is true with gpu_count 0, optimizer is "adam", batch
        # sizes differ, log_interval is 1000, and an extra ``fp16: true``
        # key is present that ``target`` does not contain.
        broken="""training:
  batch_size: 32
  gradient_accumulation_steps: 4
  max_steps: 100
  warmup_steps: 200
  learning_rate: 0.001
  mixed_precision: fp16
  fp16: true
  optimizer:
    type: adam
    weight_decay: 0.01
hardware:
  gpu_count: 0
  use_cuda: true
data:
  train_batch_size: 64
  eval_batch_size: 32
logging:
  log_interval: 1000
""",
        target={
            "training": {
                "batch_size": 16,
                "gradient_accumulation_steps": 2,
                "max_steps": 1000,
                "warmup_steps": 100,
                "learning_rate": 0.001,
                "mixed_precision": "fp16",
                "optimizer": {"type": "adamw", "weight_decay": 0.01},
            },
            "hardware": {"gpu_count": 1, "use_cuda": True},
            "data": {"train_batch_size": 32, "eval_batch_size": 32},
            "logging": {"log_interval": 10},
        },
        required_paths={
            "training.max_steps": 1.1,
            "training.warmup_steps": 1.3,
            "training.optimizer.type": 1.2,
            "hardware.gpu_count": 1.2,
            "hardware.use_cuda": 0.8,
            "data.train_batch_size": 1.1,
            "logging.log_interval": 1.0,
        },
        logic_checks=[
            "warmup_steps must be less than max_steps",
            "if use_cuda is true, gpu_count must be >= 1",
            "train_batch_size should be 2 * batch_size",
            "log_interval should be <= 100",
        ],
    ),
}