| |
| from __future__ import annotations |
|
|
| import random |
| import uuid |
| from dataclasses import dataclass |
| from typing import Dict, List |
|
|
| import numpy as np |
|
|
| from .bug_library import BugTemplate |
|
|
|
|
@dataclass
class Scenario:
    """Record bundling one generated scenario: repo snapshot, artifacts, answer key.

    Field order is part of the positional constructor signature and must not
    be rearranged.
    """

    # Short identifier for this scenario instance.
    scenario_id: str
    # Task label; ScenarioGenerator.generate stores the requested difficulty here.
    task_id: str
    # Maps a repo-relative path (e.g. "train.py") to that file's text.
    repo_files: Dict[str, str]
    # Artifacts produced by the selected bug template's artifact generator.
    # NOTE(review): the record schema of the two list artifacts is defined by
    # the templates, not here -- confirm against bug_library.
    loss_curve: List[Dict]
    gpu_profile: List[Dict]
    training_log: str
    diagnostic_report: str
    # Answer key describing the injected bug (type, files, fix strategy, ...).
    ground_truth: Dict
|
|
|
|
class ScenarioGenerator:
    """Produce ``Scenario`` objects by applying a sampled bug template to a base repo.

    Each template is expected to expose ``difficulty``, ``repo_mutator``,
    ``artifact_generator`` and the descriptive attributes copied into the
    scenario's ground truth (see ``generate``).
    """

    def __init__(self, bug_templates: List[BugTemplate]):
        """Create a generator that samples from a set of bug templates."""
        self.bug_templates = bug_templates

    def generate(self, difficulty: str, seed: int | None = None) -> Scenario:
        """Build a scenario with deterministic artifacts when a seed is provided.

        Args:
            difficulty: Only templates whose ``difficulty`` equals this value
                are eligible; it is also stored as the scenario's ``task_id``.
            seed: Seed for the private ``random.Random`` used for template
                selection, repo mutation and artifact generation. ``None``
                lets ``random.Random`` seed itself.

        Returns:
            The assembled ``Scenario``.

        Raises:
            ValueError: If no template matches ``difficulty``.

        Note:
            ``scenario_id`` comes from ``uuid.uuid4()`` and is therefore not
            controlled by ``seed``; only the template choice, repo files and
            artifacts draw from the seeded generator.
        """
        rng = random.Random(seed)
        candidates = [b for b in self.bug_templates if b.difficulty == difficulty]
        if not candidates:
            raise ValueError(f"Unknown difficulty: {difficulty}")
        template = rng.choice(candidates)

        # The order of the rng-consuming calls below determines what a given
        # seed produces; reordering them would change seeded output.
        repo_files = self._base_repo(rng)
        repo_files = template.repo_mutator(repo_files, rng)

        loss_curve = template.artifact_generator("loss_curve", rng)
        gpu_profile = template.artifact_generator("gpu_profile", rng)
        training_log = template.artifact_generator("training_log", rng)
        diagnostic_report = template.artifact_generator("diagnostic_report", rng)

        ground_truth = {
            "bug_type": template.bug_type,
            "category": template.category,
            "primary_bug_file": template.primary_bug_file,
            "related_files": template.related_files,
            "red_herring_file": template.red_herring_file,
            "fix_strategy": template.fix_strategy,
            "line_range": template.line_range,
        }

        return Scenario(
            scenario_id=str(uuid.uuid4())[:8],
            task_id=difficulty,
            repo_files=repo_files,
            loss_curve=loss_curve,
            gpu_profile=gpu_profile,
            training_log=training_log,
            diagnostic_report=diagnostic_report,
            ground_truth=ground_truth,
        )

    def _base_repo(self, rng: random.Random) -> Dict[str, str]:
        """Return a fresh path -> contents mapping for the unmutated repo.

        ``rng`` is accepted for interface symmetry but is not consumed here.
        """
        return {
            "train.py": self._train_py(),
            "model/architecture.py": self._model_py(),
            "model/attention.py": self._attention_py(),
            "data/dataset.py": self._dataset_py(),
            "data/preprocessing.py": self._preprocess_py(),
            "config/training_config.yaml": self._config_yaml(),
        }

    def _train_py(self) -> str:
        """Return the placeholder text for ``train.py``."""
        return "import torch\nfrom model.architecture import Net\n\n# training loop placeholder\n"

    def _model_py(self) -> str:
        """Return the placeholder text for ``model/architecture.py``.

        The method body must be indented deeper than its ``def`` line,
        otherwise the generated file does not parse as Python.
        """
        return (
            "import torch.nn as nn\n"
            "\n"
            "class Net(nn.Module):\n"
            "    def __init__(self):\n"
            "        super().__init__()\n"
        )

    def _attention_py(self) -> str:
        """Return the placeholder text for ``model/attention.py``."""
        return "# custom attention layer\n"

    def _dataset_py(self) -> str:
        """Return the placeholder text for ``data/dataset.py``."""
        return "from torch.utils.data import Dataset\n\nclass ImageDataset(Dataset):\n pass\n"

    def _preprocess_py(self) -> str:
        """Return the placeholder text for ``data/preprocessing.py``."""
        return "def normalize(x):\n return x\n"

    def _config_yaml(self) -> str:
        """Return the default text for ``config/training_config.yaml``."""
        return "lr: 0.001\nbatch_size: 32\n"
|
|