Spaces:
Sleeping
Sleeping
Create pack_builder.py
Browse files- pipeline/pack_builder.py +70 -0
pipeline/pack_builder.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
import json
|
| 3 |
+
import re
|
| 4 |
+
from .io_utils import save_json
|
| 5 |
+
# Replace with your real LLM runner
|
| 6 |
+
from .run_two_phase import call_model
|
| 7 |
+
|
| 8 |
+
def slugify(name: str) -> str:
    """Turn *name* into a lowercase, underscore-separated identifier.

    Surrounding whitespace is removed, every run of characters other than
    ASCII letters and digits collapses into a single underscore, leading and
    trailing underscores are dropped, and the result is lowercased. When
    nothing is left, the fallback ``"scenario"`` is returned.
    """
    collapsed = re.sub(r"[^a-zA-Z0-9]+", "_", name.strip())
    slug = collapsed.strip("_").lower()
    if not slug:
        return "scenario"
    return slug
|
| 11 |
+
|
| 12 |
+
def build_pack_from_scenario(root_dir: Path, scenario_text: str, pack_name_hint: str = "") -> Path:
    """Create a scenario pack directory from free-text scenario input.

    1) Reads the extractor prompts from ``root_dir / "prompts"``, substitutes
       ``scenario_text`` for the ``{SCENARIO_TEXT}`` placeholder and calls the
       model; the parsed JSON reply is written as ``inputs.json``.
    2) Writes default ``constraints.json`` and ``schema.json``, plus
       placeholder ``rubric.json``, ``expected.json`` and
       ``clarifications.json``.
    3) Returns the created pack path.

    Args:
        root_dir: Project root containing ``prompts/``; the pack is created
            under ``root_dir / "packs"``.
        scenario_text: Free-text scenario description sent to the extractor.
        pack_name_hint: Optional pack name. When empty, the extracted
            ``"context"`` value is used if it is a string, otherwise
            ``"scenario"``.

    Returns:
        The pack directory (``root_dir / "packs" / <slug>``). The directory is
        created if missing and reused if it already exists.

    Raises:
        ValueError: If the extractor reply is not valid JSON, or is valid JSON
            but not a JSON object.
    """
    prompts_dir = root_dir / "prompts"
    extractor_sys = (prompts_dir / "extractor_system.txt").read_text(encoding="utf-8")
    extractor_user_tmpl = (prompts_dir / "extractor_user_template.txt").read_text(encoding="utf-8")
    user_prompt = extractor_user_tmpl.replace("{SCENARIO_TEXT}", scenario_text)

    raw = call_model(extractor_sys, user_prompt)
    try:
        extracted = json.loads(raw)
    except (TypeError, ValueError) as e:
        # json.JSONDecodeError is a ValueError; TypeError covers a non-text reply.
        raise ValueError(f"Extractor produced non-JSON output.\nRaw:\n{raw}") from e

    # Valid JSON may still be a list/string/number; the pack needs an object
    # so that keys such as "context" can be looked up.
    if not isinstance(extracted, dict):
        raise ValueError(
            f"Extractor output must be a JSON object, got {type(extracted).__name__}.\nRaw:\n{raw}"
        )

    # Decide pack name
    context = extracted.get("context")
    if not isinstance(context, str):
        # Only a string can be slugified; ignore structured or numeric context.
        context = ""
    hint = pack_name_hint or context or "scenario"
    # Truncation can cut right after a separator; drop the dangling underscore.
    # slugify never returns an empty or underscore-leading string, so the
    # result stays non-empty.
    pack_slug = slugify(hint)[:48].rstrip("_")
    pack_dir = root_dir / "packs" / pack_slug
    pack_dir.mkdir(parents=True, exist_ok=True)

    # inputs.json
    save_json(pack_dir / "inputs.json", extracted)

    # constraints.json (default, can be edited later)
    default_constraints = {
        "analytics_first": True,
        "require_longitudinal": True,
        "indicator_priority": ["obesity_pct", "metabolic_syndrome_pct", "membership_2021"],
        "capacity_formula": "teams * clients_per_team_per_day * working_days",
        "cost_formula": "startup_per_client + ongoing_per_client",
        "prioritization_rule": "Sort by membership_2021 and comorbidity prevalence; tie-breaker: infrastructure",
    }
    save_json(pack_dir / "constraints.json", default_constraints)

    # schema.json (points to global analysis schema)
    save_json(pack_dir / "schema.json", {
        "archetypes": ["burden_prioritization", "capacity", "cost", "outcomes", "optimization"],
        "output_schema": "schemas/analysis_output.schema.json",
    })

    # placeholders to support pipeline (optional for new packs)
    save_json(pack_dir / "rubric.json", {
        "set_equals": [],
        "must_contain": [],
        "numeric_equals": [],
    })
    save_json(pack_dir / "expected.json", {
        "note": "Optional gold output for regression tests. Populate when available.",
    })

    # empty clarifications; you can fill interactively or with a UI later
    save_json(pack_dir / "clarifications.json", {
        "_note": "Populate with answers to Phase 1 clarification questions if you want batch runs.",
    })

    return pack_dir
|