VED-AGI-1 committed on
Commit
8c7975e
·
verified ·
1 Parent(s): 031e209

Create pack_builder.py

Browse files
Files changed (1) hide show
  1. pipeline/pack_builder.py +70 -0
pipeline/pack_builder.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import json
3
+ import re
4
+ from .io_utils import save_json
5
+ # Replace with your real LLM runner
6
+ from .run_two_phase import call_model
7
+
8
def slugify(name: str) -> str:
    """Turn free text into a lowercase, underscore-separated ASCII slug.

    Accented and compatibility characters are folded to their ASCII base
    where Unicode defines a decomposition (for example "é" becomes "e"), so
    they are kept in the slug rather than being replaced by a separator.
    Every remaining run of characters outside ``[a-zA-Z0-9]`` collapses to
    a single underscore, and leading/trailing underscores are removed.

    Args:
        name: Arbitrary text to convert.

    Returns:
        The slug, or ``"scenario"`` when nothing alphanumeric remains.
    """
    # Function-scope import so this block does not depend on a file-level
    # import being added elsewhere.
    import unicodedata

    # NFKD splits "é" into "e" + a combining accent; dropping the combining
    # marks leaves the base letter. Characters without a decomposition are
    # left untouched here and are turned into "_" by the regex below.
    decomposed = unicodedata.normalize("NFKD", name)
    folded = "".join(ch for ch in decomposed if not unicodedata.combining(ch))

    slug = re.sub(r"[^a-zA-Z0-9]+", "_", folded).strip("_").lower()
    return slug or "scenario"
11
+
12
def build_pack_from_scenario(
    root_dir: Path,
    scenario_text: str,
    pack_name_hint: str = "",
) -> Path:
    """Create a scenario pack directory from free-text scenario input.

    Steps:
      1) Reads ``extractor_system.txt`` and ``extractor_user_template.txt``
         from ``root_dir / "prompts"``, substitutes ``{SCENARIO_TEXT}`` in
         the user template, and calls the model with both prompts.
      2) Parses the model reply as JSON and saves it as ``inputs.json``.
      3) Writes default ``constraints.json`` and ``schema.json``, plus
         placeholder ``rubric.json``, ``expected.json`` and
         ``clarifications.json``.
      4) Returns the created pack path.

    Args:
        root_dir: Project root; prompts are read from ``root_dir / "prompts"``
            and the pack is created under ``root_dir / "packs"``.
        scenario_text: Free-text scenario description inserted into the
            extractor user template.
        pack_name_hint: Optional name for the pack. When empty, the
            extracted ``"context"`` string is used if present, otherwise
            ``"scenario"``.

    Returns:
        Path of the pack directory (``root_dir / "packs" / <slug>``).

    Raises:
        ValueError: If the extractor reply cannot be parsed as JSON.
    """
    max_slug_len = 48

    prompts_dir = root_dir / "prompts"
    extractor_sys = (prompts_dir / "extractor_system.txt").read_text(encoding="utf-8")
    extractor_user_tmpl = (prompts_dir / "extractor_user_template.txt").read_text(encoding="utf-8")
    user_prompt = extractor_user_tmpl.replace("{SCENARIO_TEXT}", scenario_text)

    raw = call_model(extractor_sys, user_prompt)
    try:
        extracted = json.loads(raw)
    except (ValueError, TypeError) as e:
        # ValueError covers json.JSONDecodeError; TypeError covers a reply
        # that is not str/bytes. Both surface as ValueError for callers.
        raise ValueError(f"Extractor produced non-JSON output.\nRaw:\n{raw}") from e

    # Decide pack name. The reply may be valid JSON without being an object,
    # and "context" may not be a string; neither should crash naming.
    context = extracted.get("context") if isinstance(extracted, dict) else None
    if not isinstance(context, str):
        context = ""
    hint = pack_name_hint or context or "scenario"
    # Truncation can cut right after a separator; drop the dangling "_".
    pack_slug = slugify(hint)[:max_slug_len].rstrip("_")
    pack_dir = root_dir / "packs" / pack_slug
    # NOTE(review): exist_ok=True reuses an existing pack with the same slug;
    # save_json presumably overwrites files already there (including edited
    # constraints/expected/clarifications) -- confirm this is intended.
    pack_dir.mkdir(parents=True, exist_ok=True)

    # inputs.json
    save_json(pack_dir / "inputs.json", extracted)

    # constraints.json (default, can be edited later)
    default_constraints = {
        "analytics_first": True,
        "require_longitudinal": True,
        "indicator_priority": ["obesity_pct", "metabolic_syndrome_pct", "membership_2021"],
        "capacity_formula": "teams * clients_per_team_per_day * working_days",
        "cost_formula": "startup_per_client + ongoing_per_client",
        "prioritization_rule": "Sort by membership_2021 and comorbidity prevalence; tie-breaker: infrastructure",
    }
    save_json(pack_dir / "constraints.json", default_constraints)

    # schema.json (points to global analysis schema)
    save_json(pack_dir / "schema.json", {
        "archetypes": ["burden_prioritization", "capacity", "cost", "outcomes", "optimization"],
        "output_schema": "schemas/analysis_output.schema.json",
    })

    # placeholders to support pipeline (optional for new packs)
    save_json(pack_dir / "rubric.json", {
        "set_equals": [],
        "must_contain": [],
        "numeric_equals": [],
    })
    save_json(pack_dir / "expected.json", {
        "note": "Optional gold output for regression tests. Populate when available.",
    })

    # empty clarifications; you can fill interactively or with a UI later
    save_json(pack_dir / "clarifications.json", {
        "_note": "Populate with answers to Phase 1 clarification questions if you want batch runs.",
    })

    return pack_dir