# ds6b-attackplan-qlora / scripts/make_chat_from_plans.py - adetuire1, commit fba140f ("Upload folder using huggingface_hub")
# -*- coding: utf-8 -*-
import json, argparse, uuid
from pathlib import Path
# System message used as the first message of every generated chat example.
SYS = "You output ONLY JSON, no explanation. Validate against AttackPlan v1.1 semantics."
def plan_to_prompt(p, max_items=6):
    """Summarize the edits requested by a plan as a short, human-style prompt.

    Args:
        p: Parsed plan object (a dict). Its "plan" entry is read as a list of
            item dicts; a missing or null "plan" is treated as empty.
        max_items: Maximum number of leading plan items to describe.
            Defaults to 6 so the prompt stays small.

    Returns:
        A "; "-joined string with one phrase per item, shaped like
        "<op> <point> on <name> to <attack_value>[ in <mim>][ (<mg>)]",
        or a fixed no-op request when there are no items.
    """
    # `"plan": null` must behave like a missing key (mirrors the `scope`
    # handling below); `.get("plan", [])` alone would return None and crash.
    items = p.get("plan") or []
    bits = []
    for it in items[:max_items]:
        op = it.get("op", "set")
        pt = it.get("point", "")
        val = it.get("attack_value")
        nm = it.get("name", "")
        scope = it.get("scope") or {}
        area = scope.get("mg")
        mim = scope.get("mim")
        s = f"{op} {pt} on {nm} to {val}"
        if mim:
            s += f" in {mim}"
        if area:
            s += f" ({area})"
        bits.append(s)
    return "; ".join(bits) if bits else "Generate an AttackPlan JSON v1.1 for no-op."
def main():
    """Convert a JSONL file of plan objects into chat-format JSONL.

    Command-line arguments:
        --src: Input JSONL path (required); one JSON object per line,
            blank lines are skipped. A leading UTF-8 BOM is tolerated.
        --out: Output JSONL path (default "datasets/chat_attackplan.jsonl");
            parent directories are created as needed.

    Each input object becomes one output row with a random UUID "id" and a
    three-message conversation: the system prompt, a user task derived from
    the plan, and the plan itself serialized as the assistant reply.

    Raises:
        json.JSONDecodeError: If an input line is not valid JSON; the message
            is prefixed with the source path and 1-based line number.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--src", required=True, help="scripts/train_attackplan.aug.jsonl (or filtered)")
    ap.add_argument("--out", default="datasets/chat_attackplan.jsonl")
    args = ap.parse_args()

    out = []
    # Iterate the file object rather than using str.splitlines(): splitlines()
    # also breaks on U+2028/U+2029/U+0085, which can appear unescaped inside
    # JSON strings and would cut a record in half.
    with Path(args.src).open(encoding="utf-8-sig") as fh:
        for lineno, ln in enumerate(fh, start=1):
            if not ln.strip():
                continue
            try:
                obj = json.loads(ln)
            except json.JSONDecodeError as err:
                # Same exception type, but tell the user which line is broken.
                raise json.JSONDecodeError(
                    f"{args.src}:{lineno}: {err.msg}", err.doc, err.pos
                ) from err
            prompt = plan_to_prompt(obj)
            out.append({
                "id": str(uuid.uuid4()),
                "messages": [
                    {"role": "system", "content": SYS},
                    {"role": "user", "content": f"Task: {prompt}\nReturn ONLY the JSON."},
                    {"role": "assistant", "content": json.dumps(obj, ensure_ascii=False)},
                ],
            })

    Path(args.out).parent.mkdir(parents=True, exist_ok=True)
    Path(args.out).write_text("\n".join(json.dumps(x, ensure_ascii=False) for x in out) + "\n", encoding="utf-8")
    print("[ok] wrote", Path(args.out).resolve(), "rows:", len(out))
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()