ds6b-attackplan-qlora / scripts /make_chat_from_plans.py

Upload folder using huggingface_hub

fba140f verified about 2 months ago

1.82 kB

	# -*- coding: utf-8 -*-
	import json, argparse, uuid
	from pathlib import Path

	# System prompt: emitted as the "system" role message of every generated chat record.
	SYS = "You output ONLY JSON, no explanation. Validate against AttackPlan v1.1 semantics."

	def plan_to_prompt(p):
	    """Build a short, human-readable prompt summarising a plan's edits.

	    Only the first six entries of ``p["plan"]`` are summarised. Each entry
	    becomes ``"<op> <point> on <name> to <attack_value>"``, followed by
	    ``" in <mim>"`` and ``" (<mg>)"`` when the entry's ``scope`` supplies
	    truthy values for those keys. Entries are joined with ``"; "``.

	    Args:
	        p: Mapping that may hold a ``"plan"`` list of item mappings. A missing,
	            ``None`` or empty ``"plan"`` is treated as "no edits".

	    Returns:
	        The summary string, or a fixed no-op request when there are no items.
	    """
	    # `or []` also covers an explicit `"plan": null`, mirroring the `or {}`
	    # guard applied to `scope` below.
	    items = p.get("plan") or []

	    bits = []
	    for it in items[:6]:
	        op = it.get("op", "set")
	        pt = it.get("point", "")
	        val = it.get("attack_value")
	        nm = it.get("name", "")
	        scope = it.get("scope") or {}
	        area = scope.get("mg")
	        mim = scope.get("mim")

	        s = f"{op} {pt} on {nm} to {val}"
	        if mim:
	            s += f" in {mim}"
	        if area:
	            s += f" ({area})"
	        bits.append(s)

	    return "; ".join(bits) if bits else "Generate an AttackPlan JSON v1.1 for no-op."

	def main(argv=None):
	    """Convert an AttackPlan JSONL file into a chat-format JSONL dataset.

	    Every non-blank line of ``--src`` is parsed as JSON and turned into one
	    record holding a fresh UUID ``id`` and three ``messages``: the ``SYS``
	    system prompt, a user task built by ``plan_to_prompt``, and the parsed
	    object re-serialised as the assistant reply. Records are written one per
	    line to ``--out``; its parent directory is created if needed.

	    Args:
	        argv: Optional list of command-line arguments. ``None`` (the default)
	            lets argparse read ``sys.argv[1:]``, as before.

	    Raises:
	        json.JSONDecodeError: If a source line is not valid JSON. The message
	            names the source file and the 1-based line number.
	    """
	    ap = argparse.ArgumentParser()
	    ap.add_argument("--src", required=True, help="scripts/train_attackplan.aug.jsonl (or filtered)")
	    ap.add_argument("--out", default="datasets/chat_attackplan.jsonl")
	    args = ap.parse_args(argv)

	    src_path = Path(args.src)
	    out_path = Path(args.out)

	    out = []
	    # Iterate the file object rather than str.splitlines(): the latter also
	    # breaks on U+2028/U+2029/U+0085/VT/FF, which can legally appear unescaped
	    # inside JSON strings and would cut a record in half.
	    # "utf-8-sig" transparently drops a leading BOM if the file has one.
	    with src_path.open(encoding="utf-8-sig") as fh:
	        for lineno, ln in enumerate(fh, start=1):
	            if not ln.strip():
	                continue
	            try:
	                obj = json.loads(ln)
	            except json.JSONDecodeError as err:
	                # Same exception type, but say which file/line is broken.
	                raise json.JSONDecodeError(
	                    f"{src_path}, line {lineno}: {err.msg}", err.doc, err.pos
	                ) from err
	            prompt = plan_to_prompt(obj)
	            out.append({
	                "id": str(uuid.uuid4()),
	                "messages": [
	                    {"role": "system", "content": SYS},
	                    {"role": "user", "content": f"Task: {prompt}\nReturn ONLY the JSON."},
	                    {"role": "assistant", "content": json.dumps(obj, ensure_ascii=False)},
	                ],
	            })

	    # The output is opened only after the whole source has been read and
	    # parsed, so a bad input line never leaves a truncated dataset behind.
	    out_path.parent.mkdir(parents=True, exist_ok=True)
	    with out_path.open("w", encoding="utf-8") as fh:
	        # One newline-terminated record per row; zero rows yields an empty
	        # file rather than a single blank line.
	        fh.writelines(json.dumps(x, ensure_ascii=False) + "\n" for x in out)
	    print("[ok] wrote", out_path.resolve(), "rows:", len(out))

	# Run the converter only when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()