# -*- coding: utf-8 -*-
"""
Validate a JSON Lines file against the ``attack_plan`` JSON Schema.

The path of the JSONL file is taken from the first command-line argument;
when it is omitted, ``scripts/train_attackplan.jsonl`` is used.

Created on Sun Aug 17 20:36:17 2025

@author: adetu
"""
import json
import sys
from json import JSONDecodeError
from pathlib import Path

from jsonschema import Draft202012Validator as V

# Upper bound on the number of schema violations printed for one record.
_MAX_ERRORS_SHOWN = 8


def load_schema():
    """Load the JSON Schema from the first candidate path that exists.

    Candidate paths are resolved relative to the current working directory.
    A leading BOM is dropped and lines that start with a markdown code fence
    are removed before the text is parsed.

    Returns:
        The object produced by ``json.loads`` for the schema file.

    Raises:
        RuntimeError: The schema file is empty once fences are removed.
        JSONDecodeError: The schema file is not valid JSON (re-raised after
            the offending lines have been printed).
        FileNotFoundError: None of the candidate paths exists.
    """
    candidates = [
        Path("schemas/attack_plan.schema.json"),
        Path("../schemas/attack_plan.schema.json"),
    ]
    for p in candidates:
        if not p.exists():
            continue

        raw = p.read_text(encoding="utf-8-sig")  # utf-8-sig drops a leading BOM

        # Strip accidental markdown fences.  Split on "\n" only:
        # str.splitlines() also breaks on U+2028/U+2029/U+0085, which may
        # legally occur inside JSON strings and would be rewritten to "\n"
        # by the join below.
        lines = [ln for ln in raw.split("\n") if not ln.strip().startswith("```")]
        raw = "\n".join(lines).strip()
        if not raw:
            raise RuntimeError(f"Schema file is empty: {p}")

        try:
            schema = json.loads(raw)
        except JSONDecodeError as e:
            # e.lineno counts "\n" characters, so slice with the same rule
            # to show the line before, the failing line and the line after.
            ctx = raw.split("\n")[max(e.lineno - 2, 0):e.lineno + 1]
            print(f"[schema] JSON error in {p}: {e.msg} at line {e.lineno}, col {e.colno}")
            print("Context:\n" + "\n".join(ctx))
            raise

        print(f"[schema] loaded {p.resolve()}")
        return schema

    raise FileNotFoundError("Could not find schema at schemas/attack_plan.schema.json")


def main():
    """Validate every non-blank line of the JSONL input and print a report.

    Each line is parsed as JSON and checked with a ``Draft202012Validator``
    built from ``load_schema()``.  Lines that are not JSON and lines that
    violate the schema are both counted as invalid; at most
    ``_MAX_ERRORS_SHOWN`` violations are printed per line.  A summary with
    the valid/invalid counts is printed at the end.
    """
    schema = load_schema()
    validator = V(schema)

    if len(sys.argv) > 1:
        src = Path(sys.argv[1])
    else:
        src = Path("scripts/train_attackplan.jsonl")

    valid = invalid = 0
    # Iterate the file object instead of str.splitlines(): file iteration
    # splits on real newlines only, so a record whose strings contain
    # U+2028/U+2029/U+0085 stays in one piece and line numbers stay correct.
    # utf-8-sig tolerates a BOM, matching how the schema file is read.
    with src.open(encoding="utf-8-sig") as fh:
        for i, line in enumerate(fh, 1):
            line = line.rstrip("\n")
            if not line.strip():  # skip blanks
                continue

            try:
                obj = json.loads(line)
            except JSONDecodeError as e:
                print(f"[line {i}] not JSON: {e.msg} at {e.lineno}:{e.colno}")
                print(" snippet:", line[:200])
                invalid += 1
                continue

            # Sort for a stable report: by location in the record, then text.
            errs = sorted(
                validator.iter_errors(obj),
                key=lambda e: (list(e.path), e.message),
            )
            if errs:
                invalid += 1
                print(f"[line {i}] INVALID:")
                for e in errs[:_MAX_ERRORS_SHOWN]:
                    print(" -", e.message, "at", list(e.path))
            else:
                valid += 1

    print(f"[done] {valid} valid, {invalid} invalid")


if __name__ == "__main__":
    main()