Spaces:
Running
Running
import json
from pathlib import Path

from config.settings import RAW_DIR

# Maintenance script: walk every "*.json" file directly under RAW_DIR and
# fill in a missing or empty "primary_category" field, rewriting only the
# files that actually needed the fix. The number of rewritten files is
# printed at the end.
fixed = 0
for meta_path in RAW_DIR.glob("*.json"):
    # paper_index.json is excluded by name; every other file is parsed as a
    # JSON object and inspected.
    if meta_path.name != "paper_index.json":
        record = json.loads(meta_path.read_text(encoding="utf-8"))
        # A missing key and a falsy value (empty string, null) are treated
        # the same way: both trigger the backfill.
        if not record.get("primary_category"):
            categories = record.get("categories", [])
            if categories:
                # Use the first listed category as the primary one.
                record["primary_category"] = categories[0]
            else:
                # No categories available: fall back to the fixed default.
                record["primary_category"] = "cs.LG"
            # Write the patched record back over the same file, keeping
            # non-ASCII characters unescaped.
            meta_path.write_text(
                json.dumps(record, indent=2, ensure_ascii=False),
                encoding="utf-8",
            )
            fixed += 1
print(f"Fixed {fixed} raw metadata files")