Spaces:
Running
Running
| import re | |
| import sys | |
| from pathlib import Path | |
| src_path = Path(sys.argv[1]) | |
| out_dir = Path(sys.argv[2]) | |
| out_dir.mkdir(parents=True, exist_ok=True) | |
| text = src_path.read_text(encoding='utf-8', errors='replace') | |
| parts = re.split(r'(?m)^# (.+)$', text) | |
| def slugify(s: str) -> str: | |
| s = s.strip().lower() | |
| s = re.sub(r'[^a-z0-9]+', '_', s).strip('_') | |
| return s[:80] or 'section' | |
| wanted = { | |
| 'architecture overview': 'architecture_overview', | |
| 'lifecycle': 'lifecycle', | |
| 'transports': 'transports', | |
| 'resources': 'resources', | |
| 'tools': 'tools', | |
| 'roots': 'roots', | |
| 'elicitation': 'elicitation', | |
| 'sampling': 'sampling', | |
| 'logging': 'logging', | |
| 'pagination': 'pagination', | |
| 'cancellation': 'cancellation', | |
| 'progress': 'progress', | |
| 'prompts': 'prompts', | |
| 'schema': 'schema_reference', | |
| 'security': 'security_best_practices', | |
| 'authorization': 'authorization', | |
| } | |
| sections = [] | |
| # parts: [preamble, title1, body1, title2, body2, ...] | |
| for i in range(1, len(parts), 2): | |
| title = parts[i].strip() | |
| body = parts[i+1].strip() if i+1 < len(parts) else '' | |
| key = title.strip().lower() | |
| # match desired sections loosely | |
| match = None | |
| for k in wanted: | |
| if k in key: | |
| match = wanted[k] | |
| break | |
| if match: | |
| sections.append((match, title, body)) | |
| # write files | |
| for idx, (stem, title, body) in enumerate(sections, start=1): | |
| path = out_dir / f"{idx:02d}_{stem}.md" | |
| path.write_text(f"# {title}\n\n{body}\n", encoding='utf-8') | |
| (out_dir / "README.md").write_text( | |
| "# MCP curated snapshot\n\n" | |
| "Generated from modelcontextprotocol.io/llms-full.txt and split into topic files for RAG.\n", | |
| encoding='utf-8' | |
| ) | |
| print(f"Wrote {len(sections)} MCP files to {out_dir}") | |