import json
import re

import nbformat
import requests

RAW_ROOT = "https://raw.githubusercontent.com/Qiskit/textbook/main/notebooks/"

# README locations we now support
_SECTIONS: dict[str, str] = {
    "intro": "intro/README.md",
    "ch-states": "ch-states/README.md",
    "ch-gates": "ch-gates/README.md",
    "ch-algorithms": "ch-algorithms/README.md",
}

# ───────────────────────────────────────────────────────────────────
# internals
# ───────────────────────────────────────────────────────────────────


def _scrape_readme(rel_path: str) -> list[str]:
    """Return *.ipynb files mentioned in a README; empty list on failure."""
    try:
        resp = requests.get(f"{RAW_ROOT}{rel_path}", timeout=10)
        resp.raise_for_status()
        # markdown link target: (...filename.ipynb)
        found = re.findall(r"\(([^)]+?\.ipynb)\)", resp.text)
        # Clean up relative path prefixes like "./"
        return [name.removeprefix("./") for name in found]
    except requests.RequestException:
        return []


def _discover_files() -> list[str]:
    """Aggregate notebooks from all configured READMEs (no fallback)."""
    files: list[str] = []
    for dir_key, readme in _SECTIONS.items():
        found = _scrape_readme(readme)
        # Prepend the directory path if the README gives bare filenames
        prefixed = [name if "/" in name else f"{dir_key}/{name}" for name in found]
        files.extend(prefixed)
    return files


def _pretty(path: str) -> str:
    """'ch-states/bloch_sphere.ipynb' → 'Bloch Sphere'."""
    fname = path.rsplit("/", 1)[-1]
    return fname.removesuffix(".ipynb").replace("-", " ").replace("_", " ").title()


# ───────────────────────────────────────────────────────────────────
# public tools
# ───────────────────────────────────────────────────────────────────


def get_theory_topics() -> str:
    """Return a structured list of available quantum theory topics.

    Discovers available Jupyter notebooks from the Qiskit textbook across all
    four main chapters (intro, ch-states, ch-gates, ch-algorithms) by scraping
    their respective README files.

    Returns:
        str: JSON string containing a structured list of topics with title,
            slug, and path. For example:
            '{"topics": [{"title": "What Is Quantum",
                          "slug": "what-is-quantum",
                          "path": "intro/what-is-quantum.ipynb"}]}'

    Note:
        If network requests fail, returns JSON with an empty topics array
        instead of falling back to hardcoded content.
    """
    try:
        discovered_files = _discover_files()
        if not discovered_files:
            return json.dumps({"topics": []})

        topics = []
        for path in discovered_files:
            title = _pretty(path)
            slug = path.rsplit("/", 1)[-1].removesuffix(".ipynb")
            topics.append({"title": title, "slug": slug, "path": path})

        return json.dumps({"topics": topics}, indent=2)
    except Exception:
        return json.dumps({"topics": []})


def get_theory(
    topic: str,
    markdown_only: bool = True,
    include_headers: bool = True,
) -> str:
    """Download and parse a Qiskit textbook notebook, returning its content as text.

    Accepts flexible topic identification: pretty names ("Teleportation"),
    slugs ("teleportation"), or full paths ("intro/teleportation.ipynb").
    Downloads the notebook from GitHub and extracts its content.

    Args:
        topic (str): The quantum topic to fetch. Can be:
            - Pretty name: "Teleportation", "What Is Quantum"
            - Slug: "teleportation", "what-is-quantum"
            - Full path: "intro/teleportation.ipynb"
        markdown_only (bool, optional): If True, include only markdown cells.
            If False, also include code cells wrapped in ```python blocks.
            Defaults to True.
        include_headers (bool, optional): If True, prepend an H1 header with
            the topic name for better readability. Defaults to True.
    Returns:
        str: JSON string containing the topic name and notebook content.
            For example:
            '{"topic": "Teleportation",
              "content": "# Teleportation\\n\\nQuantum teleportation is a process..."}'
            Returns JSON with an error message in the content field if the
            topic is not found or network requests fail.

    Example:
        >>> result = get_theory("teleportation")
        >>> data = json.loads(result)
        >>> print(data["topic"])
        Teleportation
    """
    topics_json = get_theory_topics()
    topics_data = json.loads(topics_json)
    topics = topics_data.get("topics", [])

    # Build lenient lookup table
    lookup: dict[str, str] = {}
    for topic_info in topics:
        title = topic_info["title"]
        slug = topic_info["slug"]
        path = topic_info["path"]
        lookup[title.lower()] = path
        lookup[slug.lower()] = path
        lookup[path.lower()] = path

    key = topic.lower()
    if key not in lookup:
        if not topics:
            return json.dumps(
                {
                    "topic": topic,
                    "content": "Unable to get theory - no topics available (network may be down)",
                }
            )
        available_topics = ", ".join([t["title"] for t in topics])
        return json.dumps(
            {
                "topic": topic,
                "content": f"Topic unknown: '{topic}'. Available topics: {available_topics}",
            }
        )

    path = lookup[key]
    topic_title = _pretty(path)

    try:
        raw_json = requests.get(f"{RAW_ROOT}{path}", timeout=20).text
        nb = nbformat.reads(raw_json, as_version=4)
    except Exception:
        return json.dumps(
            {
                "topic": topic_title,
                "content": "Unable to get theory - failed to download or parse notebook content",
            }
        )

    chunks: list[str] = []
    if include_headers:
        chunks.append(f"# {topic_title}\n")

    for cell in nb.cells:
        if cell.cell_type == "markdown":
            chunks.append(cell.source)
        elif cell.cell_type == "code" and not markdown_only:
            chunks.append(f"```python\n{cell.source}\n```")

    content = "\n\n".join(chunks)
    return json.dumps({"topic": topic_title, "content": content}, indent=2)
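

# Minimal usage sketch, not part of the tool API: lists the discovered topics,
# then fetches the first one including its code cells. Assumes network access
# to raw.githubusercontent.com; with no network, both tools return their
# empty/error JSON payloads instead of raising.
if __name__ == "__main__":
    catalog = json.loads(get_theory_topics())
    print(f"{len(catalog['topics'])} topics discovered")

    if catalog["topics"]:
        # Look up by slug; pretty names and full paths would work as well.
        first_slug = catalog["topics"][0]["slug"]
        lesson = json.loads(get_theory(first_slug, markdown_only=False))
        print(lesson["topic"])
        print(lesson["content"][:500])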