import json
import re

import nbformat
import requests

RAW_ROOT = "https://raw.githubusercontent.com/Qiskit/textbook/main/notebooks/"

# README locations we now support
_SECTIONS: dict[str, str] = {
    "intro": "intro/README.md",
    "ch-states": "ch-states/README.md",
    "ch-gates": "ch-gates/README.md",
    "ch-algorithms": "ch-algorithms/README.md",
}

# ───────────────────────────────────────────────────────────────────
# internals
# ───────────────────────────────────────────────────────────────────


def _scrape_readme(rel_path: str) -> list[str]:
    """Return *.ipynb files mentioned in a README; empty list on failure."""
    try:
        resp = requests.get(f"{RAW_ROOT}{rel_path}", timeout=10)
        resp.raise_for_status()
        # markdown link target: (...filename.ipynb)
        found = re.findall(r"\(([^)]+?\.ipynb)\)", resp.text)
        # Clean up relative path prefixes like "./"
        return [name.removeprefix("./") for name in found]
    except requests.RequestException:
        return []


def _discover_files() -> list[str]:
    """Aggregate notebooks from all configured READMEs (no fallback)."""
    files: list[str] = []
    for dir_key, readme in _SECTIONS.items():
        found = _scrape_readme(readme)
        # Prepend the directory path if the README gives bare filenames
        prefixed = [name if "/" in name else f"{dir_key}/{name}" for name in found]
        files.extend(prefixed)
    return files


def _pretty(path: str) -> str:
    """'ch-states/bloch_sphere.ipynb' → 'Bloch Sphere'."""
    fname = path.rsplit("/", 1)[-1]
    return fname.removesuffix(".ipynb").replace("-", " ").replace("_", " ").title()


# ───────────────────────────────────────────────────────────────────
# public tools
# ───────────────────────────────────────────────────────────────────


def get_theory_topics() -> str:
    """Return a structured list of available quantum theory topics.

    Discovers available Jupyter notebooks from the Qiskit textbook across all
    four main chapters (intro, ch-states, ch-gates, ch-algorithms) by scraping
    their respective README files.

    Returns:
        str: JSON string containing a structured list of topics with title,
            slug, and path. For example:
            '{"topics": [{"title": "What Is Quantum",
                          "slug": "what-is-quantum",
                          "path": "intro/what-is-quantum.ipynb"}]}'

    Note:
        If network requests fail, returns JSON with an empty topics array
        instead of falling back to hardcoded content.
    """
    try:
        discovered_files = _discover_files()
        if not discovered_files:
            return json.dumps({"topics": []})

        topics = []
        for path in discovered_files:
            title = _pretty(path)
            slug = path.rsplit("/", 1)[-1].removesuffix(".ipynb")
            topics.append({"title": title, "slug": slug, "path": path})

        return json.dumps({"topics": topics}, indent=2)
    except Exception:
        return json.dumps({"topics": []})


def get_theory(
    topic: str,
    markdown_only: bool = True,
    include_headers: bool = True,
) -> str:
    """Download and parse a Qiskit textbook notebook, returning its content as text.

    Accepts flexible topic identification: pretty names ("Teleportation"),
    slugs ("teleportation"), or full paths ("intro/teleportation.ipynb").
    Downloads the notebook from GitHub and extracts its content.

    Args:
        topic (str): The quantum topic to fetch. Can be:
            - Pretty name: "Teleportation", "What Is Quantum"
            - Slug: "teleportation", "what-is-quantum"
            - Full path: "intro/teleportation.ipynb"
        markdown_only (bool, optional): If True, include only markdown cells.
            If False, also include code cells wrapped in ```python blocks.
            Defaults to True.
        include_headers (bool, optional): If True, prepend an H1 header with
            the topic name for better readability. Defaults to True.
    Returns:
        str: JSON string containing the topic name and notebook content.
            For example:
            '{"topic": "Teleportation",
              "content": "# Teleportation\\n\\nQuantum teleportation is a process..."}'
            Returns JSON with an error message in the content field if the
            topic is not found or network requests fail.

    Example:
        >>> result = get_theory("teleportation")
        >>> data = json.loads(result)
        >>> print(data["topic"])
        Teleportation
    """
    topics_json = get_theory_topics()
    topics_data = json.loads(topics_json)
    topics = topics_data.get("topics", [])

    # Build lenient lookup table
    lookup: dict[str, str] = {}
    for topic_info in topics:
        title = topic_info["title"]
        slug = topic_info["slug"]
        path = topic_info["path"]
        lookup[title.lower()] = path
        lookup[slug.lower()] = path
        lookup[path.lower()] = path

    key = topic.lower()
    if key not in lookup:
        if not topics:
            return json.dumps(
                {
                    "topic": topic,
                    "content": "Unable to get theory - no topics available (network may be down)",
                }
            )
        available_topics = ", ".join([t["title"] for t in topics])
        return json.dumps(
            {
                "topic": topic,
                "content": f"Topic unknown: '{topic}'. Available topics: {available_topics}",
            }
        )

    path = lookup[key]
    topic_title = _pretty(path)

    try:
        raw_json = requests.get(f"{RAW_ROOT}{path}", timeout=20).text
        nb = nbformat.reads(raw_json, as_version=4)
    except Exception:
        return json.dumps(
            {
                "topic": topic_title,
                "content": "Unable to get theory - failed to download or parse notebook content",
            }
        )

    chunks: list[str] = []
    if include_headers:
        chunks.append(f"# {topic_title}\n")

    for cell in nb.cells:
        if cell.cell_type == "markdown":
            chunks.append(cell.source)
        elif cell.cell_type == "code" and not markdown_only:
            chunks.append(f"```python\n{cell.source}\n```")

    content = "\n\n".join(chunks)
    return json.dumps({"topic": topic_title, "content": content}, indent=2)
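

# Minimal usage sketch, not part of the tool API: lists the discovered topics,
# then fetches the first one including its code cells. Assumes network access
# to raw.githubusercontent.com; with no network, both tools return their
# empty/error JSON payloads instead of raising.
if __name__ == "__main__":
    catalog = json.loads(get_theory_topics())
    print(f"{len(catalog['topics'])} topics discovered")

    if catalog["topics"]:
        # Look up by slug; pretty names and full paths would work as well.
        first_slug = catalog["topics"][0]["slug"]
        lesson = json.loads(get_theory(first_slug, markdown_only=False))
        print(lesson["topic"])
        print(lesson["content"][:500])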