# Final_Assignment
#
# Sleeping
#
# File size: 4,249 Bytes
#
# 7672dcd

from typing import Any

import requests
from markdownify import markdownify
from pydantic import BaseModel, Field


class WikipediaOpensearchInput(BaseModel):
    # Input model with one required field: the text to search for.
    # (A `#` comment is used instead of a class docstring so the generated
    # model schema is left untouched.)
    query: str = Field(
        ...,
        description="The search term or keyword to look up on English Wikipedia.",
    )


class GetPageTitleExcerptSectionsInput(BaseModel):
    # Input model with one required field: the title of the page to look up.
    # (A `#` comment is used instead of a class docstring so the generated
    # model schema is left untouched.)
    page_title: str = Field(
        ...,
        description="The exact title of the Wikipedia page.",
    )


class GetPageSectionContentInput(BaseModel):
    # Input model with two required fields: the article title and the index
    # of the section to fetch.
    page_title: str = Field(..., description="The exact Wikipedia article title.")
    # The help text must be passed as `description=`: the first positional
    # argument of `Field` is the *default value*, so passing the sentence
    # positionally made it the silent default for `section_index` and left
    # the field without any description.
    section_index: str = Field(
        ...,
        description="The index of the section (from section metadata).",
    )


class SearchWikipediaEn:
    """Small client for the English Wikipedia MediaWiki Action API.

    Every public method is static, sends one or two HTTP GET requests to
    ``_API_URL`` and returns (part of) the decoded JSON response.
    """

    # Single endpoint shared by every request issued by this class.
    _API_URL = "https://en.wikipedia.org/w/api.php"
    # Seconds to wait for the server. Without an explicit timeout
    # ``requests`` may block indefinitely on an unresponsive connection.
    _TIMEOUT = 10

    @staticmethod
    def _get_json(params: dict[str, Any]) -> Any:
        """
        Sends a GET request to the API endpoint and returns the decoded JSON body.

        Args:
            params (dict): Query-string parameters for the API call.

        Returns:
            The decoded JSON payload (a list or a dict, depending on the action).

        Raises:
            requests.RequestException: On connection errors, timeouts or a
                non-2xx HTTP status.
            ValueError: If the response body is not valid JSON.
        """
        response = requests.get(
            SearchWikipediaEn._API_URL,
            params=params,
            timeout=SearchWikipediaEn._TIMEOUT,
        )
        # Fail loudly on HTTP errors instead of trying to parse an error page.
        response.raise_for_status()
        return response.json()

    @staticmethod
    def wikipedia_opensearch(query: str) -> list[Any]:
        """
        Searches for Wikipedia articles matching the given query using the OpenSearch API.

        Args:
            query (str): The search term or keyword to look up on English Wikipedia.

        Returns:
            list: The decoded JSON list with the query, matched titles,
                descriptions, and URLs.
                Example:
                [
                    'Python programming',
                    ['Python (programming language)', ... ],
                    ['...', ...],
                    ['https://en.wikipedia.org/wiki/Python_(programming_language)', ...]
                ]

        Raises:
            requests.RequestException: On connection errors, timeouts or a
                non-2xx HTTP status.
            ValueError: If the response body is not valid JSON.
        """
        return SearchWikipediaEn._get_json(
            {
                "action": "opensearch",
                "namespace": "0",
                "search": query,
                "limit": 10,
                "format": "json",
            }
        )

    @staticmethod
    def get_page_title_excerpt_sections(page_title: str) -> dict[str, Any]:
        """
        Retrieves the plain-text extract and the section structure of a Wikipedia article.

        Two requests are made: an ``action=query`` call for the extract, then
        an ``action=parse`` call for the section list. The second call uses
        the title echoed back by the first one.

        Args:
            page_title (str): The exact title of the Wikipedia page.

        Returns:
            dict: {
                'excerpt': (str) Plain text extract of the page,
                'sections': (list) List of dictionaries describing section metadata.
            }
            Example:
            {
                "excerpt": "Python is a high-level programming language...",
                "sections": [
                    {"toclevel": 1, "level": "2", "line": "History", "index": "1", ...},
                    ...
                ]
            }

        Raises:
            KeyError: If a response lacks the expected fields (presumably the
                case when the page does not exist).
            requests.RequestException: On connection errors, timeouts or a
                non-2xx HTTP status.
            ValueError: If a response body is not valid JSON.
        """
        query_payload = SearchWikipediaEn._get_json(
            {
                "action": "query",
                "prop": "extracts",
                "titles": page_title,
                "explaintext": True,
                "format": "json",
                "formatversion": 2,
            }
        )
        # Decode once and reuse the first page entry for both lookups.
        page = query_payload["query"]["pages"][0]
        resolved_title = page["title"]
        page_excerpt = page["extract"]

        parse_payload = SearchWikipediaEn._get_json(
            {
                "action": "parse",
                "prop": "sections",
                "page": resolved_title,
                "format": "json",
            }
        )

        return {
            "excerpt": page_excerpt,
            "sections": parse_payload["parse"]["sections"],
        }

    @staticmethod
    def get_page_section_content(page_title: str, section_index: str) -> str:
        """
        Fetches the Markdown-formatted content of a specific section from a Wikipedia article.

        Args:
            page_title (str): The exact Wikipedia article title.
            section_index (str): The index of the section (from section metadata).
                Integers are accepted too and converted with ``str``. When it
                is ``None`` or an empty string, no ``section`` parameter is
                sent and the whole parsed page is converted.

        Returns:
            str: Markdown-formatted content of the specified section.
            Example:
                '# History\\nPython was conceived in the late 1980s...'

        Raises:
            KeyError: If the response lacks the expected fields (presumably the
                case when the page or section does not exist).
            requests.RequestException: On connection errors, timeouts or a
                non-2xx HTTP status.
            ValueError: If the response body is not valid JSON.
        """
        params = {
            "action": "parse",
            "page": page_title,
            "format": "json",
            "prop": "text",
        }
        # Compare against None / "" explicitly: a plain truthiness test would
        # silently drop an integer 0, which addresses the lead section.
        if section_index is not None and str(section_index) != "":
            params["section"] = str(section_index)

        payload = SearchWikipediaEn._get_json(params)
        return markdownify(payload["parse"]["text"]["*"])