from typing import Any

import requests
from markdownify import markdownify
from pydantic import BaseModel, Field

# Single endpoint used by every MediaWiki Action API call in this module.
_WIKIPEDIA_API_URL = "https://en.wikipedia.org/w/api.php"

# Seconds to wait for the server before giving up. ``requests`` has no default
# timeout, so without this a stalled connection would block the caller forever.
_REQUEST_TIMEOUT = 10


def _api_get(params: dict[str, Any]) -> Any:
    """Send a GET request to the English Wikipedia API and return the decoded JSON body.

    Args:
        params: Query-string parameters for the API call.

    Returns:
        The JSON-decoded response body.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            non-2xx HTTP status.
    """
    response = requests.get(
        _WIKIPEDIA_API_URL, params=params, timeout=_REQUEST_TIMEOUT
    )
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    return response.json()


class WikipediaOpensearchInput(BaseModel):
    """Input schema for ``SearchWikipediaEn.wikipedia_opensearch``."""

    query: str = Field(
        description="The search term or keyword to look up on English Wikipedia."
    )


class GetPageTitleExcerptSectionsInput(BaseModel):
    """Input schema for ``SearchWikipediaEn.get_page_title_excerpt_sections``."""

    page_title: str = Field(description="The exact title of the Wikipedia page.")


class GetPageSectionContentInput(BaseModel):
    """Input schema for ``SearchWikipediaEn.get_page_section_content``."""

    page_title: str = Field(description="The exact Wikipedia article title.")
    # The text must be passed as ``description=``; the first positional argument
    # of ``Field`` is the field's *default value*, not its description.
    section_index: str = Field(
        description="The index of the section (from section metadata)."
    )


class SearchWikipediaEn:
    """Stateless helpers that query the English Wikipedia MediaWiki API."""

    @staticmethod
    def wikipedia_opensearch(query: str) -> list[Any]:
        """
        Searches for Wikipedia articles matching the given query using the OpenSearch API.

        At most 10 results from the main (article) namespace are requested.

        Args:
            query (str): The search term or keyword to look up on English Wikipedia.

        Returns:
            list: The decoded OpenSearch response: the search term followed by
            parallel lists of matched titles, descriptions, and URLs.

        Raises:
            requests.RequestException: If the HTTP request fails or times out.

        Example:
            [
                'Python programming',
                ['Python (programming language)', ...],
                ['...', ...],
                ['https://en.wikipedia.org/wiki/Python_(programming_language)', ...]
            ]
        """
        params = {
            "action": "opensearch",
            "namespace": "0",
            "search": query,
            "limit": 10,
            "format": "json",
        }
        return _api_get(params)

    @staticmethod
    def get_page_title_excerpt_sections(page_title: str) -> dict[str, Any]:
        """
        Retrieves the plain-text extract and the section structure of a Wikipedia article.

        Two API calls are made: a ``query``/``extracts`` call for the text, and a
        ``parse``/``sections`` call for the section metadata. The second call uses
        the title reported by the first response rather than the caller's input.

        Args:
            page_title (str): The exact title of the Wikipedia page.

        Returns:
            dict: {
                'excerpt': (str) Plain-text extract of the page,
                'sections': (list) List of dictionaries describing section metadata.
            }

        Raises:
            requests.RequestException: If an HTTP request fails or times out.
            KeyError: If a response lacks the expected keys (for example when
                the API reports no extract for the requested title).

        Example:
            {
                "excerpt": "Python is a high-level programming language...",
                "sections": [
                    {"toclevel": 1, "level": "2", "line": "History", "index": "1", ...},
                    ...
                ]
            }
        """
        extract_params = {
            "action": "query",
            "prop": "extracts",
            "titles": page_title,
            "explaintext": True,
            "format": "json",
            "formatversion": 2,
        }
        # Decode the response once and read both fields from the same page entry.
        page = _api_get(extract_params)["query"]["pages"][0]
        resolved_title = page["title"]
        page_excerpt = page["extract"]

        sections_params = {
            "action": "parse",
            "prop": "sections",
            "page": resolved_title,
            "format": "json",
        }
        page_sections = _api_get(sections_params)["parse"]["sections"]

        return {
            "excerpt": page_excerpt,
            "sections": page_sections,
        }

    @staticmethod
    def get_page_section_content(page_title: str, section_index: str) -> str:
        """
        Fetches the Markdown-formatted content of a specific section from a Wikipedia article.

        The section's rendered HTML is requested from the ``parse`` API and then
        converted with ``markdownify``. When ``section_index`` is ``None`` or an
        empty string, no ``section`` parameter is sent, so the request is not
        limited to a single section.

        Args:
            page_title (str): The exact Wikipedia article title.
            section_index (str): The index of the section (from section metadata).

        Returns:
            str: Markdown-formatted content of the specified section.

        Raises:
            requests.RequestException: If the HTTP request fails or times out.
            KeyError: If the response lacks the expected ``parse``/``text`` keys.

        Example:
            '# History\\nPython was conceived in the late 1980s...'
        """
        params = {
            "action": "parse",
            "page": page_title,
            "format": "json",
            "prop": "text",
        }
        # Compare against None/"" explicitly: a plain truthiness test would
        # discard an integer 0, which is a valid section index.
        if section_index is not None and str(section_index) != "":
            params["section"] = str(section_index)

        payload = _api_get(params)
        return markdownify(payload["parse"]["text"]["*"])