# Final_Assignment/tools/wiki_search.py
# Alfred828: Create tools/wiki_search.py (commit 7672dcd, verified)
from typing import Any
import requests
from markdownify import markdownify
from pydantic import BaseModel, Field
# Input model with a single required string field, `query`.
# Presumably the argument schema for SearchWikipediaEn.wikipedia_opensearch
# (same parameter name and description) — confirm at the registration site.
class WikipediaOpensearchInput(BaseModel):
    query: str = Field(
        ...,  # explicit "required" marker; equivalent to omitting the default
        description="The search term or keyword to look up on English Wikipedia.",
    )
# Input model with a single required string field, `page_title`.
# Presumably the argument schema for
# SearchWikipediaEn.get_page_title_excerpt_sections — confirm at the call site.
class GetPageTitleExcerptSectionsInput(BaseModel):
    page_title: str = Field(
        ...,  # explicit "required" marker; equivalent to omitting the default
        description="The exact title of the Wikipedia page.",
    )
# Input model with two required string fields, `page_title` and `section_index`.
# Presumably the argument schema for SearchWikipediaEn.get_page_section_content
# (same parameter names) — confirm at the registration site.
class GetPageSectionContentInput(BaseModel):
    page_title: str = Field(description="The exact Wikipedia article title.")
    # The text must be passed as `description=`: the first positional argument
    # of Field() is the *default value*, so passing it positionally made the
    # description sentence the default section index and left the field
    # undocumented in the schema.
    section_index: str = Field(
        description="The index of the section (from section metadata)."
    )
class SearchWikipediaEn:
    """Static helpers that query the English Wikipedia MediaWiki Action API."""

    # Single endpoint used by every request in this class.
    _API_URL = "https://en.wikipedia.org/w/api.php"
    # Seconds to wait for the server; `requests` applies no timeout by default,
    # so without this a stalled connection would block the caller indefinitely.
    _TIMEOUT = 15

    @staticmethod
    def _get_json(params: dict[str, Any]) -> Any:
        """
        Sends a GET request to the Wikipedia API and returns the decoded JSON body.
        Args:
            params (dict): Query-string parameters for the API call.
        Returns:
            The decoded JSON payload (a list or dict, depending on the action).
        Raises:
            requests.RequestException: On connection failure, timeout, HTTP error
                status, or a body that is not valid JSON.
        """
        response = requests.get(
            SearchWikipediaEn._API_URL,
            params=params,
            timeout=SearchWikipediaEn._TIMEOUT,
        )
        # Fail on 4xx/5xx here instead of with a confusing JSON decode error.
        response.raise_for_status()
        return response.json()

    @staticmethod
    def _require_parse(data: Any, page_title: str) -> dict[str, Any]:
        """
        Returns the 'parse' object of an `action=parse` response.
        Raises:
            KeyError: If the response has no 'parse' key; the message includes
                the API's own error info when the response provides one.
        """
        if "parse" not in data:
            error = data.get("error", {}) if isinstance(data, dict) else {}
            info = error.get("info", "no 'parse' key in API response")
            raise KeyError(f"Could not parse Wikipedia page {page_title!r}: {info}")
        return data["parse"]

    @staticmethod
    def wikipedia_opensearch(query: str) -> list[Any]:
        """
        Searches for Wikipedia articles matching the given query using the OpenSearch API.
        Args:
            query (str): The search term or keyword to look up on English Wikipedia.
        Returns:
            list: The decoded JSON response with matched titles, descriptions, and URLs.
        Raises:
            requests.RequestException: If the HTTP request fails or times out.
        Example:
            [
                'Python programming',
                ['Python (programming language)', ... ],
                ['...', ...],
                ['https://en.wikipedia.org/wiki/Python_(programming_language)', ...]
            ]
        """
        return SearchWikipediaEn._get_json(
            {
                "action": "opensearch",
                "namespace": "0",
                "search": query,
                "limit": 10,
                "format": "json",
            }
        )

    @staticmethod
    def get_page_title_excerpt_sections(page_title: str) -> dict[str, Any]:
        """
        Retrieves the plain-text excerpt and the section structure of a Wikipedia article.
        Args:
            page_title (str): The exact title of the Wikipedia page.
        Returns:
            dict: {
                'excerpt': (str) Plain text extract of the page,
                'sections': (list) List of dictionaries describing section metadata.
            }
        Raises:
            KeyError: If the API returns no extract or no section data for the
                title (for example, when the page does not exist).
            requests.RequestException: If an HTTP request fails or times out.
        Example:
            {
                "excerpt": "Python is a high-level programming language...",
                "sections": [
                    {"toclevel": 1, "level": "2", "line": "History", "index": "1", ...},
                    ...
                ]
            }
        """
        # NOTE(review): no `exintro` parameter is sent, so the extract may cover
        # more than the lead section — confirm this is the intended "excerpt".
        query_data = SearchWikipediaEn._get_json(
            {
                "action": "query",
                "prop": "extracts",
                "titles": page_title,
                "explaintext": True,
                "format": "json",
                "formatversion": 2,
            }
        )
        # Decode once and reuse; the first page entry carries title and extract.
        page = query_data["query"]["pages"][0]
        if "extract" not in page:
            raise KeyError(
                f"No extract returned for Wikipedia page {page_title!r}; "
                "check that the title exists."
            )
        # The second request uses the title as reported back by the API rather
        # than the caller's spelling.
        resolved_title = page["title"]
        page_excerpt = page["extract"]

        parse_data = SearchWikipediaEn._get_json(
            {
                "action": "parse",
                "prop": "sections",
                "page": resolved_title,
                "format": "json",
            }
        )
        page_sections = SearchWikipediaEn._require_parse(parse_data, resolved_title)[
            "sections"
        ]
        return {
            "excerpt": page_excerpt,
            "sections": page_sections,
        }

    @staticmethod
    def get_page_section_content(page_title: str, section_index: str) -> str:
        """
        Fetches the Markdown-formatted content of a specific section from a Wikipedia article.
        Args:
            page_title (str): The exact Wikipedia article title.
            section_index (str): The index of the section (from section metadata).
                When None or an empty string, no section filter is sent.
        Returns:
            str: Markdown-formatted content of the specified section.
        Raises:
            KeyError: If the API response contains no parsed content (for
                example, an unknown page or section).
            requests.RequestException: If the HTTP request fails or times out.
        Example:
            '# History\\nPython was conceived in the late 1980s...'
        """
        params = {
            "action": "parse",
            "page": page_title,
            "format": "json",
            "prop": "text",
        }
        # Test against None/"" explicitly: a plain truthiness check would drop
        # an integer 0, making section 0 unreachable for callers passing ints.
        if section_index is not None and str(section_index) != "":
            params["section"] = str(section_index)
        parsed = SearchWikipediaEn._require_parse(
            SearchWikipediaEn._get_json(params), page_title
        )
        # The rendered HTML lives under the "*" key of the "text" object.
        return markdownify(parsed["text"]["*"])