Final_Assignment

Sleeping

App Files Files Community

Final_Assignment/tools/wiki_search.py

Alfred828

Create tools/wiki_search.py

7672dcd verified 3 months ago

raw

history blame contribute delete

4.25 kB

	from typing import Any

	import requests
	from markdownify import markdownify
	from pydantic import BaseModel, Field


	class WikipediaOpensearchInput(BaseModel):
	    # Argument schema with a single required string field, `query`.
	    # The description text matches the `query` argument documented on
	    # SearchWikipediaEn.wikipedia_opensearch.
	    query: str = Field(
	        ...,
	        description="The search term or keyword to look up on English Wikipedia.",
	    )


	class GetPageTitleExcerptSectionsInput(BaseModel):
	    # Argument schema with a single required string field, `page_title`.
	    # The description text matches the `page_title` argument documented on
	    # SearchWikipediaEn.get_page_title_excerpt_sections.
	    page_title: str = Field(
	        ...,
	        description="The exact title of the Wikipedia page.",
	    )


	class GetPageSectionContentInput(BaseModel):
	    # Argument schema with two required string fields. Their description
	    # texts match the arguments documented on
	    # SearchWikipediaEn.get_page_section_content.
	    page_title: str = Field(description="The exact Wikipedia article title.")
	    # The help text must be passed as `description=`. Given positionally it
	    # is taken as the field's *default value*, which silently made the field
	    # optional with a prose sentence as its default and left it undescribed.
	    section_index: str = Field(
	        description="The index of the section (from section metadata)."
	    )


	class SearchWikipediaEn:
	    """Static helpers that query the English Wikipedia MediaWiki Action API.

	    Every method performs blocking HTTP GET requests against ``_API_URL`` and
	    returns data taken from the decoded JSON response.
	    """

	    # Single endpoint shared by every request issued from this class.
	    _API_URL = "https://en.wikipedia.org/w/api.php"
	    # Upper bound (seconds) for each request so a stalled connection cannot
	    # block the caller indefinitely.
	    _TIMEOUT_SECONDS = 30

	    @staticmethod
	    def _api_get(params: dict[str, Any]) -> Any:
	        """Send one GET request to the API and return the decoded JSON body.

	        Args:
	            params (dict): Query-string parameters for the API call.

	        Returns:
	            The JSON-decoded response body.

	        Raises:
	            requests.exceptions.RequestException: On connection problems,
	                when the timeout elapses, or when the body is not valid JSON.
	        """
	        response = requests.get(
	            url=SearchWikipediaEn._API_URL,
	            params=params,
	            timeout=SearchWikipediaEn._TIMEOUT_SECONDS,
	        )
	        return response.json()

	    @staticmethod
	    def wikipedia_opensearch(query: str) -> list[Any]:
	        """
	        Searches for Wikipedia articles matching the given query using the OpenSearch API.

	        Args:
	            query (str): The search term or keyword to look up on English Wikipedia.

	        Returns:
	            list: The decoded JSON response (at most 10 matches are requested,
	                main namespace only), with items including matched titles,
	                descriptions, and URLs.
	            Example:
	                [
	                    'Python programming',
	                    ['Python (programming language)', ... ],
	                    ['...', ...],
	                    ['https://en.wikipedia.org/wiki/Python_(programming_language)', ...]
	                ]
	        """
	        return SearchWikipediaEn._api_get(
	            {
	                "action": "opensearch",
	                "namespace": "0",
	                "search": query,
	                "limit": 10,
	                "format": "json",
	            }
	        )

	    @staticmethod
	    def get_page_title_excerpt_sections(page_title: str) -> dict[str, Any]:
	        """
	        Retrieves the summary excerpt (plain text) and section structure of a Wikipedia article.

	        Two requests are made: a ``query``/``extracts`` call for the text, then
	        a ``parse``/``sections`` call that uses the title returned by the first
	        call.

	        Args:
	            page_title (str): The exact title of the Wikipedia page.

	        Returns:
	            dict: {
	                'excerpt': (str) Plain text summary/excerpt of the page,
	                'sections': (list) List of dictionaries describing section metadata.
	            }
	            Example:
	                {
	                    "excerpt": "Python is a high-level programming language...",
	                    "sections": [
	                        {"toclevel": 1, "level": "2", "line": "History", "index": "1", ...},
	                        ...
	                    ]
	                }

	        Raises:
	            KeyError: If a response lacks the expected keys (for example when
	                the first page entry has no ``extract``).
	        """
	        extract_payload = SearchWikipediaEn._api_get(
	            {
	                "action": "query",
	                "prop": "extracts",
	                "titles": page_title,
	                "explaintext": True,
	                "format": "json",
	                "formatversion": 2,
	            }
	        )

	        # Decode once and read both fields from the same page entry.
	        first_page = extract_payload["query"]["pages"][0]
	        resolved_title = first_page["title"]
	        page_excerpt = first_page["extract"]

	        # The section lookup uses the title as reported by the API above.
	        sections_payload = SearchWikipediaEn._api_get(
	            {
	                "action": "parse",
	                "prop": "sections",
	                "page": resolved_title,
	                "format": "json",
	            }
	        )

	        return {
	            "excerpt": page_excerpt,
	            "sections": sections_payload["parse"]["sections"],
	        }

	    @staticmethod
	    def get_page_section_content(page_title: str, section_index: str) -> str:
	        """
	        Fetches the Markdown-formatted content of a specific section from a Wikipedia article.

	        Args:
	            page_title (str): The exact Wikipedia article title.
	            section_index (str): The index of the section (from section metadata).
	                When it is ``None`` or an empty string the ``section`` parameter
	                is left out of the request.

	        Returns:
	            str: Markdown-formatted content of the specified section.
	            Example:
	                '# History\\nPython was conceived in the late 1980s...'

	        Raises:
	            KeyError: If the response has no ``parse`` -> ``text`` -> ``*`` entry.
	        """
	        params = {
	            "action": "parse",
	            "page": page_title,
	            "format": "json",
	            "prop": "text",
	        }
	        # Compare explicitly instead of relying on truthiness, so that an
	        # integer 0 (the lead section) is still sent to the API.
	        if section_index is not None and section_index != "":
	            params["section"] = str(section_index)

	        payload = SearchWikipediaEn._api_get(params)

	        # The API returns HTML under parse -> text -> "*"; convert it to Markdown.
	        return markdownify(payload["parse"]["text"]["*"])