# AI_Agents_Course_Submission / tools / wikipedia_tools.py
# omarequalmars
# Added wikipedia search
# 7b71bd5
# tools/wikipedia_tools.py
"""
Wikipedia search tools built on LlamaIndex's ready-made Wikipedia integration
(the WikipediaToolSpec class from the llama-index-tools-wikipedia package).
"""
from typing import Optional, List
import logging
from .utils import logger
try:
from llama_index.tools.wikipedia import WikipediaToolSpec
WIKIPEDIA_AVAILABLE = True
except ImportError:
logger.warning("LlamaIndex Wikipedia tools not available. Install with: pip install llama-index-tools-wikipedia")
WIKIPEDIA_AVAILABLE = False
class WikipediaTools:
    """Wikipedia search and page retrieval tools using LlamaIndex.

    Wraps a ``WikipediaToolSpec`` instance stored on ``self.tool_spec``.
    The public methods do not propagate exceptions raised by the tool spec:
    they log the failure through the module ``logger`` and return a
    human-readable message string instead.
    """

    # Placeholder strings that WikipediaToolSpec returns *instead of page
    # content* when a search has no hits or a page cannot be loaded. They are
    # non-empty, so a plain truthiness test would mistake them for content.
    # NOTE(review): taken from the llama-index-tools-wikipedia sources; confirm
    # against the installed version. A mismatch is harmless (the check simply
    # never fires).
    _EMPTY_RESULT_MARKERS = frozenset({
        "No search results.",
        "Unable to load page. Try searching instead.",
    })

    # Prefixes of the messages search_wikipedia() returns in place of content.
    _NO_RESULTS_PREFIX = "No Wikipedia results found for: "
    _SEARCH_ERROR_PREFIX = "Error searching Wikipedia: "

    def __init__(self):
        """Create the underlying ``WikipediaToolSpec``.

        Raises:
            ImportError: if the LlamaIndex Wikipedia package could not be
                imported (``WIKIPEDIA_AVAILABLE`` is false).
        """
        if not WIKIPEDIA_AVAILABLE:
            raise ImportError("LlamaIndex Wikipedia tools not installed")
        # Initialize the LlamaIndex Wikipedia tool spec
        self.tool_spec = WikipediaToolSpec()
        logger.info("Wikipedia tools initialized successfully")

    @classmethod
    def _is_empty_result(cls, result: object) -> bool:
        """Return True when *result* is falsy or a known "nothing found" placeholder."""
        if not result:
            return True
        return isinstance(result, str) and result.strip() in cls._EMPTY_RESULT_MARKERS

    def search_wikipedia(self, query: str, language: str = 'en') -> str:
        """
        Search Wikipedia for pages related to a query.

        Args:
            query: Search term
            language: Wikipedia language (default: 'en')

        Returns:
            Whatever ``tool_spec.search_data`` returns for the query, or
            "No Wikipedia results found for: <query>" when the query is
            empty/blank or nothing usable came back, or
            "Error searching Wikipedia: <reason>" when the lookup raised.
        """
        no_results_msg = f"{self._NO_RESULTS_PREFIX}{query}"
        # A blank query cannot match anything; skip the remote call entirely.
        if not query or not query.strip():
            return no_results_msg
        try:
            logger.info(f"Searching Wikipedia for: {query}")
            results = self.tool_spec.search_data(query=query, lang=language)
        except Exception as e:
            error_msg = f"{self._SEARCH_ERROR_PREFIX}{str(e)}"
            logger.error(error_msg)
            return error_msg
        if self._is_empty_result(results):
            return no_results_msg
        logger.info(f"Found Wikipedia results for: {query}")
        return results

    def get_wikipedia_page(self, page_title: str, language: str = 'en') -> str:
        """
        Retrieve a specific Wikipedia page.

        Args:
            page_title: Title of the Wikipedia page
            language: Wikipedia language (default: 'en')

        Returns:
            Whatever ``tool_spec.load_data`` returns for the title, or
            "Wikipedia page not found: <title>" when the title is empty/blank
            or nothing usable came back, or
            "Error retrieving Wikipedia page '<title>': <reason>" when the
            lookup raised.
        """
        not_found_msg = f"Wikipedia page not found: {page_title}"
        # A blank title cannot name a page; skip the remote call entirely.
        if not page_title or not page_title.strip():
            return not_found_msg
        try:
            logger.info(f"Retrieving Wikipedia page: {page_title}")
            content = self.tool_spec.load_data(page=page_title, lang=language)
        except Exception as e:
            error_msg = f"Error retrieving Wikipedia page '{page_title}': {str(e)}"
            logger.error(error_msg)
            return error_msg
        if self._is_empty_result(content):
            return not_found_msg
        logger.info(f"Successfully retrieved Wikipedia page: {page_title}")
        return content

    def search_and_summarize(self, query: str, language: str = 'en', *,
                             max_lines: int = 10, max_chars: int = 2000) -> str:
        """
        Search Wikipedia and return a short excerpt of the result.

        Only the first ``max_lines`` raw lines of the search result are
        considered; blank lines among them are dropped and the remainder is
        joined with newlines. If the excerpt is longer than ``max_chars``
        characters it is cut there and "..." is appended.

        Args:
            query: Search term
            language: Wikipedia language (default: 'en')
            max_lines: Number of leading lines to consider (negative values
                are treated as 0)
            max_chars: Maximum excerpt length before the "..." suffix
                (negative values are treated as 0)

        Returns:
            The excerpt, or the unchanged "no results"/error message produced
            by ``search_wikipedia``, or
            "Error in Wikipedia search and summarize: <reason>" if
            summarising itself failed.
        """
        try:
            # First search for relevant pages
            search_results = self.search_wikipedia(query, language)
            if not isinstance(search_results, str):
                search_results = str(search_results)

            # Status messages are passed through verbatim. A prefix test is
            # used (not a substring test) so article text that merely
            # mentions one of these phrases is still summarised.
            if search_results.startswith(
                (self._NO_RESULTS_PREFIX, self._SEARCH_ERROR_PREFIX)
            ):
                return search_results

            max_lines = max(max_lines, 0)
            max_chars = max(max_chars, 0)

            # Keep only the head of the article so the LLM is not overwhelmed.
            # maxsplit avoids splitting the whole article when only the first
            # few lines are needed.
            head = search_results.split('\n', max_lines)[:max_lines]
            summary = '\n'.join(line for line in head if line.strip())

            # Truncate if too long (to stay within token limits)
            if len(summary) > max_chars:
                summary = summary[:max_chars] + "..."
            return summary
        except Exception as e:
            error_msg = f"Error in Wikipedia search and summarize: {str(e)}"
            logger.error(error_msg)
            return error_msg
# Convenience functions for direct use
def search_wikipedia(query: str, language: str = 'en') -> str:
    """Search Wikipedia without managing a ``WikipediaTools`` instance.

    When ``WIKIPEDIA_AVAILABLE`` is false an installation hint is returned.
    Otherwise a fresh ``WikipediaTools`` is created for this call and its
    ``search_wikipedia`` result is returned.

    Args:
        query: Search term
        language: Wikipedia language (default: 'en')
    """
    if WIKIPEDIA_AVAILABLE:
        return WikipediaTools().search_wikipedia(query, language)
    return "Wikipedia tools not available. Please install llama-index-tools-wikipedia"
def get_wikipedia_page(page_title: str, language: str = 'en') -> str:
    """Fetch a Wikipedia page without managing a ``WikipediaTools`` instance.

    When ``WIKIPEDIA_AVAILABLE`` is false an installation hint is returned.
    Otherwise a fresh ``WikipediaTools`` is created for this call and its
    ``get_wikipedia_page`` result is returned.

    Args:
        page_title: Title of the Wikipedia page
        language: Wikipedia language (default: 'en')
    """
    if WIKIPEDIA_AVAILABLE:
        return WikipediaTools().get_wikipedia_page(page_title, language)
    return "Wikipedia tools not available. Please install llama-index-tools-wikipedia"
def wikipedia_summary(query: str, language: str = 'en') -> str:
    """Get a Wikipedia summary without managing a ``WikipediaTools`` instance.

    When ``WIKIPEDIA_AVAILABLE`` is false an installation hint is returned.
    Otherwise a fresh ``WikipediaTools`` is created for this call and its
    ``search_and_summarize`` result is returned.

    Args:
        query: Search term
        language: Wikipedia language (default: 'en')
    """
    if WIKIPEDIA_AVAILABLE:
        return WikipediaTools().search_and_summarize(query, language)
    return "Wikipedia tools not available. Please install llama-index-tools-wikipedia"