File size: 5,247 Bytes
7b71bd5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
# tools/wikipedia_tools.py
"""
Wikipedia search and page-retrieval tools built on LlamaIndex's ready-made
Wikipedia integration (``WikipediaToolSpec`` from llama-index-tools-wikipedia).
"""
from typing import Optional, List
import logging
from .utils import logger
# Optional dependency: record whether the LlamaIndex Wikipedia integration
# can be imported so the rest of the module can degrade gracefully.
try:
    from llama_index.tools.wikipedia import WikipediaToolSpec
except ImportError:
    logger.warning("LlamaIndex Wikipedia tools not available. Install with: pip install llama-index-tools-wikipedia")
    WIKIPEDIA_AVAILABLE = False
else:
    WIKIPEDIA_AVAILABLE = True
class WikipediaTools:
    """Wikipedia search and page retrieval tools using LlamaIndex.

    The query methods catch any exception raised while talking to the
    underlying tool spec, log it, and return a human-readable message
    instead of propagating the error.
    """

    # Prefixes of the status messages produced by ``search_wikipedia``.
    # ``search_and_summarize`` uses them to recognise a reply that is a
    # status message rather than article text.
    _NO_RESULTS_PREFIX = "No Wikipedia results found for: "
    _SEARCH_ERROR_PREFIX = "Error searching Wikipedia: "

    def __init__(self):
        """Create the underlying ``WikipediaToolSpec``.

        Raises:
            ImportError: If the LlamaIndex Wikipedia integration could not
                be imported when this module was loaded.
        """
        if not WIKIPEDIA_AVAILABLE:
            raise ImportError("LlamaIndex Wikipedia tools not installed")

        # Initialize the LlamaIndex Wikipedia tool spec
        self.tool_spec = WikipediaToolSpec()
        logger.info("Wikipedia tools initialized successfully")

    def search_wikipedia(self, query: str, language: str = 'en') -> str:
        """Search Wikipedia for pages related to a query.

        Args:
            query: Search term.
            language: Wikipedia language, forwarded to the tool spec as
                ``lang`` (default: 'en').

        Returns:
            Whatever the tool spec's ``search_data`` returned when it is
            truthy; otherwise a "No Wikipedia results found" message. If
            the call raises, an "Error searching Wikipedia" message.
        """
        try:
            logger.info(f"Searching Wikipedia for: {query}")

            results = self.tool_spec.search_data(query=query, lang=language)

            if results:
                logger.info(f"Found Wikipedia results for: {query}")
                return results
            return f"{self._NO_RESULTS_PREFIX}{query}"

        except Exception as e:
            error_msg = f"{self._SEARCH_ERROR_PREFIX}{str(e)}"
            logger.error(error_msg)
            return error_msg

    def get_wikipedia_page(self, page_title: str, language: str = 'en') -> str:
        """Retrieve a specific Wikipedia page.

        Args:
            page_title: Title of the Wikipedia page.
            language: Wikipedia language, forwarded to the tool spec as
                ``lang`` (default: 'en').

        Returns:
            Whatever the tool spec's ``load_data`` returned when it is
            truthy; otherwise a "Wikipedia page not found" message. If the
            call raises, an "Error retrieving Wikipedia page" message.
        """
        try:
            logger.info(f"Retrieving Wikipedia page: {page_title}")

            content = self.tool_spec.load_data(page=page_title, lang=language)

            if content:
                logger.info(f"Successfully retrieved Wikipedia page: {page_title}")
                return content
            return f"Wikipedia page not found: {page_title}"

        except Exception as e:
            error_msg = f"Error retrieving Wikipedia page '{page_title}': {str(e)}"
            logger.error(error_msg)
            return error_msg

    def search_and_summarize(self, query: str, language: str = 'en', *,
                             max_lines: int = 10, max_chars: int = 2000) -> str:
        """Search Wikipedia and return a shortened version of the result.

        Args:
            query: Search term.
            language: Wikipedia language (default: 'en').
            max_lines: How many leading lines of the search result are
                considered; blank lines among them are dropped
                (default: 10). Expected to be non-negative.
            max_chars: Maximum summary length before it is cut and "..."
                is appended (default: 2000). Expected to be non-negative.

        Returns:
            The shortened text, or the unchanged status message when the
            search found nothing or failed. If summarizing itself raises,
            an "Error in Wikipedia search and summarize" message.
        """
        try:
            search_results = self.search_wikipedia(query, language)

            # Only a reply that *starts* with one of our own status prefixes
            # is a status message. A substring test over the whole text
            # would misclassify an article that merely contains the phrase,
            # and error messages should not be trimmed like article text.
            status_prefixes = (self._NO_RESULTS_PREFIX, self._SEARCH_ERROR_PREFIX)
            if search_results.startswith(status_prefixes):
                return search_results

            # Keep only the head of the text so the caller (typically an
            # LLM prompt) is not flooded with the full article.
            leading_lines = search_results.split('\n')[:max_lines]
            summary = '\n'.join(line for line in leading_lines if line.strip())

            # Truncate if too long (to stay within token limits)
            if len(summary) > max_chars:
                summary = summary[:max_chars] + "..."

            return summary

        except Exception as e:
            error_msg = f"Error in Wikipedia search and summarize: {str(e)}"
            logger.error(error_msg)
            return error_msg
# Convenience functions for direct use
def search_wikipedia(query: str, language: str = 'en') -> str:
    """Search Wikipedia without managing a ``WikipediaTools`` instance.

    Args:
        query: Search term.
        language: Wikipedia language (default: 'en').

    Returns:
        The result of ``WikipediaTools.search_wikipedia``, or an
        installation hint when the Wikipedia integration is unavailable.
    """
    if WIKIPEDIA_AVAILABLE:
        # A fresh helper object is created for every call.
        return WikipediaTools().search_wikipedia(query, language)
    return "Wikipedia tools not available. Please install llama-index-tools-wikipedia"
def get_wikipedia_page(page_title: str, language: str = 'en') -> str:
    """Fetch a Wikipedia page without managing a ``WikipediaTools`` instance.

    Args:
        page_title: Title of the Wikipedia page.
        language: Wikipedia language (default: 'en').

    Returns:
        The result of ``WikipediaTools.get_wikipedia_page``, or an
        installation hint when the Wikipedia integration is unavailable.
    """
    if WIKIPEDIA_AVAILABLE:
        # A fresh helper object is created for every call.
        return WikipediaTools().get_wikipedia_page(page_title, language)
    return "Wikipedia tools not available. Please install llama-index-tools-wikipedia"
def wikipedia_summary(query: str, language: str = 'en') -> str:
    """Get a Wikipedia summary without managing a ``WikipediaTools`` instance.

    Args:
        query: Search term.
        language: Wikipedia language (default: 'en').

    Returns:
        The result of ``WikipediaTools.search_and_summarize``, or an
        installation hint when the Wikipedia integration is unavailable.
    """
    if WIKIPEDIA_AVAILABLE:
        # A fresh helper object is created for every call.
        return WikipediaTools().search_and_summarize(query, language)
    return "Wikipedia tools not available. Please install llama-index-tools-wikipedia"
|