File size: 5,247 Bytes
7b71bd5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# tools/wikipedia_tools.py
"""
Wikipedia search tools using LlamaIndex's ready-made Wikipedia integration
Based on search results showing WikipediaToolSpec usage
"""

from typing import Optional, List
import logging
from .utils import logger

# Optional-dependency guard: record in WIKIPEDIA_AVAILABLE whether the
# LlamaIndex Wikipedia integration could be imported, so this module still
# imports cleanly when the package is missing.
try:
    from llama_index.tools.wikipedia import WikipediaToolSpec
    WIKIPEDIA_AVAILABLE = True
except ImportError:
    # Only warn here. WikipediaTools.__init__ raises ImportError and the
    # standalone helper functions return a message when the flag is False.
    logger.warning("LlamaIndex Wikipedia tools not available. Install with: pip install llama-index-tools-wikipedia")
    WIKIPEDIA_AVAILABLE = False

class WikipediaTools:
    """Wikipedia search and page retrieval tools using LlamaIndex.

    Thin wrapper around ``WikipediaToolSpec``. The lookup methods never
    raise for a failed lookup: the exception is logged and an error message
    is returned as the result string instead.
    """

    def __init__(self):
        """Create the underlying ``WikipediaToolSpec``.

        Raises:
            ImportError: If the LlamaIndex Wikipedia integration was not
                importable when this module was loaded.
        """
        if not WIKIPEDIA_AVAILABLE:
            raise ImportError("LlamaIndex Wikipedia tools not installed")

        # Initialize the LlamaIndex Wikipedia tool spec
        self.tool_spec = WikipediaToolSpec()
        logger.info("Wikipedia tools initialized successfully")

    def search_wikipedia(self, query: str, language: str = 'en') -> str:
        """
        Search Wikipedia for pages related to a query.

        Delegates to ``self.tool_spec.search_data(query=..., lang=...)``.

        Args:
            query: Search term
            language: Wikipedia language (default: 'en')

        Returns:
            The value returned by ``search_data`` when it is truthy, a
            "No Wikipedia results found" message when it is falsy, or an
            "Error searching Wikipedia" message if the call raised.
        """
        try:
            logger.info(f"Searching Wikipedia for: {query}")

            results = self.tool_spec.search_data(query=query, lang=language)

            if results:
                logger.info(f"Found Wikipedia results for: {query}")
                return results
            else:
                return f"No Wikipedia results found for: {query}"

        except Exception as e:
            # Deliberate best-effort: report the failure as the result text
            # rather than propagating it to the caller.
            error_msg = f"Error searching Wikipedia: {str(e)}"
            logger.error(error_msg)
            return error_msg

    def get_wikipedia_page(self, page_title: str, language: str = 'en') -> str:
        """
        Retrieve a specific Wikipedia page.

        Delegates to ``self.tool_spec.load_data(page=..., lang=...)``.

        Args:
            page_title: Title of the Wikipedia page
            language: Wikipedia language (default: 'en')

        Returns:
            The value returned by ``load_data`` when it is truthy, a
            "Wikipedia page not found" message when it is falsy, or an
            "Error retrieving Wikipedia page" message if the call raised.
        """
        try:
            logger.info(f"Retrieving Wikipedia page: {page_title}")

            content = self.tool_spec.load_data(page=page_title, lang=language)

            if content:
                logger.info(f"Successfully retrieved Wikipedia page: {page_title}")
                return content
            else:
                return f"Wikipedia page not found: {page_title}"

        except Exception as e:
            # Deliberate best-effort: report the failure as the result text
            # rather than propagating it to the caller.
            error_msg = f"Error retrieving Wikipedia page '{page_title}': {str(e)}"
            logger.error(error_msg)
            return error_msg

    def search_and_summarize(
        self,
        query: str,
        language: str = 'en',
        *,
        max_lines: int = 10,
        max_chars: int = 2000,
    ) -> str:
        """
        Search Wikipedia and return a shortened version of the result.

        Runs ``search_wikipedia`` and keeps only the beginning of its output
        so the text stays small enough to hand to an LLM.

        Args:
            query: Search term
            language: Wikipedia language (default: 'en')
            max_lines: How many leading lines of the search result to
                consider (default: 10). Blank lines count against this
                budget and are dropped afterwards, so the summary may hold
                fewer lines. Expected to be non-negative.
            max_chars: Maximum summary length before truncation
                (default: 2000). A longer summary is cut to this many
                characters and "..." is appended. Expected to be
                non-negative.

        Returns:
            The shortened search result, the unchanged "No Wikipedia results
            found" message when the search produced nothing, or an error
            message if summarizing raised.
        """
        try:
            # First search for relevant pages
            search_results = self.search_wikipedia(query, language)

            if "No Wikipedia results found" in search_results:
                return search_results

            # Slice first, then drop blank lines: this keeps the historical
            # behavior where blank lines consume part of the line budget.
            leading_lines = search_results.split('\n')[:max_lines]
            summary = '\n'.join(line for line in leading_lines if line.strip())

            # Truncate if too long (to stay within token limits)
            if len(summary) > max_chars:
                summary = summary[:max_chars] + "..."

            return summary

        except Exception as e:
            error_msg = f"Error in Wikipedia search and summarize: {str(e)}"
            logger.error(error_msg)
            return error_msg

# Convenience functions for direct use
def search_wikipedia(query: str, language: str = 'en') -> str:
    """Search Wikipedia without managing a ``WikipediaTools`` instance.

    Builds a fresh ``WikipediaTools`` for the call and forwards to its
    ``search_wikipedia`` method. When the Wikipedia integration is not
    available, returns an installation hint instead.
    """
    if WIKIPEDIA_AVAILABLE:
        return WikipediaTools().search_wikipedia(query, language)

    return "Wikipedia tools not available. Please install llama-index-tools-wikipedia"

def get_wikipedia_page(page_title: str, language: str = 'en') -> str:
    """Fetch a Wikipedia page without managing a ``WikipediaTools`` instance.

    Builds a fresh ``WikipediaTools`` for the call and forwards to its
    ``get_wikipedia_page`` method. When the Wikipedia integration is not
    available, returns an installation hint instead.
    """
    if WIKIPEDIA_AVAILABLE:
        return WikipediaTools().get_wikipedia_page(page_title, language)

    return "Wikipedia tools not available. Please install llama-index-tools-wikipedia"

def wikipedia_summary(query: str, language: str = 'en') -> str:
    """Get a shortened Wikipedia search result in a single call.

    Builds a fresh ``WikipediaTools`` for the call and forwards to its
    ``search_and_summarize`` method. When the Wikipedia integration is not
    available, returns an installation hint instead.
    """
    if WIKIPEDIA_AVAILABLE:
        return WikipediaTools().search_and_summarize(query, language)

    return "Wikipedia tools not available. Please install llama-index-tools-wikipedia"