Spaces:
Running
Running
| """ | |
| Base class for all research tools | |
| """ | |
| from abc import ABC, abstractmethod | |
| from typing import Dict, Any, Optional | |
| import time | |
| import re | |
| from datetime import datetime | |
class BaseTool(ABC):
    """Base class for all research tools.

    Bundles the plumbing shared by concrete tools: a simple per-instance
    request rate limiter, heuristic quality scoring of result text,
    light-weight metadata extraction and a uniform error message format.
    Subclasses must implement :meth:`search`.
    """

    # Baseline authority per source label; unknown labels fall back to
    # _DEFAULT_AUTHORITY.
    _AUTHORITY_BY_SOURCE = {
        'arxiv': 0.9,
        'sec': 0.95,
        'github': 0.7,
        'wikipedia': 0.8,
        'web': 0.5,
    }
    _DEFAULT_AUTHORITY = 0.5

    # Substrings whose presence in the result text nudges authority upward.
    _CREDIBILITY_MARKERS = (
        'study', 'research', 'university', 'published', 'peer-reviewed', 'official',
    )

    # Contribution of each metric to the "overall" quality score (sums to 1).
    _QUALITY_WEIGHTS = {"recency": 0.2, "authority": 0.3, "specificity": 0.3, "relevance": 0.2}

    def __init__(self, name: str, description: str):
        """Store the tool's identity and initialise rate-limit state.

        Args:
            name: Display name, also used in formatted error responses.
            description: Human-readable summary of what the tool does.
        """
        self.name = name
        self.description = description
        self.last_request_time = 0  # time.time() of the last permitted request
        self.rate_limit_delay = 1.0  # seconds between requests

    @abstractmethod
    def search(self, query: str, **kwargs) -> str:
        """Main search method - must be implemented by subclasses.

        Args:
            query: The research query.
            **kwargs: Tool-specific options.

        Returns:
            The research result as text.
        """

    def rate_limit(self) -> None:
        """Block until at least ``rate_limit_delay`` seconds separate requests.

        Sleeps for the remainder of the delay when called too soon after the
        previous call, then records the current time as the last request time.
        """
        elapsed = time.time() - self.last_request_time
        # A negative elapsed value means the wall clock stepped backwards;
        # cap the wait at one full delay so the caller never stalls for longer.
        wait = min(self.rate_limit_delay, self.rate_limit_delay - elapsed)
        if wait > 0:
            time.sleep(wait)
        self.last_request_time = time.time()

    def score_research_quality(self, research_result: str, source: str = "web") -> Dict[str, float]:
        """Score research based on multiple quality indicators.

        Args:
            research_result: The text returned by a tool.
            source: Source label (e.g. ``"arxiv"``, ``"web"``) used for the
                authority baseline.

        Returns:
            A dict with ``recency``, ``authority``, ``specificity`` and
            ``relevance`` scores plus their weighted ``overall`` combination.
        """
        quality_score = {
            "recency": self._check_recency(research_result),
            "authority": self._check_authority(research_result, source),
            "specificity": self._check_specificity(research_result),
            "relevance": self._check_relevance(research_result),
            "overall": 0.0,
        }
        # Weighted overall score
        quality_score["overall"] = sum(
            quality_score[metric] * weight
            for metric, weight in self._QUALITY_WEIGHTS.items()
        )
        return quality_score

    def _check_recency(self, text: str) -> float:
        """Score how recent the latest 20xx year mentioned in ``text`` is.

        Returns:
            A value in [0, 1] that decays linearly over 10 years from the
            current year, or 0.3 when the text is empty or has no year.
        """
        if not text:
            return 0.3

        years = re.findall(r'\b(20\d{2})\b', text)
        if not years:
            return 0.3  # Default for no date found

        latest_year = max(int(year) for year in years)
        age = datetime.now().year - latest_year
        # Clamp both ends: years in the future (forecasts, stray numbers such
        # as "2048") would otherwise yield a score above 1.0.
        return min(1.0, max(0.0, 1 - age / 10))  # Decay over 10 years

    def _check_authority(self, text: str, source: str) -> float:
        """Score source authority and credibility indicators.

        Args:
            text: Result text scanned (case-insensitively, by substring) for
                credibility markers; each distinct marker adds 0.05, up to 0.3.
            source: Source label, matched case-insensitively. A missing or
                unknown label gets the default baseline of 0.5.

        Returns:
            Baseline plus marker boost, capped at 1.0.
        """
        label = (source or "web").lower()
        base_score = self._AUTHORITY_BY_SOURCE.get(label, self._DEFAULT_AUTHORITY)

        # Look for credibility markers in text
        if text:
            lowered = text.lower()
            marker_count = sum(1 for marker in self._CREDIBILITY_MARKERS if marker in lowered)
            base_score += min(0.3, marker_count * 0.05)

        return min(1.0, base_score)

    def _check_specificity(self, text: str) -> float:
        """Score the density of specific data points and quantitative wording.

        Each standalone number adds 0.02 and each precision word (exactly,
        precisely, specifically, measured, calculated) adds 0.1.

        Returns:
            A value in [0.1, 1.0]; 0.1 for empty text.
        """
        if not text:
            return 0.1

        # Count numbers, percentages, specific metrics
        numbers = len(re.findall(r'\b\d+(?:\.\d+)?%?\b', text))
        specific_terms = len(
            re.findall(r'\b(?:exactly|precisely|specifically|measured|calculated)\b', text, re.IGNORECASE)
        )

        specificity = min(1.0, (numbers * 0.02) + (specific_terms * 0.1))
        return max(0.1, specificity)  # Minimum baseline

    def _check_relevance(self, text: str) -> float:
        """Return a fixed relevance baseline (simplified implementation).

        The text is not inspected; comparing against the original query
        would be needed for a real relevance score.
        """
        return 0.7  # Placeholder - could be enhanced with query matching

    def should_use_for_query(self, query: str) -> bool:
        """Determine if this tool should be used for the given query.

        The default accepts every query; override in subclasses for smart
        routing.
        """
        return True

    def extract_key_info(self, text: str) -> Dict[str, Any]:
        """Extract key information from research results.

        Returns:
            An empty dict for empty text; otherwise ``length`` plus boolean
            flags ``has_numbers``, ``has_dates`` (a 20xx year) and
            ``has_urls`` (an http/https scheme).
        """
        if not text:
            return {}

        return {
            'length': len(text),
            'has_numbers': bool(re.search(r'\d+', text)),
            'has_dates': bool(re.search(r'\b20\d{2}\b', text)),
            'has_urls': bool(re.search(r'http[s]?://', text)),
        }

    def format_error_response(self, query: str, error: str) -> str:
        """Format a consistent error response.

        Args:
            query: The query that failed.
            error: The error (or its message); only the first 100 characters
                of its string form are included.
        """
        return f"**{self.name} Research for: {query}**\n\nResearch temporarily unavailable: {str(error)[:100]}..."