# Spaces: Sleeping, Sleeping  (page-status text captured along with the source; not part of the module code)
import requests | |
from langchain.tools import tool | |
from duckduckgo_search import DDGS | |
from bs4 import BeautifulSoup | |
import tempfile | |
from typing import Optional | |
import os | |
from urllib.parse import urlparse | |
def search(query: str) -> str:
    """Search the internet with DuckDuckGo and return the hits as text.

    Args:
        query (str): Search query.

    Returns:
        str: One paragraph per result, each listing that result's fields as
            ``key: value`` lines, or ``"No results found."`` when the search
            yields nothing.
    """
    with DDGS() as ddgs:
        results = list(ddgs.text(query, max_results=5))
    if not results:
        return "No results found."
    # The signature promises a string, so render the result mappings instead
    # of handing the raw list back to the caller.
    return "\n\n".join(
        "\n".join(f"{key}: {value}" for key, value in result.items())
        for result in results
    )
def process_content(url: str) -> str:
    """Fetch a webpage and return its text content.

    Args:
        url (str): URL of the page to fetch.

    Returns:
        str: The text extracted from the page's HTML.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive host.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.content, "html.parser")
    return soup.get_text()
def save_file(content: str, filename: Optional[str] = None) -> str:
    """Save content to a file in the temporary directory and report its path.

    Useful for processing files from the GAIA API.

    Args:
        content: The text to write to the file.
        filename: Optional file name. Only its final path component is used,
            so the file always lands inside the temporary directory. A random
            name is generated when it is omitted or has no usable component.

    Returns:
        A message containing the path of the saved file.
    """
    # Drop any directory parts so "../x" or an absolute path cannot escape
    # the temporary directory.
    safe_name = os.path.basename(filename) if filename else ""
    if safe_name in ("", ".", ".."):
        # delete=False keeps the file on disk after the handle is closed; the
        # context manager guarantees the handle itself does not leak.
        with tempfile.NamedTemporaryFile(
            mode="w", encoding="utf-8", delete=False
        ) as temp_file:
            temp_file.write(content)
            filepath = temp_file.name
    else:
        filepath = os.path.join(tempfile.gettempdir(), safe_name)
        # Explicit encoding so the result does not depend on the locale.
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(content)
    return f"File saved to {filepath}. You can read this file to process its contents."
def download_file_from_url(url: str, filename: Optional[str] = None) -> str:
    """Download a file from a URL and save it in the temporary directory.

    Args:
        url: The URL to download from.
        filename: Optional file name. Only its final path component is used.
            When omitted, the name is taken from the URL path, falling back
            to a random ``downloaded_<hex>`` name.

    Returns:
        A message containing the path of the downloaded file, or an error
        message if anything goes wrong (errors are reported, not raised).
    """
    import uuid

    try:
        # Keep only the last path component so neither a supplied name nor a
        # crafted URL path can write outside the temporary directory.
        safe_name = os.path.basename(filename) if filename else ""
        if not safe_name:
            safe_name = os.path.basename(urlparse(url).path)
        if safe_name in ("", ".", ".."):
            # Nothing usable could be extracted; generate a random name.
            safe_name = f"downloaded_{uuid.uuid4().hex[:8]}"
        filepath = os.path.join(tempfile.gettempdir(), safe_name)

        # The context manager releases the streamed connection even on error;
        # the timeout prevents hanging forever on an unresponsive server.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()
            with open(filepath, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        return f"File downloaded to {filepath}. You can now process this file."
    except Exception as e:
        # Deliberately broad: failures are reported back as text.
        return f"Error downloading file: {str(e)}"
def extract_text_from_image(image_path: str) -> str:
    """Extract text from an image using pytesseract (if available).

    Args:
        image_path: Path to the image file.

    Returns:
        The extracted text prefixed with a short header, or an error message
        when the OCR libraries are missing or the extraction fails.
    """
    try:
        # Imported lazily so the module still loads without the OCR stack.
        import pytesseract
        from PIL import Image

        # Open through a context manager so the underlying file handle is
        # released as soon as OCR finishes, even if it raises.
        with Image.open(image_path) as image:
            text = pytesseract.image_to_string(image)
        return f"Extracted text from image:\n\n{text}"
    except ImportError:
        return "Error: pytesseract is not installed. Please install it with 'pip install pytesseract' and ensure Tesseract OCR is installed on your system."
    except Exception as e:
        return f"Error extracting text from image: {str(e)}"
def analyze_csv_file(file_path: str, query: str) -> str:
    """Load a CSV file with pandas and report a summary of its contents.

    Args:
        file_path: Path to the CSV file.
        query: Question about the data. Accepted as part of the interface;
            the current implementation does not consult it.

    Returns:
        The row/column counts, the column names and the output of
        ``DataFrame.describe()``, or an error message if pandas is missing
        or the file cannot be analyzed.
    """
    try:
        import pandas as pd

        frame = pd.read_csv(file_path)
        n_rows, n_cols = len(frame), len(frame.columns)
        column_names = ", ".join(frame.columns)
        # Assemble the report in order: header, column list, statistics.
        report = [
            f"CSV file loaded with {n_rows} rows and {n_cols} columns.\n",
            f"Columns: {column_names}\n\n",
            "Summary statistics:\n",
            str(frame.describe()),
        ]
        return "".join(report)
    except ImportError:
        return "Error: pandas is not installed. Please install it with 'pip install pandas'."
    except Exception as exc:
        return f"Error analyzing CSV file: {exc!s}"
def analyze_excel_file(file_path: str, query: str) -> str:
    """Load an Excel file with pandas and report a summary of its contents.

    Args:
        file_path: Path to the Excel file.
        query: Question about the data. Accepted as part of the interface;
            the current implementation does not consult it.

    Returns:
        The row/column counts, the column names and the output of
        ``DataFrame.describe()``, or an error message if a required library
        is missing or the file cannot be analyzed.
    """
    try:
        import pandas as pd

        df = pd.read_excel(file_path)
        # Spreadsheet headers may be numbers or dates rather than text, and
        # str.join rejects non-string items, so stringify each label first.
        column_names = ", ".join(map(str, df.columns))
        parts = [
            f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns.\n",
            f"Columns: {column_names}\n\n",
            "Summary statistics:\n",
            str(df.describe()),
        ]
        return "".join(parts)
    except ImportError:
        return "Error: pandas and openpyxl are not installed. Please install them with 'pip install pandas openpyxl'."
    except Exception as e:
        return f"Error analyzing Excel file: {str(e)}"
def get_tools():
    """Return the list of tool functions that are currently enabled.

    Only ``search`` is included; the other helpers named below are not part
    of the returned list.
    """
    enabled_tools = [search]
    # Not enabled (add a name to the list above to include it):
    #   process_content, save_file, download_file_from_url,
    #   extract_text_from_image, analyze_csv_file, analyze_excel_file
    return enabled_tools