"""Comprehensive, **key-free** toolset for the LangGraph agent.

Every tool is fully self-contained and safe to run inside a public
Hugging Face Space – no private API keys required.

Capabilities covered (GAIA L1):
• general web search   • Wikipedia summary   • light image inspection
• basic math           • current time        • YouTube video metadata
• inspect Excel files
"""

from __future__ import annotations

import datetime
import math
import pathlib
import re
import statistics
from typing import Any, Dict, List

import pandas as pd
import requests
from bs4 import BeautifulSoup
from langchain.tools import BaseTool, tool
from PIL import Image
from pytube import YouTube


def _html_text(soup: BeautifulSoup) -> str:
    """Collapse whitespace in the visible text of *soup* into single spaces."""
    return re.sub(r"\s+", " ", soup.get_text(" ").strip())


@tool
def get_current_time() -> str:
    """Return the current UTC time (ISO-8601)."""
    # timezone-aware form; datetime.utcnow() is deprecated in Python 3.12+
    return datetime.datetime.now(datetime.timezone.utc).isoformat()


@tool
def calculator(expression: str) -> str:
    """Evaluate an arithmetic **expression** (e.g. "2 + 2*3").

    Supported tokens: numbers, + - * / ** % ( ), and constants/funcs from math.*
    Returns the result or an error message.
    """
    # Expose only math.* names and strip builtins so eval() stays sandboxed.
    allowed_names = {
        k: v for k, v in math.__dict__.items() if not k.startswith("__")
    }
    try:
        result = eval(expression, {"__builtins__": {}}, allowed_names)
        return str(result)
    except Exception as exc:
        return f"Error: {exc}"


# Key-free endpoints used by the search tools below.
_DDG_URL = "https://duckduckgo.com/html/"
_WIKI_API = "https://en.wikipedia.org/api/rest_v1/page/summary/{}"


@tool
def web_search(query: str, max_results: int = 6) -> List[Dict[str, str]]:
    """Return *max_results* DuckDuckGo hits for **query**.

    Each hit is a dict with keys: title, url, snippet.
    """
    params = {"q": query, "s": "0"}
    html = requests.post(_DDG_URL, data=params, timeout=10).text
    soup = BeautifulSoup(html, "lxml")

    results = []
    for a in soup.select("a.result__a", limit=max_results):
        title = _html_text(a)
        url = a["href"]
        # Guard against layout changes: the result container may be missing.
        parent = a.find_parent(class_="result")
        snippet_tag = parent.select_one(".result__snippet") if parent else None
        snippet = _html_text(snippet_tag) if snippet_tag else ""
        results.append({"title": title, "url": url, "snippet": snippet})
    return results


@tool
def wikipedia_summary(title: str) -> str:
    """Return the lead paragraph of a Wikipedia page by **title**."""
    url = _WIKI_API.format(requests.utils.quote(title))
    resp = requests.get(url, timeout=10)
    if resp.status_code != 200:
        return f"Error: page '{title}' not found."
    data = resp.json()
    return data.get("extract", "No extract available.")


@tool
def youtube_info(url: str) -> Dict[str, Any]:
    """Fetch basic metadata (title, length, author, views) of a YouTube video."""
    try:
        yt = YouTube(url)
        # Attribute access triggers the actual metadata fetch, so keep it in the try.
        return {
            "title": yt.title,
            "author": yt.author,
            "length_sec": yt.length,
            "views": yt.views,
        }
    except Exception as exc:
        return {"error": str(exc)}


@tool
def image_info(path: str) -> Dict[str, Any]:
    """Return basic stats for an image file at **path** (W×H, mode, format,
    mean pixel value per channel)."""
    p = pathlib.Path(path)
    if not p.exists():
        return {"error": "file not found"}

    try:
        with Image.open(p) as im:
            # Read metadata and pixels while the file handle is still open.
            info: Dict[str, Any] = {
                "width": im.width,
                "height": im.height,
                "format": im.format,
                "mode": im.mode,
            }
            pixels = list(im.getdata())
    except Exception as exc:
        return {"error": str(exc)}

    # Per-channel means for multi-band images, a single mean for grayscale.
    if pixels and isinstance(pixels[0], (tuple, list)):
        channels = list(zip(*pixels))
        means = [statistics.mean(c) for c in channels]
    else:
        means = [statistics.mean(pixels)] if pixels else []

    info["mean_pixel"] = means
    return info


@tool
def excel_preview(path: str, sheet: str | int = 0, nrows: int = 5) -> str:
    """Return the first *nrows* rows of an Excel sheet as a markdown table."""
    p = pathlib.Path(path)
    if not p.exists():
        return "Error: file not found."

    try:
        df = pd.read_excel(p, sheet_name=sheet, engine="openpyxl", nrows=nrows)
    except Exception as exc:
        return f"Error: {exc}"

    return df.to_markdown(index=False)


TOOLS: List[BaseTool] = [
    get_current_time,
    calculator,
    web_search,
    wikipedia_summary,
    youtube_info,
    image_info,
    excel_preview,
]
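

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the original module): a minimal
# smoke test that calls the key-free tools through LangChain's `.invoke`
# interface. The sample query/title strings below are assumptions; the calls
# marked "network" hit live services, so run this only when online.
if __name__ == "__main__":
    print(get_current_time.invoke({}))                      # local, no network
    print(calculator.invoke({"expression": "2 + 2*3"}))     # -> "8"
    for hit in web_search.invoke({"query": "LangGraph", "max_results": 3}):  # network
        print(hit["title"], "->", hit["url"])
    print(wikipedia_summary.invoke({"title": "Alan Turing"})[:200])          # network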