|
import io, os, mimetypes, requests, pandas as pd |
|
from langchain.tools import StructuredTool |
|
|
|
def _load_dataset(url_or_path: str, head_only: bool = True) -> str:
    """Load a CSV or JSON dataset and render it as a markdown table.

    Args:
        url_or_path: Either an ``http://``/``https://`` URL or a local
            filesystem path (a leading ``~`` is expanded).
        head_only: When true (the default), return ``DataFrame.head()``;
            otherwise return ``DataFrame.describe(include="all")``.

    Returns:
        The selected view of the data formatted with
        ``DataFrame.to_markdown``.

    Raises:
        ValueError: If neither the guessed MIME type nor the file extension
            identifies the input as CSV or JSON.
        requests.HTTPError: If a remote fetch returns an error status.
        OSError: If a local file cannot be opened.
    """
    # Decide on the parser before touching the network or the filesystem so
    # unsupported inputs are rejected without downloading or opening anything.
    mime = mimetypes.guess_type(url_or_path)[0] or ""
    lowered = url_or_path.lower()
    if "json" in mime or lowered.endswith(".json"):
        reader = pd.read_json
    elif "csv" in mime or lowered.endswith(".csv"):
        reader = pd.read_csv
    else:
        raise ValueError("Only CSV or JSON supported.")

    if url_or_path.startswith(("http://", "https://")):
        resp = requests.get(url_or_path, timeout=10)
        resp.raise_for_status()
        df = reader(io.BytesIO(resp.content))
    else:
        # The context manager guarantees the handle is closed even when
        # parsing fails; the previous version leaked it on every call.
        with open(os.path.expanduser(url_or_path), "rb") as fh:
            df = reader(fh)

    if head_only:
        return df.head().to_markdown(index=False)
    return df.describe(include="all").to_markdown()
|
|
|
# Wrap `_load_dataset` as a LangChain structured tool registered under the
# name "load_dataset".
# NOTE(review): `return_direct=True` presumably makes the agent return the
# tool's markdown output as-is instead of feeding it back to the model —
# confirm against the installed LangChain version.
file_loader_tool = StructuredTool.from_function(
    func=_load_dataset,
    name="load_dataset",
    description=(
        "Load a CSV or JSON file from a URL or local path. Returns a "
        "markdown preview of the data."
    ),
    return_direct=True,
)
|
|