|
from langchain_core.tools import tool |
|
import os |
|
from typing import Optional |
|
import tempfile |
|
import requests |
|
from urllib.parse import urlparse |
|
import pytesseract |
|
from PIL import Image |
|
import pandas as pd |
|
import uuid |
|
|
|
@tool
def save_and_read_file(content: str, filename: Optional[str] = None) -> str:
    """
    Save content to a file in the system temp directory and return the path.

    Args:
        content (str): the content to save to the file
        filename (str, optional): the name of the file. If not provided, a
            random name file will be created. Only the final path component
            is used, so the file is always created inside the temp directory.

    Returns:
        str: a message containing the path of the saved file.

    Raises:
        OSError: if the file cannot be created or written.
    """
    temp_dir = tempfile.gettempdir()

    # Keep only the last path component so names such as "../x" or "/etc/x"
    # cannot place the file outside the temp directory.
    safe_name = os.path.basename(filename) if filename else ""

    if safe_name in ("", ".", ".."):
        # No usable name: let tempfile pick a unique one and write through
        # that same handle, so it is closed when the `with` block exits
        # (delete=False keeps the file on disk afterwards).
        with tempfile.NamedTemporaryFile(
            mode="w", delete=False, dir=temp_dir, encoding="utf-8"
        ) as temp_file:
            temp_file.write(content)
            filepath = temp_file.name
    else:
        filepath = os.path.join(temp_dir, safe_name)
        # Explicit UTF-8 so non-ASCII content does not depend on the locale.
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(content)

    return f"File saved to {filepath}. You can read this file to process its contents."
|
|
|
@tool
def download_file_from_url(url: str, filename: Optional[str] = None) -> str:
    """
    Download a file from a URL and save it to a temporary location.

    Args:
        url (str): the URL of the file to download.
        filename (str, optional): the name of the file. If not provided, the
            last component of the URL path is used, or a random name when the
            URL path has none. Only the final path component is kept, so the
            file is always created inside the temp directory.

    Returns:
        str: a message containing the path of the downloaded file, or a
            message starting with "Error downloading file:" on failure.
    """
    # Seconds to wait for the server; without a timeout requests can block
    # indefinitely on an unresponsive host.
    timeout_seconds = 30

    try:
        # Strip directory parts so the name cannot escape the temp directory.
        name = os.path.basename(filename) if filename else ""
        if name in ("", ".", ".."):
            name = os.path.basename(urlparse(url).path)
        if name in ("", ".", ".."):
            name = f"downloaded_{uuid.uuid4().hex[:8]}"

        filepath = os.path.join(tempfile.gettempdir(), name)

        # With stream=True the connection stays open until the response is
        # closed, so use it as a context manager.
        with requests.get(url, stream=True, timeout=timeout_seconds) as response:
            response.raise_for_status()

            with open(filepath, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        return f"File downloaded to {filepath}. You can read this file to process its contents."
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error downloading file: {str(e)}"
|
|
|
@tool
def extract_text_from_image(image_path: str) -> str:
    """
    Extract text from an image using the OCR library pytesseract.

    Args:
        image_path (str): the path to the image file.

    Returns:
        str: the extracted text prefixed with a short header, or a message
            starting with "Error extracting text from image:" on failure.
    """
    try:
        # Open through a context manager so the underlying file handle is
        # released once OCR has finished (or failed).
        with Image.open(image_path) as image:
            text = pytesseract.image_to_string(image)

        return f"Extracted text from image:\n\n{text}"
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error extracting text from image: {str(e)}"
|
|
|
@tool
def analyze_csv_file(file_path: str, query: str) -> str:
    """
    Load a CSV file with pandas and return a summary of its contents.

    Args:
        file_path (str): the path to the CSV file.
        query (str): Question about the data (not used when building the
            summary).

    Returns:
        str: row/column counts, the column names and the output of
            `DataFrame.describe()`, or a message starting with
            "Error analyzing CSV file:" on failure.
    """
    try:
        frame = pd.read_csv(file_path)
        row_count, column_count = frame.shape

        # Pieces are evaluated in order and concatenated without separators;
        # each piece carries its own trailing newlines.
        report_parts = [
            f"CSV file loaded with {row_count} rows and {column_count} columns.\n",
            f"Columns: {', '.join(frame.columns)}\n\n",
            "Summary statistics:\n",
            str(frame.describe()),
        ]
        return "".join(report_parts)
    except Exception as error:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error analyzing CSV file: {error!s}"
|
|
|
@tool
def analyze_excel_file(file_path: str, query: str) -> str:
    """
    Load an Excel file with pandas and return a summary of its contents.

    Args:
        file_path (str): the path to the Excel file.
        query (str): Question about the data (not used when building the
            summary).

    Returns:
        str: row/column counts, the column names and the output of
            `DataFrame.describe()`, or a message starting with
            "Error analyzing Excel file:" on failure.
    """
    try:
        df = pd.read_excel(file_path)

        result = (
            f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
        )
        # Header cells holding numbers or dates come back as non-string
        # labels; convert them so str.join does not raise TypeError.
        result += f"Columns: {', '.join(map(str, df.columns))}\n\n"

        result += "Summary statistics:\n"
        result += str(df.describe())

        return result
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error analyzing Excel file: {str(e)}"
|
|