infersense

Runtime error

infersense / tools.py

semioz

init1

13ba8fa 4 months ago

5.14 kB

	import polars as pl
	import pytesseract
	from langchain_community.document_loaders import ArxivLoader, WikipediaLoader
	from langchain_community.tools.tavily_search import TavilySearchResults
	from langchain_core.tools import tool
	from PIL import Image

	# --------- Basic Math tools ---------

	@tool
	def add(a: float, b: float) -> float:
	    """
	    Return the sum of two numbers.

	    Args:
	        a (float): the first addend
	        b (float): the second addend
	    """
	    total = a + b
	    return total

	@tool
	def subtract(a: float, b: float) -> float:
	    """
	    Subtract the second number from the first.

	    Args:
	        a (float): the number to subtract from
	        b (float): the number to subtract

	    Returns:
	        The difference ``a - b``. The result is a float whenever either
	        operand is a float, so the return annotation is ``float``.
	    """
	    return a - b

	@tool
	def multiply(a: float, b: float) -> float:
	    """
	    Return the product of two numbers.

	    Args:
	        a (float): the first factor
	        b (float): the second factor
	    """
	    product = a * b
	    return product

	@tool
	def divide(a: float, b: float) -> float:
	    """
	    Divide the first number by the second.

	    Args:
	        a (float): the dividend
	        b (float): the divisor

	    Returns:
	        The quotient ``a / b``.

	    Raises:
	        ValueError: if ``b`` is zero.
	    """
	    # Guard explicitly so the caller gets a readable message instead of
	    # Python's generic ZeroDivisionError.
	    if b == 0:
	        raise ValueError("Cannot divide by zero.")
	    return a / b


	@tool
	def modulus(a: int, b: int) -> int:
	    """
	    Return the remainder of dividing the first number by the second.

	    Args:
	        a (int): the dividend
	        b (int): the divisor
	    """
	    remainder = a % b
	    return remainder


	@tool
	def power(a: float, b: float) -> float:
	    """
	    Raise the first number to the power of the second.

	    Args:
	        a (float): the base
	        b (float): the exponent
	    """
	    # Two-argument pow() is the function form of the ** operator.
	    return pow(a, b)

	# ------- Search Tools -------

	@tool
	def arxiv_search(query: str) -> dict:
	    """Search Arxiv for a query and return at most 3 results.

	    Args:
	        query: The search query.

	    Returns:
	        A dict with the single key "arxiv_results" whose value is the
	        formatted documents joined by a "---" separator. Each document's
	        content is truncated to its first 1000 characters.
	    """
	    search_docs = ArxivLoader(query=query, load_max_docs=3).load()

	    formatted_docs = []
	    for doc in search_docs:
	        meta = doc.metadata
	        # A hard ["source"] lookup raises KeyError when the key is absent.
	        # NOTE(review): ArxivLoader's default metadata appears to carry
	        # "Title" (and "entry_id" only when all metadata is requested)
	        # rather than "source" -- confirm against the installed version.
	        source = meta.get("source") or meta.get("entry_id") or meta.get("Title", "")
	        page = meta.get("page", "")
	        formatted_docs.append(
	            f'<Document source="{source}" page="{page}"/>\n{doc.page_content[:1000]}\n</Document>'
	        )

	    return {"arxiv_results": "\n\n---\n\n".join(formatted_docs)}

	@tool
	def web_search(query: str) -> dict:
	    """Search the Web via Tavily for a query and return at most 3 results.

	    Args:
	        query: The search query.

	    Returns:
	        A dict with the single key "web_results" whose value is the
	        formatted results joined by a "---" separator.
	    """
	    search_docs = TavilySearchResults(max_results=3).invoke(query)

	    # NOTE(review): the Tavily tool appears to return an error string
	    # instead of raising when the request fails -- confirm. Pass such a
	    # string through rather than iterating over its characters.
	    if isinstance(search_docs, str):
	        return {"web_results": search_docs}

	    # Each result is read as a plain mapping via .get(); a mapping has no
	    # `.metadata` attribute, so the URL is taken from the result itself.
	    formatted_docs = [
	        f'<Document source="{doc.get("url", "")}" title="{doc.get("title", "")}"/>\n{doc.get("content", "")}\n</Document>'
	        for doc in search_docs
	    ]
	    return {"web_results": "\n\n---\n\n".join(formatted_docs)}

	@tool
	def wikipedia_search(query: str) -> dict:
	    """Search Wikipedia for a query and return at most 3 results.

	    Args:
	        query: The search query.

	    Returns:
	        A dict with the single key "wiki_results" whose value is the
	        formatted documents joined by a "---" separator.
	    """
	    search_docs = WikipediaLoader(query=query, load_max_docs=3).load()

	    formatted_docs = []
	    for doc in search_docs:
	        meta = doc.metadata
	        # .get() keeps one document with missing metadata from failing
	        # the whole search with a KeyError.
	        source = meta.get("source", "")
	        page = meta.get("page", "")
	        formatted_docs.append(
	            f'<Document source="{source}" page="{page}"/>\n{doc.page_content}\n</Document>'
	        )

	    return {"wiki_results": "\n\n---\n\n".join(formatted_docs)}

	# ------ Document Processing Tools ------

	@tool
	def extract_text_from_image(image_path: str) -> str:
	    """
	    Extract text from an image using OCR (pytesseract).

	    Args:
	        image_path (str): the path to the image file.

	    Returns:
	        The extracted text prefixed with a short header, or a message
	        starting with "Error extracting text from image:" if anything
	        fails. Errors are returned as text rather than raised.
	    """
	    try:
	        # The context manager closes the underlying file handle once OCR
	        # is done; previously the opened image was never closed.
	        with Image.open(image_path) as image:
	            text = pytesseract.image_to_string(image)

	        return f"Extracted the text from image:\n\n{text}"
	    except Exception as e:
	        # Deliberate best-effort: report the failure as the tool's output.
	        return f"Error extracting text from image: {str(e)}"


	@tool
	def analyze_csv_file(file_path: str, query: str) -> str:
	    """
	    Load a CSV file with Polars and return an overview of its contents.

	    Args:
	        file_path (str): the path to the CSV file.
	        query (str): Question about the data. It is accepted for the
	            tool interface but is not used by this function; the same
	            overview is returned regardless of the question.

	    Returns:
	        A text report with the row count, column count, column names and
	        the output of ``df.describe()``, or a message starting with
	        "Error occurred analyzing CSV file:" if anything fails.
	    """
	    try:
	        df = pl.read_csv(file_path)

	        report = [
	            f"CSV file loaded with {len(df)} rows and {len(df.columns)} columns.\n",
	            f"Columns: {', '.join(df.columns)}\n\n",
	            "Summary statistics:\n",
	            str(df.describe()),
	        ]
	        return "".join(report)

	    except Exception as e:
	        # Deliberate best-effort: report the failure as the tool's output.
	        return f"Error occurred analyzing CSV file: {str(e)}"


	@tool
	def analyze_excel_file(file_path: str, query: str) -> str:
	    """
	    Load an Excel file with Polars and return an overview of its contents.

	    Args:
	        file_path (str): the path to the Excel file.
	        query (str): Question about the data. It is accepted for the
	            tool interface but is not used by this function; the same
	            overview is returned regardless of the question.

	    Returns:
	        A text report with the row count, column count, column names and
	        the output of ``df.describe()``, or a message starting with
	        "Error occurred analyzing Excel file:" if anything fails.
	    """
	    try:
	        df = pl.read_excel(file_path)

	        report = [
	            f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns.\n",
	            f"Columns: {', '.join(df.columns)}\n\n",
	            "Summary statistics:\n",
	            str(df.describe()),
	        ]
	        return "".join(report)

	    except Exception as e:
	        # Deliberate best-effort: report the failure as the tool's output.
	        return f"Error occurred analyzing Excel file: {str(e)}"


	tools = [
	    # Arithmetic
	    multiply, add, subtract, divide, modulus, power,
	    # Search
	    web_search, wikipedia_search, arxiv_search,
	    # Document processing
	    extract_text_from_image, analyze_csv_file, analyze_excel_file,
	]