Spaces:
Runtime error
Runtime error
| import polars as pl | |
| import pytesseract | |
| from langchain_community.document_loaders import ArxivLoader, WikipediaLoader | |
| from langchain_community.tools.tavily_search import TavilySearchResults | |
| from langchain_core.tools import tool | |
| from PIL import Image | |
| # --------- Basic Math tools --------- | |
| def add(a: float, b: float) -> float: | |
| """ | |
| Add two numbers. | |
| Args: | |
| a (float): the first number | |
| b (float): the second number | |
| """ | |
| return a + b | |
| def subtract(a: float, b: float) -> int: | |
| """ | |
| Subtract two numbers. | |
| Args: | |
| a (float): the first number | |
| b (float): the second number | |
| """ | |
| return a - b | |
| def multiply(a: float, b: float) -> float: | |
| """ | |
| Multiplies two numbers. | |
| Args: | |
| a (float): the first number | |
| b (float): the second number | |
| """ | |
| return a * b | |
| def divide(a: float, b: float) -> float: | |
| """ | |
| Divides two numbers. | |
| Args: | |
| a (float): the first float number | |
| b (float): the second float number | |
| """ | |
| if b == 0: | |
| raise ValueError("Cannot divided by zero.") | |
| return a / b | |
| def modulus(a: int, b: int) -> int: | |
| """ | |
| Get the modulus of two numbers. | |
| Args: | |
| a (int): the first number | |
| b (int): the second number | |
| """ | |
| return a % b | |
| def power(a: float, b: float) -> float: | |
| """ | |
| Get the power of two numbers. | |
| Args: | |
| a (float): the first number | |
| b (float): the second number | |
| """ | |
| return a**b | |
| # ------- Search Tools ------- | |
| def arxiv_search(query: str) -> str: | |
| """Search Arxiv for a query and return maximum 3 result. | |
| Args: | |
| query: The search query.""" | |
| search_docs = ArxivLoader(query=query, load_max_docs=3).load() | |
| formatted_search_docs = "\n\n---\n\n".join( | |
| [ | |
| f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>' | |
| for doc in search_docs | |
| ] | |
| ) | |
| return {"arxiv_results": formatted_search_docs} | |
| def web_search(query: str) -> str: | |
| """Search the Web via Tavily for a query and return 3 results in maximum. | |
| Args: | |
| query: The search query.""" | |
| search_docs = TavilySearchResults(max_results=3).invoke(query) | |
| formatted_search_docs = "\n\n---\n\n".join( | |
| [ | |
| f'<Document source="{doc.metadata.get("url", "")}" title="{doc.get("title", "")}"/>\n{doc.get("content", "")}\n</Document>' | |
| for doc in search_docs | |
| ] | |
| ) | |
| return {"web_results": formatted_search_docs} | |
| def wikipedia_search(query: str) -> str: | |
| """Search Wikipedia for a query and return maximum 3 results. | |
| Args: | |
| query: The search query.""" | |
| search_docs = WikipediaLoader(query=query, load_max_docs=3).load() | |
| formatted_search_docs = "\n\n---\n\n".join( | |
| [ | |
| f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>' | |
| for doc in search_docs | |
| ]) | |
| return {"wiki_results": formatted_search_docs} | |
| # ------ Document Processing Tools ------ | |
| def extract_text_from_image(image_path: str) -> str: | |
| """ | |
| Extract text from an image by using pytesseract via OCR. | |
| Args: | |
| image_path (str): the path to the image file. | |
| """ | |
| try: | |
| image = Image.open(image_path) | |
| text = pytesseract.image_to_string(image) | |
| return f"Extracted the text from image:\n\n{text}" | |
| except Exception as e: | |
| return f"Error extracting text from image: {str(e)}" | |
| def analyze_csv_file(file_path: str, query: str) -> str: | |
| """ | |
| Analyze a CSV file by using Polars and answer a question about it. | |
| Args: | |
| file_path (str): the path to the CSV file. | |
| query (str): Question about the data | |
| """ | |
| try: | |
| df = pl.read_csv(file_path) | |
| result = f"CSV file loaded with {len(df)} rows and {len(df.columns)} columns.\n" | |
| result += f"Columns: {', '.join(df.columns)}\n\n" | |
| result += "Summary statistics:\n" | |
| result += str(df.describe()) | |
| return result | |
| except Exception as e: | |
| return f"Error occured analyzing CSV file: {str(e)}" | |
| def analyze_excel_file(file_path: str, query: str) -> str: | |
| """ | |
| Analyze an Excel file using Polars and answer a question about it. | |
| Args: | |
| file_path (str): the path to the Excel file. | |
| query (str): Question about the data | |
| """ | |
| try: | |
| df = pl.read_excel(file_path) | |
| result = ( | |
| f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns.\n" | |
| ) | |
| result += f"Columns: {', '.join(df.columns)}\n\n" | |
| result += "Summary statistics:\n" | |
| result += str(df.describe()) | |
| return result | |
| except Exception as e: | |
| return f"Error occured analyzing Excel file: {str(e)}" | |
| tools = [ | |
| multiply, | |
| add, | |
| subtract, | |
| divide, | |
| modulus, | |
| power, | |
| web_search, | |
| wikipedia_search, | |
| arxiv_search, | |
| extract_text_from_image, | |
| analyze_csv_file, | |
| analyze_excel_file, | |
| ] | |