Artem Zhirkevich committed
Commit ea48b73 · Parent: f590bb2

refactor agent, tools, and libs

Files changed (5)
  1. agent.py +11 -222
  2. app.py +2 -2
  3. dry_run.py +3 -3
  4. requirements.txt +1 -16
  5. tools.py +213 -0
agent.py CHANGED
@@ -1,247 +1,36 @@
  import os
  import time
- import tempfile
- import requests
- import pytesseract
- import wikipedia
- import mwclient
- import pandas as pd
- import easyocr
- from typing import List, Optional, Dict, Any
- from urllib.parse import urlparse
+
+ from typing import List
  from dotenv import load_dotenv
- from PIL import Image
- from tavily import TavilyClient
- from arxiv import Search, Client, SortCriterion, SortOrder

  from langgraph.graph.state import CompiledStateGraph
  from langgraph.graph import START, StateGraph, MessagesState
  from langgraph.prebuilt import tools_condition
  from langgraph.prebuilt import ToolNode

- from langchain_groq import ChatGroq
  from langchain_core.messages import HumanMessage, SystemMessage
  from langchain_google_genai import ChatGoogleGenerativeAI
- from langchain.memory import ConversationBufferMemory
  from langchain.tools import Tool, tool
  from langchain.callbacks.tracers import ConsoleCallbackHandler
- from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
- from langchain_community.utilities import WikipediaAPIWrapper
- from langchain_experimental.utilities import PythonREPL
- from langchain_community.document_loaders import WebBaseLoader
-
-
- load_dotenv()
-
- vision_llm = ChatGroq(model="meta-llama/llama-4-scout-17b-16e-instruct", groq_api_key=os.getenv('GROQ_API_KEY'))
-
-
- @tool
- def web_search(query: str, domain: Optional[str] = None) -> str:
-     """
-     Perform a web search and return the raw results as a string.
-
-     Args:
-         query (str): The search query.
-         domain (Optional[str]): If provided, restricts the search to this domain.
-
-     Returns:
-         str: Raw search results concatenated into a string.
-     """
-     try:
-         time.sleep(2)
-         search = DuckDuckGoSearchAPIWrapper()
-         if domain:
-             query = f"{query} site:{domain}"
-         results = search.results(query, max_results=3)
-
-         if not results:
-             return "No results found."
-
-         # Format into simple title + snippet
-         formatted = ""
-         for r in results:
-             formatted += f"Title: {r['title']}\nURL: {r['link']}\nSnippet: {r['snippet']}\n\n"
-         return formatted.strip()
-
-     except Exception as e:
-         return f"Search error: {e}"
-
-
- @tool
- def visit_webpage(url: str):
-     """
-     Fetches and loads the content of a webpage given its URL.
-
-     Parameters:
-         url (str): The URL of the webpage to be visited.
-
-     Returns:
-         str: A string containing the loaded content of the webpage.
-     """
-     # Initialize a WebBaseLoader with the provided URL
-     loader = WebBaseLoader(url)
-
-     # Set requests_kwargs to disable SSL certificate verification
-     # This can help bypass SSL certificate errors but should be used cautiously
-     loader.requests_kwargs = {'verify': False}
-
-     # Load the webpage content using the loader
-     docs = loader.load()
-
-     # Return the loaded content formatted as a string
-     return f"Page content: {docs}"
-
-
- @tool
- def wikipedia_search(query: str, max_docs: int = 1) -> str:
-     """
-     Search Wikipedia using mwclient and return exactly `max_docs` results.
-
-     Args:
-         query (str): The search query.
-         max_docs (int): Number of results to return. Default is 1.
-     """
-     try:
-         time.sleep(2)
-         site = mwclient.Site("en.wikipedia.org")
-         results = site.search(query, limit=max_docs)
-
-         output = ""
-         count = 0
-
-         for page_info in results:
-             title = page_info["title"]
-             try:
-                 page = site.pages[title]
-                 content = page.text()
-                 first_paragraph = content.split('\n\n')[0]
-
-                 url = f"https://en.wikipedia.org/wiki/{title.replace(' ', '_')}"
-
-                 output += (
-                     f"--- Result {count + 1} ---\n"
-                     f"Title: {title}\n"
-                     f"Summary: {first_paragraph}...\n"
-                     f"URL: {url}\n\n"
-                 )
-                 count += 1
-                 if count >= max_docs:
-                     break
-
-             except Exception:
-                 continue
-
-         return output.strip() or "No valid matching pages found."
-
-     except Exception as e:
-         return f"Wikipedia search error: {str(e)}"
-
-
- @tool
- def extract_text_from_image(image_path: str) -> str:
-     """
-     Extracts text from an image file.
-
-     Args:
-         image_path (str): The file path to the image
-             (e.g., '/path/to/document.png').
-
-     Returns:
-         str: Extracted text paragraphs separated by newlines,
-             prefixed with "Extracted text:\n". Returns an error message
-             string starting with 'Error:' on failure.
-     """
-     try:
-         time.sleep(2)
-
-         with open(image_path, "rb") as image_file:
-             image_bytes = image_file.read()
-
-         image_base64 = base64.b64encode(image_bytes).decode("utf-8")
-
-         message = [
-             HumanMessage(
-                 content=[
-                     {
-                         "type": "text",
-                         "text": (
-                             "Extract text or provide explanation of this image"
-                         ),
-                     },
-                     {
-                         "type": "image_url",
-                         "image_url": {
-                             "url": f"data:image/png;base64,{image_base64}"
-                         },
-                     },
-                 ]
-             )
-         ]
-
-         response = vision_llm.invoke(message)
-
-         all_text = response.content + "\n\n"
-
-         return all_text.strip()
-     except Exception as e:
-         # A butler should handle errors gracefully
-         error_msg = f"Error extracting text: {str(e)}"
-         print(error_msg)
-         return ""
-
-
- @tool
- def analyze_file(file_path: str) -> str:
-     """
-     Load and analyze a CSV or Excel file using pandas.
-
-     Provides basic metadata and summary statistics for numeric columns.
-
-     Args:
-         file_path (str): Path to the CSV or Excel file.
-
-     Returns:
-         str: Summary statistics and metadata about the file data.
-     """
-     try:
-         # Determine file type
-         _, ext = os.path.splitext(file_path.lower())
-
-         if ext == '.csv':
-             df = pd.read_csv(file_path)
-         elif ext in ['.xls', '.xlsx']:
-             df = pd.read_excel(file_path)
-         else:
-             return f"Error: Unsupported file extension '{ext}'. Supported: .csv, .xls, .xlsx"
-
-         result = "Summary statistics for numeric columns:\n"
-         result += str(df.describe())
-         result += "\n\n"
-
-         result += f"Columns: {', '.join(df.columns)}\n\n"
-         result += "Content:\n"
-         result += df.astype(str).head(1000).to_string(index=False)
-
-         return result
-
-     except ImportError:
-         return "Error: Required libraries are not installed. Install with 'pip install pandas openpyxl'."
-     except FileNotFoundError:
-         return f"Error: File not found at path '{file_path}'."
-     except Exception as e:
-         return f"Error analyzing file: {str(e)}"
+
+ from tools import (
+     web_search,
+     visit_webpage,
+     wikipedia_search,
+     extract_text_from_image,
+     analyze_file,
+ )
+
+
+ load_dotenv()


- class Agent:
+ class GeminiAgent:

      _api_key: str
      _model_name: str
      _tools: List[Tool]
-     _memory: ConversationBufferMemory
      _llm: ChatGoogleGenerativeAI
      _graph: CompiledStateGraph
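The agent.py hunk ends at the class attributes, so the body of GeminiAgent is not shown. For orientation, here is a minimal sketch of the standard LangGraph wiring that the surviving imports (StateGraph, ToolNode, tools_condition) point to. The node names, assistant function, and Gemini model id below are assumptions for illustration, not code from this commit:

# Sketch only -- not part of the commit. Assumes GOOGLE_API_KEY is set in the environment.
from langgraph.graph import START, StateGraph, MessagesState
from langgraph.prebuilt import ToolNode, tools_condition
from langchain_google_genai import ChatGoogleGenerativeAI

from tools import (
    web_search,
    visit_webpage,
    wikipedia_search,
    extract_text_from_image,
    analyze_file,
)

tools = [web_search, visit_webpage, wikipedia_search, extract_text_from_image, analyze_file]

# Bind the refactored tools to the Gemini chat model (model id is an assumption).
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash")
llm_with_tools = llm.bind_tools(tools)

def assistant(state: MessagesState):
    # The model either answers directly or emits tool calls.
    return {"messages": [llm_with_tools.invoke(state["messages"])]}

builder = StateGraph(MessagesState)
builder.add_node("assistant", assistant)
builder.add_node("tools", ToolNode(tools))
builder.add_edge(START, "assistant")
# Route to the tool node when the last message contains tool calls, else finish.
builder.add_conditional_edges("assistant", tools_condition)
builder.add_edge("tools", "assistant")
graph = builder.compile()

Note that tools_condition routes to a node that must be named "tools", which is why that node name is fixed in the sketch.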
app.py CHANGED
@@ -5,7 +5,7 @@ import gradio as gr
  import requests
  import inspect
  import pandas as pd
- from agent import Agent
+ from agent import GeminiAgent
  from evaluation_api import EvaluationApi


@@ -41,7 +41,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):

      # 1. Instantiate Agent ( modify this part to create your agent)
      try:
-         agent = Agent()
+         agent = GeminiAgent()
      except Exception as e:
          return f"Error initializing agent: {e}", None

dry_run.py CHANGED
@@ -3,9 +3,9 @@ import tempfile
  import json
  import os

- from agent import Agent
+ from agent import GeminiAgent

- random.seed(1)
+ random.seed(3)

  def get_question(file_path: str) -> str:
      with open(file_path, "r") as file:
@@ -45,7 +45,7 @@ print(json.dumps(question, indent=2))

  # print(file_path)

- agent = Agent()
+ agent = GeminiAgent()

  # messages = agent.run(f"Question: `{question["Question"]}` File path: {file_path}")
  messages = agent.run(f"Question: `{question["Question"]}`")
requirements.txt CHANGED
@@ -3,8 +3,6 @@ requests
  pandas
  openpyxl
  openai
- google-genai
- google-generativeai
  langchain
  langchain-community
  langchain-core
@@ -12,19 +10,6 @@ langchain-google-genai
  langgraph
  huggingface_hub
  python-dotenv
- wikipedia-api
- wikipedia
- arxiv
- datasets
- yt-dlp
- google-cloud-speech
- google-api-python-client
  duckduckgo-search
- pytesseract
- tavily-python
  langchain_groq
- langchain-tavily
- mwclient
- langchain_experimental
- easyocr
- smolagents
+ mwclient
tools.py ADDED
@@ -0,0 +1,213 @@
+ import os
+ import time
+ import base64  # required by extract_text_from_image
+ import requests
+ import mwclient
+ import pandas as pd  # required by analyze_file
+ from typing import Optional
+ from dotenv import load_dotenv
+
+ from langchain_groq import ChatGroq
+ from langchain_core.messages import HumanMessage
+ from langchain.tools import tool
+ from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
+ from langchain_community.document_loaders import WebBaseLoader
+
+
+ load_dotenv()
+
+ vision_llm = ChatGroq(model="meta-llama/llama-4-scout-17b-16e-instruct", groq_api_key=os.getenv('GROQ_API_KEY'))
+
+
+ @tool
+ def web_search(query: str, domain: Optional[str] = None) -> str:
+     """
+     Perform a web search and return the raw results as a string.
+
+     Args:
+         query (str): The search query.
+         domain (Optional[str]): If provided, restricts the search to this domain.
+
+     Returns:
+         str: Raw search results concatenated into a string.
+     """
+     try:
+         time.sleep(2)
+
+         search = DuckDuckGoSearchAPIWrapper()
+         if domain:
+             query = f"{query} site:{domain}"
+         results = search.results(query, max_results=3)
+
+         if not results:
+             return "No results found."
+
+         formatted = ""
+         for r in results:
+             formatted += f"Title: {r['title']}\nURL: {r['link']}\nSnippet: {r['snippet']}\n\n"
+         return formatted.strip()
+
+     except Exception as e:
+         return f"Search error: {e}"
+
+
+ @tool
+ def visit_webpage(url: str):
+     """
+     Fetches and loads the content of a webpage given its URL.
+
+     Parameters:
+         url (str): The URL of the webpage to be visited.
+
+     Returns:
+         str: A string containing the loaded content of the webpage.
+     """
+     loader = WebBaseLoader(url)
+     # Disable SSL certificate verification; convenient for scraping, but use cautiously.
+     loader.requests_kwargs = {'verify': False}
+
+     docs = loader.load()
+
+     return f"Page content: {docs}"
+
+
+ @tool
+ def wikipedia_search(query: str, max_docs: int = 1) -> str:
+     """
+     Search Wikipedia using mwclient and return up to `max_docs` results.
+
+     Args:
+         query (str): The search query.
+         max_docs (int): Maximum number of results to return. Default is 1.
+     """
+     try:
+         time.sleep(2)
+
+         site = mwclient.Site("en.wikipedia.org")
+         results = site.search(query, limit=max_docs)
+
+         output = ""
+         count = 0
+
+         for page_info in results:
+             title = page_info["title"]
+             try:
+                 page = site.pages[title]
+                 content = page.text()
+                 first_paragraph = content.split('\n\n')[0]
+
+                 url = f"https://en.wikipedia.org/wiki/{title.replace(' ', '_')}"
+
+                 output += (
+                     f"--- Result {count + 1} ---\n"
+                     f"Title: {title}\n"
+                     f"Summary: {first_paragraph}...\n"
+                     f"URL: {url}\n\n"
+                 )
+                 count += 1
+                 if count >= max_docs:
+                     break
+
+             except Exception:
+                 continue
+
+         return output.strip() or "No valid matching pages found."
+
+     except Exception as e:
+         return f"Wikipedia search error: {str(e)}"
+
+
+ @tool
+ def extract_text_from_image(image_path: str) -> str:
+     """
+     Extracts text from (or describes) an image file using a vision LLM.
+
+     Args:
+         image_path (str): The file path to the image
+             (e.g., '/path/to/document.png').
+
+     Returns:
+         str: The extracted text or image description. Returns an empty
+             string on failure; the error is printed.
+     """
+     try:
+         time.sleep(2)
+
+         with open(image_path, "rb") as image_file:
+             image_bytes = image_file.read()
+
+         image_base64 = base64.b64encode(image_bytes).decode("utf-8")
+
+         message = [
+             HumanMessage(
+                 content=[
+                     {
+                         "type": "text",
+                         "text": (
+                             "Extract text or provide explanation of this image"
+                         ),
+                     },
+                     {
+                         "type": "image_url",
+                         "image_url": {
+                             "url": f"data:image/png;base64,{image_base64}"
+                         },
+                     },
+                 ]
+             )
+         ]
+
+         response = vision_llm.invoke(message)
+
+         all_text = response.content + "\n\n"
+
+         return all_text.strip()
+     except Exception as e:
+         error_msg = f"Error extracting text: {str(e)}"
+         print(error_msg)
+         return ""
+
+
+ @tool
+ def analyze_file(file_path: str) -> str:
+     """
+     Load and analyze a CSV or Excel file using pandas.
+
+     Provides basic metadata and summary statistics for numeric columns.
+
+     Args:
+         file_path (str): Path to the CSV or Excel file.
+
+     Returns:
+         str: Summary statistics and metadata about the file data.
+     """
+     try:
+         _, ext = os.path.splitext(file_path.lower())
+
+         if ext == '.csv':
+             df = pd.read_csv(file_path)
+         elif ext in ['.xls', '.xlsx']:
+             df = pd.read_excel(file_path)
+         else:
+             return f"Error: Unsupported file extension '{ext}'. Supported: .csv, .xls, .xlsx"
+
+         result = "Summary statistics for numeric columns:\n"
+         result += str(df.describe())
+         result += "\n\n"
+
+         result += f"Columns: {', '.join(df.columns)}\n\n"
+         result += "Content:\n"
+         result += df.astype(str).head(1000).to_string(index=False)
+
+         return result
+
+     except ImportError:
+         return "Error: Required libraries are not installed. Install with 'pip install pandas openpyxl'."
+     except FileNotFoundError:
+         return f"Error: File not found at path '{file_path}'."
+     except Exception as e:
+         return f"Error analyzing file: {str(e)}"
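
Because the @tool-decorated functions are ordinary LangChain structured tools, the extracted module can be smoke-tested without building the agent at all. A hypothetical check (the query, domain, and CSV path are placeholders, not from the commit):

# Sketch only -- not part of the commit.
from tools import web_search, analyze_file

# @tool wraps each function as a runnable; arguments are passed as a dict.
print(web_search.invoke({"query": "LangGraph ToolNode", "domain": "langchain.com"}))
print(analyze_file.invoke({"file_path": "example.csv"}))  # placeholder path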