Final_Assignment_Template

Sleeping

App Files Files Community

Eduardo Guerra committed on Apr 30, 2025

Commit

eed0f02

1 Parent(s): 39c6564

feat: Added web_scrapper_tool

Browse files

Files changed (6) hide show

Dockerfile +46 -0
app.py +1 -0
requirements.txt +1 -0
src/agent.py +22 -3
src/tools/__init__.py +0 -0
src/tools/web_scrapper.py +23 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,46 @@

+FROM python:3.10-slim
+# Keep Playwright's browser binaries in a shared, user-independent location.
+# Without this they are downloaded into root's home cache during the build
+# and cannot be found once the container switches to the non-root user.
+ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    libx11-xcb1 \
+    libxcomposite1 \
+    libxrandr2 \
+    libasound2 \
+    libatk-bridge2.0-0 \
+    libatk1.0-0 \
+    libcups2 \
+    libdbus-1-3 \
+    libgdk-pixbuf2.0-0 \
+    libnspr4 \
+    libnss3 \
+    libxss1 \
+    libxtst6 \
+    lsb-release \
+    wget \
+    ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
+# Install Playwright and Chromium (the only browser the scraper launches).
+# This must run as root because --with-deps installs OS packages.
+RUN pip install --no-cache-dir --upgrade pip \
+    && pip install --no-cache-dir playwright \
+    && playwright install --with-deps chromium \
+    && chmod -R a+rX /ms-playwright \
+    && rm -rf /var/lib/apt/lists/*
+# Set up a non-root user
+RUN useradd -m -u 1000 user
+USER user
+ENV PATH="/home/user/.local/bin:$PATH"
+# Set the working directory
+WORKDIR /home/user/app
+# Install Python dependencies before copying the sources so that this layer
+# stays cached when only application code changes.
+COPY --chown=user requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the application files, owned by the runtime user
+COPY --chown=user . .
+# Expose the port
+EXPOSE 7860
+# Command to run the application
+CMD ["python", "app.py"]

app.py CHANGED Viewed

@@ -139,6 +139,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
                         logger.info(
                             f"Answer for task {task_id}: {submitted_answer}"
                         )
                         answers_payload.append(
                             {
                                 "task_id": task_id,

                         logger.info(
                             f"Answer for task {task_id}: {submitted_answer}"
                         )
                         answers_payload.append(
                             {
                                 "task_id": task_id,

requirements.txt CHANGED Viewed

@@ -1,3 +1,4 @@
 duckduckgo-search==8.0.1
 gradio
 langchain-core==0.3.56

+beautifulsoup4==4.13.4
 duckduckgo-search==8.0.1
 gradio
 langchain-core==0.3.56

src/agent.py CHANGED Viewed

@@ -13,6 +13,8 @@ from langchain_core.messages import AIMessage, HumanMessage
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_openai import ChatOpenAI
 nest_asyncio.apply()
 logger = logging.getLogger(__name__)
@@ -24,7 +26,10 @@ class BasicAgent:
             prompt = ChatPromptTemplate.from_messages(
                 [
-                    ("system", "You are a helpful assistant"),
                     ("placeholder", "{chat_history}"),
                     ("human", "{input}"),
                     ("placeholder", "{agent_scratchpad}"),
@@ -52,7 +57,7 @@ class BasicAgent:
             # )
             # tools = toolkit.get_tools()
-            tools = [DuckDuckGoSearchResults()]
             logger.info(f"Tools: {tools}")
             agent = create_tool_calling_agent(llm, tools, prompt)
@@ -70,7 +75,21 @@ class BasicAgent:
     def __call__(self, question: str) -> str:
         try:
             logger.info(f"Processing question: {question}")
-            response = self.agent_executor.invoke({"input": question})
             logger.info(f"Response: {response}")
             return response
         except Exception as e:

 from langchain_core.prompts import ChatPromptTemplate
 from langchain_openai import ChatOpenAI
+from src.tools.web_scrapper import web_scrapper_tool
 nest_asyncio.apply()
 logger = logging.getLogger(__name__)
             prompt = ChatPromptTemplate.from_messages(
                 [
+                    (
+                        "system",
+                        "You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise, additionally, only use numbers, don't add any units and don't use any other characters. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.",
+                    ),
                     ("placeholder", "{chat_history}"),
                     ("human", "{input}"),
                     ("placeholder", "{agent_scratchpad}"),
             # )
             # tools = toolkit.get_tools()
+            tools = [DuckDuckGoSearchResults(), web_scrapper_tool()]
             logger.info(f"Tools: {tools}")
             agent = create_tool_calling_agent(llm, tools, prompt)
     def __call__(self, question: str) -> str:
         try:
             logger.info(f"Processing question: {question}")
+            retries = 3
+            while retries > 0:
+                try:
+                    response = self.agent_executor.invoke({"input": question})[
+                        "output"
+                    ]
+                    response = response.split("FINAL ANSWER:")[1].strip()
+                    break
+                except Exception as e:
+                    logger.error(
+                        f"Error processing question: {e}", exc_info=True
+                    )
+                    response = "Could not process question"
+                    retries -= 1
             logger.info(f"Response: {response}")
             return response
         except Exception as e:

src/tools/__init__.py ADDED Viewed

File without changes

src/tools/web_scrapper.py ADDED Viewed

	@@ -0,0 +1,23 @@

+from bs4 import BeautifulSoup
+from langgraph import Tool
+from playwright.sync_api import sync_playwright
+def extract_website_content(url: str) -> str:
+    with sync_playwright() as p:
+        browser = p.chromium.launch(headless=True)
+        page = browser.new_page()
+        page.goto(url)
+        html_content = page.content()
+        browser.close()
+    soup = BeautifulSoup(html_content, "html.parser")
+    return soup.get_text()
+def web_scrapper_tool():
+    return Tool.from_function(
+        func=extract_website_content,
+        name="scrape_website",
+        description="Extracts the main content of a webpage given its URL.",
+    )