Final_Assignment_Template

Sleeping

App Files Files Community

Eduardo Guerra committed on May 7, 2025

Commit

8d57271

1 Parent(s): eed0f02

feat: Final agent submission

Browse files

Files changed (12) hide show

.gitignore +2 -0
app.py +68 -37
execute_script.py +21 -0
requirements.txt +16 -2
setup.py +19 -0
src/agent.py +144 -53
src/final_answer.py +212 -0
src/tools.py +613 -0
src/tools/__init__.py +0 -0
src/tools/image_to_text.py +0 -0
src/tools/web_scrapper.py +0 -23
tests/test_tools.py +155 -0

.gitignore CHANGED Viewed

@@ -1,2 +1,4 @@
 .env
 *__pycache__*

 .env
 *__pycache__*
+*.DS_Store
+*egg-info*

app.py CHANGED Viewed

@@ -4,6 +4,8 @@ import logging
 import os
 import sys
 import traceback
 import gradio as gr
 import pandas as pd
@@ -15,9 +17,6 @@ from src.agent import BasicAgent
 # Load environment variables from .env file
 load_dotenv()
-# Set OpenAI API key from environment variable
-os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
 # Configure logging
 logging.basicConfig(
@@ -29,7 +28,6 @@ logger = logging.getLogger(__name__)
 # (Keep Constants as is)
 # --- Constants ---
-DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 def run_and_submit_all(profile: gr.OAuthProfile | None):
@@ -50,7 +48,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             logger.warning("User not logged in.")
             return "Please Login to Hugging Face with the button.", None
-        api_url = DEFAULT_API_URL
         questions_url = f"{api_url}/questions"
         submit_url = f"{api_url}/submit"
@@ -112,8 +110,26 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         logger.info(f"Running agent on {len(questions_data)} questions...")
         # Limit the number of questions to process to avoid timeouts
-        max_questions = 3  # Process only 3 questions at a time
-        questions_to_process = questions_data[:max_questions]
         logger.info(
             f"Processing {len(questions_to_process)} out of {len(questions_data)} questions"
         )
@@ -131,37 +147,52 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
                 # Use concurrent.futures for thread-safe timeout
                 with concurrent.futures.ThreadPoolExecutor() as executor:
-                    future = executor.submit(agent, question_text)
                     try:
-                        submitted_answer = future.result(
-                            timeout=60
-                        )  # 60 second timeout
-                        logger.info(
-                            f"Answer for task {task_id}: {submitted_answer}"
-                        )
-                        answers_payload.append(
-                            {
-                                "task_id": task_id,
-                                "submitted_answer": submitted_answer,
-                            }
-                        )
-                        results_log.append(
-                            {
-                                "Task ID": task_id,
-                                "Question": question_text,
-                                "Submitted Answer": submitted_answer,
-                            }
-                        )
-                    except concurrent.futures.TimeoutError:
-                        logger.error(f"Timeout processing task {task_id}")
-                        results_log.append(
-                            {
-                                "Task ID": task_id,
-                                "Question": question_text,
-                                "Submitted Answer": "TIMEOUT ERROR: Question processing timed out after 60 seconds",
-                            }
-                        )
             except Exception as e:
                 logger.error(
                     f"Error running agent on task {task_id}: {e}",

 import os
 import sys
 import traceback
+import tempfile
+import json
 import gradio as gr
 import pandas as pd
 # Load environment variables from .env file
 load_dotenv()
 # Configure logging
 logging.basicConfig(
 # (Keep Constants as is)
 # --- Constants ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
             logger.warning("User not logged in.")
             return "Please Login to Hugging Face with the button.", None
+        api_url = os.getenv("DEFAULT_API_URL")
         questions_url = f"{api_url}/questions"
         submit_url = f"{api_url}/submit"
         logger.info(f"Running agent on {len(questions_data)} questions...")
         # Limit the number of questions to process to avoid timeouts
+        max_questions = 20  # Process only 20 questions at a time
+        tasks_to_process = [
+            # "99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
+            # "1f975693-876d-457b-a649-393859e79bf3",
+            # "840bfca7-4f7b-481a-8794-c560c340185d",
+            # "7bd855d8-463d-4ed5-93ca-5fe35145f733",
+        ]
+        # questions_to_process = questions_data[:max_questions]
+        if tasks_to_process:
+            questions_to_process = [
+                x
+                for x in questions_data
+                if x.get("task_id") in tasks_to_process
+            ]
+        else:
+            questions_to_process = questions_data[:max_questions]
         logger.info(
             f"Processing {len(questions_to_process)} out of {len(questions_data)} questions"
         )
                 # Use concurrent.futures for thread-safe timeout
                 with concurrent.futures.ThreadPoolExecutor() as executor:
                     try:
+                        future = executor.submit(agent, question_text, task_id)
+                        try:
+                            submitted_answer = future.result(
+                                timeout=180
+                            )  # 180 second timeout
+                            logger.info(
+                                f"Answer for task {task_id}: {submitted_answer}"
+                            )
+                            answers_payload.append(
+                                {
+                                    "task_id": task_id,
+                                    "submitted_answer": submitted_answer,
+                                }
+                            )
+                            results_log.append(
+                                {
+                                    "Task ID": task_id,
+                                    "Question": question_text,
+                                    "Submitted Answer": submitted_answer,
+                                }
+                            )
+                        except concurrent.futures.TimeoutError:
+                            logger.error(f"Timeout processing task {task_id}")
+                            results_log.append(
+                                {
+                                    "Task ID": task_id,
+                                    "Question": question_text,
+                                    "Submitted Answer": "TIMEOUT ERROR: Question processing timed out after 60 seconds",
+                                }
+                            )
+                    finally:
+                        # Clean up temporary directory after processing
+                        try:
+                            import shutil
+                            shutil.rmtree(temp_dir)
+                            logger.info(
+                                f"Cleaned up temporary directory for task {task_id}"
+                            )
+                        except Exception as e:
+                            logger.error(
+                                f"Error cleaning up temporary directory for task {task_id}: {e}"
+                            )
             except Exception as e:
                 logger.error(
                     f"Error running agent on task {task_id}: {e}",

execute_script.py ADDED Viewed

	@@ -0,0 +1,21 @@

+from google import genai
+import os
+from dotenv import load_dotenv
+load_dotenv()
+if __name__ == "__main__":
+    try:
+        client = genai.Client(api_key=os.getenv("GEMINI_KEY"))
+        models = client.models.list()
+        result = "Available Gemini Models:\n\n"
+        for model in models:
+            result += f"Model: {model.name}\n"
+            result += f"Description: {model.description}\n"
+            result += "-" * 50 + "\n"
+        print(result)
+    except Exception as e:
+        print(f"Error listing models: {str(e)}")

requirements.txt CHANGED Viewed

@@ -1,16 +1,30 @@
 beautifulsoup4==4.13.4
 duckduckgo-search==8.0.1
 gradio
 langchain-core==0.3.56
 langchain-community==0.3.23
-langchain-huggingface==0.1.2
-langchain-openai==0.3.14
 langgraph==0.3.34
 lxml==5.4.0
 nest-asyncio==1.6.0
 playwright==1.51.0
 python-dotenv==1.1.0
 requests
 sentencepiece==0.2.0
 torch==2.7.0
 transformers==4.51.3

 beautifulsoup4==4.13.4
 duckduckgo-search==8.0.1
+google-ai-generativelanguage==0.6.15
+google-genai==1.13.0
+google-generativeai==0.8.5
 gradio
+imageio
+imageio[ffmpeg]
+imageio[pyav]
 langchain-core==0.3.56
 langchain-community==0.3.23
+langchain-experimental==0.3.4
+langchain-google-genai==2.0.10
+langchain-google-community==2.0.7
 langgraph==0.3.34
 lxml==5.4.0
 nest-asyncio==1.6.0
+Pillow
 playwright==1.51.0
+pytesseract
+pytest==8.3.5
 python-dotenv==1.1.0
 requests
+rizaio==0.11.0
 sentencepiece==0.2.0
 torch==2.7.0
 transformers==4.51.3
+typing-extensions==4.13.2
+youtube-transcript-api==1.0.3
+yt-dlp==2025.4.30

setup.py ADDED Viewed

	@@ -0,0 +1,19 @@

+from setuptools import setup, find_packages
+def read_requirements():
+    with open("requirements.txt") as f:
+        return [
+            line.strip()
+            for line in f
+            if line.strip() and not line.startswith("#")
+        ]
+setup(
+    name="src",
+    version="0.1",
+    packages=find_packages(),  # packages are discovered by setuptools
+    install_requires=read_requirements(),  # dependencies are read from requirements.txt
+    python_requires=">=3.8",
+)

src/agent.py CHANGED Viewed

@@ -1,21 +1,31 @@
 import logging
 import os
-# This import is required only for jupyter notebooks, since they have their own eventloop
-import nest_asyncio
-from langchain.agents import AgentExecutor, create_tool_calling_agent, tool
-from langchain_community.agent_toolkits import PlayWrightBrowserToolkit
-from langchain_community.tools import DuckDuckGoSearchResults
-from langchain_community.tools.playwright.utils import (
-    create_async_playwright_browser,  # A synchronous browser is available, though it isn't compatible with jupyter.\n",	  },
-)
-from langchain_core.messages import AIMessage, HumanMessage
 from langchain_core.prompts import ChatPromptTemplate
-from langchain_openai import ChatOpenAI
-from src.tools.web_scrapper import web_scrapper_tool
-nest_asyncio.apply()
 logger = logging.getLogger(__name__)
@@ -24,11 +34,13 @@ class BasicAgent:
         try:
             logger.info("Initializing BasicAgent")
             prompt = ChatPromptTemplate.from_messages(
                 [
                     (
                         "system",
-                        "You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise, additionally, only use numbers, don't add any units and don't use any other characters. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.",
                     ),
                     ("placeholder", "{chat_history}"),
                     ("human", "{input}"),
@@ -37,61 +49,140 @@ class BasicAgent:
             )
             logger.info("Created prompt template")
-            # Log environment variables
-            openai_api_key = os.getenv("OPENAI_API_KEY")
-            logger.info(f"OPENAI_API_KEY exists: {openai_api_key is not None}")
-            # Create OpenAI model
-            logger.info("Creating OpenAI model...")
-            llm = ChatOpenAI(
-                model="gpt-3.5-turbo",
-                openai_api_key=openai_api_key,
-                temperature=0.7,
-                max_tokens=1024,
             )
-            logger.info("Created OpenAI model successfully")
-            # async_browser = create_async_playwright_browser()
-            # toolkit = PlayWrightBrowserToolkit.from_browser(
-            #     async_browser=async_browser
-            # )
-            # tools = toolkit.get_tools()
-            tools = [DuckDuckGoSearchResults(), web_scrapper_tool()]
-            logger.info(f"Tools: {tools}")
             agent = create_tool_calling_agent(llm, tools, prompt)
             logger.info("Created tool calling agent")
             self.agent_executor = AgentExecutor(
-                agent=agent, tools=tools, verbose=True
             )
             logger.info("Created agent executor")
         except Exception as e:
-            logger.error(f"Error initializing agent: {e}", exc_info=True)
             raise
-    def __call__(self, question: str) -> str:
-        try:
-            logger.info(f"Processing question: {question}")
-            retries = 3
-            while retries > 0:
                 try:
-                    response = self.agent_executor.invoke({"input": question})[
-                        "output"
-                    ]
-                    response = response.split("FINAL ANSWER:")[1].strip()
-                    break
                 except Exception as e:
                     logger.error(
-                        f"Error processing question: {e}", exc_info=True
                     )
-                    response = "Could not process question"
-                    retries -= 1
-            logger.info(f"Response: {response}")
-            return response
-        except Exception as e:
-            logger.error(f"Error processing question: {e}", exc_info=True)
-            raise

 import logging
 import os
+from typing import Optional, Dict
+import tempfile
+from langchain.agents import AgentExecutor, create_tool_calling_agent
+from langchain_google_community import GoogleSearchResults
+from langchain_google_community import GoogleSearchAPIWrapper
 from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.tools import Tool
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_experimental.utilities import PythonREPL
+from src.final_answer import create_final_answer_graph, validate_answer
+from src.tools import (
+    analyze_csv_file,
+    analyze_excel_file,
+    download_file_from_url,
+    extract_text_from_image,
+    read_file,
+    review_youtube_video,
+    transcribe_audio,
+    transcribe_youtube,
+    use_vision_model,
+    video_frames_to_images,
+    website_scrape,
+)
 logger = logging.getLogger(__name__)
         try:
             logger.info("Initializing BasicAgent")
+            # Create the prompt template
             prompt = ChatPromptTemplate.from_messages(
                 [
                     (
                         "system",
+                        """You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
+                """,
                     ),
                     ("placeholder", "{chat_history}"),
                     ("human", "{input}"),
             )
             logger.info("Created prompt template")
+            # Initialize Gemini model
+            logger.info("Creating Gemini model...")
+            llm = ChatGoogleGenerativeAI(
+                model="models/gemini-2.5-pro-preview-03-25",
+                google_api_key=os.getenv("GEMINI_KEY"),
+                temperature=0.2,
             )
+            logger.info("Created Gemini model successfully")
+            # Define available tools
+            tools = [
+                GoogleSearchResults(
+                    api_wrapper=GoogleSearchAPIWrapper(
+                        google_api_key=os.getenv("GOOGLE_SEARCH_API_KEY"),
+                        google_cse_id=os.getenv("GOOGLE_CSE_ID"),
+                        k=5,  # Number of results to return
+                    )
+                ),
+                analyze_csv_file,
+                analyze_excel_file,
+                download_file_from_url,
+                extract_text_from_image,
+                read_file,
+                review_youtube_video,
+                transcribe_audio,
+                transcribe_youtube,
+                use_vision_model,
+                video_frames_to_images,
+                website_scrape,
+                Tool(
+                    name="python_repl",
+                    description="A Python shell. Use this to execute python commands. Input # should be a valid python command. If you want to see the output of a value, # you should print it out with `print(...)`.",
+                    func=PythonREPL().run,
+                ),
+            ]
+            logger.info("Tools: %s", tools)
+            # Create the agent
             agent = create_tool_calling_agent(llm, tools, prompt)
             logger.info("Created tool calling agent")
+            # Create the agent executor
             self.agent_executor = AgentExecutor(
+                agent=agent,
+                tools=tools,
+                return_intermediate_steps=True,
+                verbose=True,
             )
             logger.info("Created agent executor")
+            # Create the graph
+            self.validation_graph = create_final_answer_graph()
         except Exception as e:
+            logger.error("Error initializing agent: %s", e, exc_info=True)
             raise
+    def __call__(self, question: str, task_id: str) -> str:
+        """Answer a question, retrying until the validation graph accepts the answer.
+        Each attempt downloads ``<DEFAULT_API_URL>/files/<task_id>`` into a
+        temporary directory, runs the agent executor and passes its output and
+        intermediate steps to ``validate_answer``. When the download returns a
+        file whose ``type`` is not ``"error"``, its type and path are appended
+        to the question text given to the agent.
+        Args:
+            question (str): The question to answer
+            task_id (str): The task ID to fetch the file
+        Returns:
+            The ``final_answer`` reported by the first attempt that validates.
+        Raises:
+            Exception: When none of the ``max_retries`` (3) attempts yields a
+                valid answer, or the last attempt fails with an error.
+        Note:
+            The download sits inside the retry loop, so it is repeated on
+            every attempt. The "Failed to get valid answer" exception is raised
+            inside the ``try`` block and is therefore caught by the ``except``
+            clause below, which logs it and raises the "Failed after ..."
+            exception in its place.
+        """
+        max_retries = 3
+        attempt = 0
+        # Temporary directory for the task file; removed when the with-block exits
+        with tempfile.TemporaryDirectory() as temp_dir:
+            while attempt < max_retries:
+                default_api_url = os.getenv("DEFAULT_API_URL")
+                file_url = f"{default_api_url}/files/{task_id}"
+                try:
+                    # Download file to temporary directory
+                    file = download_file_from_url.invoke(
+                        {
+                            "url": file_url,
+                            "directory": temp_dir,
+                        }
+                    )
+                except Exception as e:
+                    logger.error(f"Error downloading file: {e}")
+                    file = None  # continue without a file; the question is sent as-is
                 try:
+                    attempt += 1
+                    logger.info(f"Attempt {attempt} of {max_retries}")
+                    # Prepare input with file information
+                    if file and file.get("type") != "error":
+                        input_data = {
+                            "input": question
+                            + f" [File: type={file.get('type', 'None')}, path={file.get('path', 'None')}]",
+                        }
+                    else:
+                        input_data = {
+                            "input": question,
+                        }
+                    # Run the agent to get the answer
+                    result = self.agent_executor.invoke(input_data)
+                    answer = result.get("output", "")
+                    logger.info(f"Attempt {attempt} result: {result}")
+                    # Run validation on the raw output and the recorded tool steps
+                    validation_result = validate_answer(
+                        self.validation_graph,
+                        answer,
+                        [result.get("intermediate_steps", [])],  # steps are wrapped in a one-element list
+                    )
+                    valid_answer = validation_result.get("valid_answer", False)
+                    final_answer = validation_result.get("final_answer", "")
+                    if valid_answer:
+                        logger.info(f"Valid answer found on attempt {attempt}")
+                        return final_answer
+                    logger.warning(
+                        f"Validation failed on attempt {attempt}: {final_answer}"
+                    )
+                    if attempt >= max_retries:
+                        raise Exception(
+                            f"Failed to get valid answer after {max_retries} attempts. Last error: {final_answer}"
+                        )
                 except Exception as e:
                     logger.error(
+                        f"Error in attempt {attempt}: {e}", exc_info=True
                     )
+                    if attempt >= max_retries:
+                        raise Exception(
+                            f"Failed after {max_retries} attempts. Last error: {str(e)}"
+                        )
+                    continue

src/final_answer.py ADDED Viewed

	@@ -0,0 +1,212 @@

+import os
+from typing import Any, Dict, Optional
+from typing_extensions import TypedDict
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import ChatPromptTemplate
+from langgraph.graph import Graph, StateGraph, START, END
+from langchain_google_genai import ChatGoogleGenerativeAI
+class AgentState(TypedDict):
+    """State for the final answer validation graph."""
+    question: str
+    answer: str
+    final_answer: str | None
+    agent_memory: Any
+    valid_answer: bool
+def extract_answer(state: AgentState) -> Dict:
+    """Extract and format the final answer from the state.
+    Args:
+        state: The state of the agent.
+    Returns:
+        A dictionary with the formatted final answer.
+    """
+    # Extract the final answer from the state
+    sep_token = "FINAL ANSWER:"
+    raw_answer = state["answer"]
+    # Extract the answer after the separator if it exists
+    if sep_token in raw_answer:
+        formatted_answer = raw_answer.split(sep_token)[1].strip()
+    else:
+        formatted_answer = raw_answer.strip()
+    # Remove any brackets from lists
+    formatted_answer = formatted_answer.replace("[", "").replace("]", "")
+    # Remove units unless specified
+    if not any(
+        unit in formatted_answer.lower()
+        for unit in ["$", "%", "dollars", "percent"]
+    ):
+        formatted_answer = formatted_answer.replace("$", "").replace("%", "")
+    # Remove commas from numbers
+    parts = formatted_answer.split(",")
+    formatted_parts = []
+    for part in parts:
+        part = part.strip()
+        if part.replace(".", "").isdigit():  # Check if it's a number
+            part = part.replace(",", "")
+        formatted_parts.append(part)
+    formatted_answer = ", ".join(formatted_parts)
+    return {"final_answer": formatted_answer}
+def reasoning_check(state: AgentState) -> Dict:
+    """
+    Node that checks the reasoning of the final answer.
+    Args:
+        state: The state of the agent.
+    Returns:
+        A dictionary with the reasoning check result.
+    """
+    model = ChatGoogleGenerativeAI(
+        model="models/gemini-2.0-flash-lite",
+        google_api_key=os.getenv("GEMINI_KEY"),
+        temperature=0.2,
+    )
+    prompt = ChatPromptTemplate.from_messages(
+        [
+            (
+                "system",
+                """You are a strict validator of answers. Your job is to check if the reasoning and results are correct.
+        You should have >90% confidence that the answer is correct to pass it.
+        First list reasons why yes/no, then write your final decision: PASS in caps lock if it is satisfactory, FAIL if it is not.""",
+            ),
+            (
+                "human",
+                """
+        Here is a user-given task and the agent steps: {agent_memory}
+        Now here is the answer that was given: {final_answer}
+        Please check that the reasoning process and results are correct: do they correctly answer the given task?
+        """,
+            ),
+        ]
+    )
+    chain = prompt | model | StrOutputParser()
+    output = chain.invoke(
+        {
+            "agent_memory": state["agent_memory"],
+            "final_answer": state["final_answer"],
+        }
+    )
+    print("Reasoning Feedback: ", output)
+    if "FAIL" in output:
+        return {"valid_answer": False}
+    return {"valid_answer": True}
+def formatting_check(state: AgentState) -> Dict:
+    """
+    Node that checks the formatting of the final answer.
+    Args:
+        state: The state of the agent.
+    Returns:
+        A dictionary with the formatting check result.
+    """
+    model = ChatGoogleGenerativeAI(
+        model="models/gemini-2.0-flash-lite",
+        google_api_key=os.getenv("GEMINI_KEY"),
+        temperature=0.2,
+    )
+    prompt = ChatPromptTemplate.from_messages(
+        [
+            (
+                "system",
+                """You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
+        """,
+            ),
+            (
+                "human",
+                """
+        Here is a user-given task and the agent steps: {agent_memory}
+        Now here is the FINAL ANSWER that was given: {final_answer}
+        Ensure the FINAL ANSWER is in the right format as asked for by the task.
+        """,
+            ),
+        ]
+    )
+    chain = prompt | model | StrOutputParser()
+    output = chain.invoke(
+        {
+            "agent_memory": state["agent_memory"],
+            "final_answer": state["final_answer"],
+        }
+    )
+    print("Formatting Feedback: ", output)
+    if "FAIL" in output:
+        return {"valid_answer": False}
+    return {"valid_answer": True}
+def create_final_answer_graph() -> Graph:
+    """Create a graph that validates the final answer.
+    Returns:
+        A graph that validates the final answer.
+    """
+    # Create the graph
+    workflow = StateGraph(AgentState)
+    # Add nodes
+    workflow.add_node("extract_answer", extract_answer)
+    workflow.add_node("reasoning_check", reasoning_check)
+    workflow.add_node("formatting_check", formatting_check)
+    # Add edges
+    workflow.add_edge(START, "extract_answer")
+    workflow.add_edge("extract_answer", "reasoning_check")
+    workflow.add_edge("reasoning_check", "formatting_check")
+    workflow.add_edge("formatting_check", END)
+    # Compile the graph
+    return workflow.compile()
+def validate_answer(graph: Graph, answer: str, agent_memory: Any) -> Dict:
+    """Validate the answer using the LangGraph workflow.
+    Args:
+        graph: The validation graph.
+        answer: The answer to validate.
+        agent_memory: The agent's memory.
+    Returns:
+        A dictionary with validation results.
+    """
+    try:
+        # Initialize state
+        initial_state = {
+            "answer": answer,
+            "final_answer": None,
+            "agent_memory": agent_memory,
+            "valid_answer": False,
+        }
+        # Run the graph
+        result = graph.invoke(initial_state)
+        return {
+            "valid_answer": result.get("valid_answer", False),
+            "final_answer": result.get("final_answer", None),
+        }
+    except Exception as e:
+        print(f"Validation failed: {e}")
+        return {"valid_answer": False, "final_answer": None}

src/tools.py ADDED Viewed

	@@ -0,0 +1,613 @@

+import base64
+import shutil
+import os
+import tempfile
+import uuid
+from typing import List, Optional, Dict, Union
+import re
+import time
+from datetime import datetime, timedelta
+from bs4 import BeautifulSoup
+from playwright.sync_api import sync_playwright
+import imageio
+import pandas as pd
+import pytesseract
+import requests
+import yt_dlp
+from dotenv import load_dotenv
+from google import genai
+from google.genai import types
+from langchain_core.tools import tool
+from PIL import Image
+from youtube_transcript_api import YouTubeTranscriptApi
+load_dotenv()
+# Vision Model Tool
+@tool
+def use_vision_model(
+    question: str, image_paths: List[str], mime_type: str
+) -> str:
+    """Use a Vision Model to answer a question about a set of images.
+    Args:
+        question (str): The question you are asking about the images.
+        image_paths (List[str]): The paths to the images to use for the question.
+        mime_type (str): The mime type of the image.
+    Returns:
+        str: The answer to the question
+    """
+    try:
+        client = genai.Client(api_key=os.getenv("GEMINI_KEY"))
+        model = "models/gemini-2.0-flash-001"
+        # Prepare the content parts
+        parts = []
+        for image_path in image_paths:
+            with open(image_path, "rb") as f:
+                image_bytes = f.read()
+        response = []
+        for chunk in client.models.generate_content_stream(
+            model=model,
+            contents=[
+                question,
+                types.Part.from_bytes(data=image_bytes, mime_type=mime_type),
+            ],
+        ):
+            response.append(chunk.text)
+        return " ".join(response)
+    except Exception as e:
+        return f"Error using vision model: {str(e)}"
+# YouTube Video Review Tool
+@tool
+def review_youtube_video(url: str, question: str) -> str:
+    """Reviews a YouTube video and answers a specific question about that video.
+    Args:
+        url (str): the URL to the YouTube video.
+        question (str): The question you are asking about the video
+    Returns:
+        str: The answer to the question
+    """
+    try:
+        client = genai.Client(api_key=os.getenv("GEMINI_KEY"))
+        model = "models/gemini-1.5-flash-8b"
+        response = client.models.generate_content(
+            model=model,
+            contents=types.Content(
+                parts=[
+                    types.Part(file_data=types.FileData(file_uri=url)),
+                    types.Part(text=question),
+                ]
+            ),
+        )
+        return response.text
+    except Exception as e:
+        return f"Error asking {model} about video: {str(e)}"
+# YouTube Frames to Images Tool
+@tool
+def video_frames_to_images(
+    url: str,
+    folder_name: str,
+    sample_interval_seconds: int = 5,
+) -> List[str]:
+    """Extracts frames from a video at specified intervals and saves them as images.
+    The video is downloaded into ``folder_name`` with yt-dlp and the sampled
+    frames are written as JPEG files into ``folder_name/frames``.
+    Args:
+        url (str): the URL to the video.
+        folder_name (str): the name of the folder to save the images to.
+        sample_interval_seconds (int): the interval between frames to sample.
+    Returns:
+        List[str]: A list of paths to the saved image files.
+    Raises:
+        RuntimeError: if the video cannot be downloaded or its frames cannot be read.
+    """
+    # Create a subdirectory for the frames
+    frames_dir = os.path.join(folder_name, "frames")
+    os.makedirs(frames_dir, exist_ok=True)
+    ydl_opts = {
+        "format": "bestvideo[height<=1080]+bestaudio/best[height<=1080]/best",
+        "outtmpl": os.path.join(folder_name, "video.%(ext)s"),
+        "quiet": True,
+        "noplaylist": True,
+        "merge_output_format": "mp4",
+        "force_ipv4": True,
+    }
+    try:
+        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+            ydl.extract_info(url, download=True)
+        # The output template leaves the extension open, so look for the mp4.
+        video_path = next(
+            (
+                os.path.join(folder_name, f)
+                for f in os.listdir(folder_name)
+                if f.endswith(".mp4")
+            ),
+            None,
+        )
+        if not video_path:
+            raise RuntimeError("Failed to download video as mp4")
+        reader = imageio.get_reader(video_path)
+        try:
+            fps = reader.get_meta_data().get("fps")
+            if fps is None:
+                raise RuntimeError(
+                    "Unable to determine FPS from video metadata"
+                )
+            # Clamp to 1 so a zero/sub-frame interval samples every frame
+            # instead of raising ZeroDivisionError in the modulo below.
+            frame_interval = max(1, int(fps * sample_interval_seconds))
+            image_paths: List[str] = []
+            for idx, frame in enumerate(reader):
+                if idx % frame_interval == 0:
+                    # Save frame as image
+                    image_path = os.path.join(
+                        frames_dir, f"frame_{idx:06d}.jpg"
+                    )
+                    imageio.imwrite(image_path, frame)
+                    image_paths.append(image_path)
+            return image_paths
+        finally:
+            # Release the video file even when decoding or writing fails.
+            reader.close()
+    except Exception as e:
+        raise RuntimeError(f"Error processing video frames: {str(e)}") from e
+# File Reading Tool
+@tool
+def read_file(filepath: str) -> str:
+    """Reads the content of a text file.
+    The file is decoded as UTF-8. Problems are reported as a message string
+    rather than raised.
+    Args:
+        filepath (str): the path to the file to read.
+    Returns:
+        str: The content of the file, or an error message if it could not be read.
+    """
+    try:
+        with open(filepath, "r", encoding="utf-8") as file:
+            return file.read()
+    except FileNotFoundError:
+        return f"File not found: {filepath}"
+    except (IOError, UnicodeDecodeError) as e:
+        # UnicodeDecodeError is a ValueError, not an OSError, so it has to be
+        # listed explicitly for binary / non-UTF-8 files.
+        return f"Error reading file: {str(e)}"
+# File Download Tool
+@tool
+def download_file_from_url(
+    url: str, directory: str
+) -> Dict[str, Union[str, None]]:
+    """Downloads a file from a URL and saves it to a directory.
+    The file name is taken from the Content-Disposition header, then from the
+    URL path, and finally generated from the content type.
+    Args:
+        url (str): the URL to download the file from.
+        directory (str): the directory to save the file to.
+    Returns:
+        Dict[str, Union[str, None]]: A dictionary containing the file type and path.
+            On failure "type" is "error", "path" is None and "error" holds the reason.
+    """
+    from urllib.parse import unquote
+    try:
+        # The context manager releases the streamed connection when done.
+        with requests.get(url, stream=True, timeout=10) as response:
+            response.raise_for_status()
+            content_type = response.headers.get("content-type", "").lower()
+            # Try to get filename from headers
+            filename = None
+            cd = response.headers.get("content-disposition", "")
+            extended = re.search(r"filename\*=UTF-8\'\'([^;]+)", cd)
+            if extended:
+                # The filename*= form is percent-encoded.
+                filename = unquote(extended.group(1).strip())
+            else:
+                plain = re.search(r'filename="?([^";]+)"?', cd)
+                if plain:
+                    filename = plain.group(1).strip()
+            if filename:
+                # The header is server-controlled: drop any directory part so
+                # the file cannot be written outside ``directory``.
+                filename = os.path.basename(filename.replace("\\", "/"))
+                if filename in (".", ".."):
+                    filename = None
+            # If not in headers, try URL
+            if not filename:
+                filename = os.path.basename(url.split("?")[0])
+            # Fallback to generated filename
+            if not filename:
+                # Ignore parameters such as "; charset=utf-8" for the lookup.
+                media_type = content_type.split(";")[0].strip()
+                extension = {
+                    "image/jpeg": ".jpg",
+                    "image/png": ".png",
+                    "image/gif": ".gif",
+                    "audio/wav": ".wav",
+                    "audio/mpeg": ".mp3",
+                    "video/mp4": ".mp4",
+                    "text/plain": ".txt",
+                    "text/csv": ".csv",
+                    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
+                    "application/vnd.ms-excel": ".xls",
+                    "application/octet-stream": ".bin",
+                }.get(media_type, ".bin")
+                filename = f"downloaded_{uuid.uuid4().hex[:8]}{extension}"
+            os.makedirs(directory, exist_ok=True)
+            file_path = os.path.join(directory, filename)
+            with open(file_path, "wb") as f:
+                for chunk in response.iter_content(chunk_size=8192):
+                    f.write(chunk)
+        if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
+            return {"type": content_type, "path": file_path}
+        return {
+            "type": "error",
+            "path": None,
+            "error": "Failed to save file",
+        }
+    except Exception as e:
+        return {
+            "type": "error",
+            "path": None,
+            "error": f"Error downloading file: {str(e)}",
+        }
+# Text Extraction from Image Tool
+@tool
+def extract_text_from_image(image_path: str) -> str:
+    """Extracts text from an image using OCR.
+    Args:
+        image_path (str): the path to the image to extract text from.
+    Returns:
+        str: The text extracted from the image, or an error message on failure.
+    """
+    try:
+        # The context manager closes the image file once OCR has finished.
+        with Image.open(image_path) as image:
+            text = pytesseract.image_to_string(image)
+        return f"Extracted text from image:\n\n{text}"
+    except Exception as e:
+        return f"Error extracting text from image: {str(e)}"
+# CSV Analysis Tool
+@tool
+def analyze_csv_file(file_path: str, query: str) -> str:
+    """Analyzes a CSV file and answers questions about its contents using Gemini.
+    Args:
+        file_path (str): the path to the CSV file to analyze.
+        query (str): the question to answer about the CSV file.
+    Returns:
+        str: The result of the analysis, or an error message on failure.
+    """
+    try:
+        # Read the CSV file
+        df = pd.read_csv(file_path)
+        # Initialize Gemini
+        client = genai.Client(api_key=os.getenv("GEMINI_KEY"))
+        model = "models/gemini-1.5-flash-8b"
+        # Convert DataFrame to a string representation
+        df_str = df.to_string()
+        # Create a prompt for Gemini
+        prompt = f"""Analyze this CSV data and provide insights:
+Dimensions: {len(df)} rows × {len(df.columns)} columns
+Data:
+{df_str}
+Please provide:
+1. A summary of the data structure and content
+2. Key patterns and insights
+3. Potential data quality issues
+4. Suggestions for analysis
+User Query: {query}
+Please format your response in a clear, structured way with sections and bullet points."""
+        # The prompt already embeds the table, so it is the only part sent;
+        # attaching df_str separately would transmit the data twice.
+        response = client.models.generate_content(
+            model=model,
+            contents=types.Content(parts=[types.Part(text=prompt)]),
+        )
+        result = f"CSV file loaded with {len(df)} rows and {len(df.columns)} columns.\n\n"
+        result += response.text
+        return result
+    except Exception as e:
+        return f"Error analyzing CSV file: {str(e)}"
+# Excel Analysis Tool
+@tool
+def analyze_excel_file(file_path: str, query: str) -> str:
+    """Analyzes an Excel file and answers questions about its contents using Gemini.
+    Each sheet is analysed with its own Gemini request and the per-sheet answers
+    are concatenated into one report.
+    Args:
+        file_path (str): the path to the Excel file to analyze.
+        query (str): the question to answer about the Excel file.
+    Returns:
+        str: The result of the analysis, or an error message on failure.
+    """
+    try:
+        # Initialize Gemini
+        client = genai.Client(api_key=os.getenv("GEMINI_KEY"))
+        model = "models/gemini-1.5-flash-8b"
+        # Open the workbook once; the context manager closes it afterwards.
+        with pd.ExcelFile(file_path) as excel_file:
+            sheet_names = excel_file.sheet_names
+            sections = [
+                f"Excel file loaded with {len(sheet_names)} sheets: {', '.join(sheet_names)}\n\n"
+            ]
+            # Analyze each sheet
+            for sheet_name in sheet_names:
+                # Parse from the already-open workbook instead of re-reading the file.
+                df = excel_file.parse(sheet_name)
+                # Convert DataFrame to a string representation
+                df_str = df.to_string()
+                # Create a prompt for Gemini
+                prompt = f"""Analyze this Excel sheet data and provide insights:
+Sheet Name: {sheet_name}
+Dimensions: {len(df)} rows × {len(df.columns)} columns
+Data:
+{df_str}
+Please provide:
+1. A summary of the data structure and content
+2. Key patterns and insights
+3. Potential data quality issues
+4. Suggestions for analysis
+User Query: {query}
+Please format your response in a clear, structured way with sections and bullet points."""
+                # The prompt already embeds the sheet, so it is the only part sent.
+                response = client.models.generate_content(
+                    model=model,
+                    contents=types.Content(parts=[types.Part(text=prompt)]),
+                )
+                sections.append(f"=== Sheet: {sheet_name} ===\n")
+                sections.append(response.text + "\n")
+                sections.append("=" * 50 + "\n\n")
+        return "".join(sections)
+    except Exception as e:
+        return f"Error analyzing Excel file: {str(e)}"
+# Audio Transcription Tool
+@tool
+def transcribe_audio(audio_file_path: str, mime_type: str) -> str:
+    """Transcribes an audio file using Gemini's audio capabilities.
+    Args:
+        audio_file_path (str): the path to the audio file to transcribe.
+        mime_type (str): the mime type of the audio file.
+    Returns:
+        str: The transcript of the audio file, or an error message on failure.
+    """
+    try:
+        # Set up the Gemini client and model name
+        gemini = genai.Client(api_key=os.getenv("GEMINI_KEY"))
+        model_name = "models/gemini-1.5-flash-8b"
+        # Load the raw audio bytes from disk
+        with open(audio_file_path, "rb") as handle:
+            raw_audio = handle.read()
+        # The request carries the audio bytes followed by the instruction
+        audio_part = types.Part.from_bytes(
+            data=raw_audio,
+            mime_type=mime_type,
+        )
+        instruction_part = types.Part(text="Please transcribe this audio file.")
+        request = types.Content(parts=[audio_part, instruction_part])
+        # Ask Gemini for the transcript
+        reply = gemini.models.generate_content(
+            model=model_name, contents=request
+        )
+        return reply.text
+    except Exception as e:
+        return f"Error transcribing audio: {str(e)}"
+def _extract_video_id(url: str) -> Optional[str]:
+    """Pull the video ID out of a YouTube URL.
+    Args:
+        url (str): the URL to the YouTube video.
+    Returns:
+        Optional[str]: The video ID, or None when no pattern matches the URL.
+    """
+    # Patterns are tried in order; the first one that matches wins.
+    candidates = (
+        r"(?:youtube\.com\/watch\?v=|youtube\.com\/embed\/|youtu\.be\/)([^&\n?#]+)",
+        r"(?:youtube\.com\/v\/|youtube\.com\/e\/|youtube\.com\/user\/[^\/]+\/|youtube\.com\/[^\/]+\/|youtube\.com\/embed\/|youtu\.be\/)([^&\n?#]+)",
+    )
+    hits = (re.search(regex, url) for regex in candidates)
+    found = next((hit for hit in hits if hit), None)
+    return found.group(1) if found else None
+@tool
+def transcribe_youtube(url: str) -> str:
+    """Transcribes a YouTube video using YouTube Transcript API or Gemini as fallback.
+    The English transcript is requested first. If that fails, the audio is
+    downloaded with yt-dlp, converted to WAV and transcribed by Gemini.
+    Args:
+        url (str): the URL to the YouTube video.
+    Returns:
+        str: The transcript of the YouTube video.
+    Raises:
+        RuntimeError: if no video ID can be extracted from the URL, or if both the
+            transcript API and the Gemini fallback fail.
+    """
+    try:
+        # First try using YouTube Transcript API
+        video_id = _extract_video_id(url)
+        if not video_id:
+            raise ValueError(f"Invalid YouTube URL: {url}")
+        try:
+            # Try to get transcript in English
+            transcript_chunks = YouTubeTranscriptApi.get_transcript(
+                video_id, languages=["en"]
+            )
+            # Combine all chunks into a single transcript with timestamps
+            lines = []
+            for chunk in transcript_chunks:
+                timestamp = str(timedelta(seconds=int(chunk["start"])))
+                lines.append(f"[{timestamp}] {chunk['text']}\n")
+            return "".join(lines)
+        except Exception as transcript_error:
+            print(
+                f"Failed to get transcript using YouTube API: {str(transcript_error)}"
+            )
+            print("Falling back to Gemini-based transcription...")
+            # Fallback to Gemini-based transcription
+            with tempfile.TemporaryDirectory() as tmpdir:
+                # Download audio from YouTube
+                ydl_opts = {
+                    "format": "bestaudio/best",
+                    "outtmpl": os.path.join(tmpdir, "audio.%(ext)s"),
+                    "quiet": True,
+                    "noplaylist": True,
+                    "postprocessors": [
+                        {
+                            "key": "FFmpegExtractAudio",
+                            "preferredcodec": "wav",
+                            "preferredquality": "192",
+                        }
+                    ],
+                }
+                try:
+                    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+                        ydl.extract_info(url, download=True)
+                    audio_path = next(
+                        (
+                            os.path.join(tmpdir, f)
+                            for f in os.listdir(tmpdir)
+                            if f.endswith(".wav")
+                        ),
+                        None,
+                    )
+                    if not audio_path:
+                        raise RuntimeError(
+                            "Failed to download audio"
+                        ) from transcript_error
+                    # Use Gemini to transcribe the audio
+                    client = genai.Client(api_key=os.getenv("GEMINI_KEY"))
+                    model = "models/gemini-1.5-flash-8b"
+                    # Read the audio file
+                    with open(audio_path, "rb") as audio_file:
+                        audio_data = audio_file.read()
+                    # Inline bytes go through Part.from_bytes (as in
+                    # transcribe_audio); FileData is for URI references.
+                    contents = types.Content(
+                        parts=[
+                            types.Part.from_bytes(
+                                data=audio_data,
+                                mime_type="audio/wav",
+                            ),
+                            types.Part(
+                                text="Please transcribe this audio file. Include timestamps if possible."
+                            ),
+                        ]
+                    )
+                    # Generate transcription
+                    response = client.models.generate_content(
+                        model=model, contents=contents
+                    )
+                    return response.text
+                except yt_dlp.utils.DownloadError as e:
+                    raise RuntimeError(
+                        f"Error downloading YouTube video: {str(e)}"
+                    ) from transcript_error
+                except Exception as e:
+                    raise RuntimeError(
+                        f"Error processing YouTube video: {str(e)}"
+                    ) from transcript_error
+    except Exception as e:
+        raise RuntimeError(f"Error in YouTube transcription: {str(e)}") from e
+@tool
+def website_scrape(url: str, question: str) -> str:
+    """Scrapes a website and returns the text.
+    Args:
+        url (str): the URL to the website to scrape.
+        question (str): the question being asked about the page. It is currently
+            unused; the full page text is returned regardless.
+    Returns:
+        str: The text of the website.
+    """
+    with sync_playwright() as p:
+        browser = p.chromium.launch(headless=True)
+        try:
+            page = browser.new_page()
+            page.goto(url)
+            html_content = page.content()
+        finally:
+            # Close the browser even when navigation or rendering fails.
+            browser.close()
+    soup = BeautifulSoup(html_content, "html.parser")
+    # Extract text from the website
+    return soup.get_text()

src/tools/__init__.py DELETED Viewed

File without changes

src/tools/image_to_text.py DELETED Viewed

File without changes

src/tools/web_scrapper.py DELETED Viewed

@@ -1,23 +0,0 @@
-from bs4 import BeautifulSoup
-from langgraph import Tool
-from playwright.sync_api import sync_playwright
-def extract_website_content(url: str) -> str:
-    with sync_playwright() as p:
-        browser = p.chromium.launch(headless=True)
-        page = browser.new_page()
-        page.goto(url)
-        html_content = page.content()
-        browser.close()
-    soup = BeautifulSoup(html_content, "html.parser")
-    return soup.get_text()
-def web_scrapper_tool():
-    return Tool.from_function(
-        func=extract_website_content,
-        name="scrape_website",
-        description="Extracts the main content of a webpage given its URL.",
-    )

tests/test_tools.py ADDED Viewed

	@@ -0,0 +1,155 @@

+import os
+import pytest
+from dotenv import load_dotenv
+import tempfile
+from src.tools import (
+    use_vision_model,
+    review_youtube_video,
+    video_frames_to_images,
+    read_file,
+    download_file_from_url,
+    extract_text_from_image,
+    analyze_csv_file,
+    analyze_excel_file,
+    transcribe_audio,
+    transcribe_youtube,
+    website_scrape,
+)
+# Load environment variables from a local .env file before any test runs
+load_dotenv()
+@pytest.fixture
+def test_dir(tmp_path):
+    """Expose pytest's per-test temporary directory under the name ``test_dir``."""
+    scratch_dir = tmp_path
+    return scratch_dir
+def test_website_scrape():
+    """Scraping a live Wikipedia page should yield non-empty text."""
+    payload = {
+        "url": "https://en.wikipedia.org/wiki/2025_World_Snooker_Championship",
+        "question": "What is the main heading?",
+    }
+    scraped = website_scrape.invoke(payload)
+    assert isinstance(scraped, str)
+    assert len(scraped) > 0
+    print("\nWebsite Scrape Test Result:", scraped[:200])
+def test_read_file(test_dir):
+    """Round-trip a small text file through the read_file tool."""
+    expected = "Test content"
+    # Write the sample file into the temporary directory
+    sample = test_dir / "test.txt"
+    sample.write_text(expected)
+    content = read_file.invoke({"filepath": str(sample)})
+    assert isinstance(content, str)
+    assert content == expected
+    print("\nRead File Test Result:", content)
+def test_download_file_from_url():
+    """Test file downloading functionality."""
+    url = "https://fastly.picsum.photos/id/856/400/400.jpg?hmac=tb7tfZIDAlSxzTJ6V0l3sJH4CxcWXW1z4aiWrqbbQSs"
+    with tempfile.TemporaryDirectory() as temp_dir:
+        print(f"Downloading file to: {temp_dir}")
+        # The tool takes a target directory (not a file path) and picks the name.
+        result = download_file_from_url.invoke(
+            {"url": url, "directory": temp_dir}
+        )
+        # The tool returns a dict with "type" and "path" ("error" on failure).
+        assert isinstance(result, dict)
+        assert result["type"] != "error", result.get("error")
+        assert result["path"] is not None
+        assert os.path.exists(result["path"])
+def test_extract_text_from_image():
+    """OCR on the sample image should come back as a string."""
+    payload = {"image_path": "test_files/text_in_image.jpg"}
+    ocr_output = extract_text_from_image.invoke(payload)
+    assert isinstance(ocr_output, str)
+    print("\nExtract Text Test Result:", ocr_output)
+def test_analyze_csv_file():
+    """Test CSV analysis functionality."""
+    # Uses the sample CSV under test_files/; no temporary directory is needed.
+    file_path = "test_files/customers-100.csv"
+    result = analyze_csv_file.invoke(
+        {
+            "file_path": file_path,
+            "query": "What is the first name of the first customer?",
+        }
+    )
+    assert isinstance(result, str)
+    assert "CSV file loaded" in result
+    print("\nAnalyze CSV Test Result:", result)
+def test_analyze_excel_file():
+    """The Excel analysis report should start from a loaded workbook."""
+    payload = {
+        "file_path": "test_files/Project-Management-Sample-Data.xlsx",
+        "query": "What is the name of the first task?",
+    }
+    report = analyze_excel_file.invoke(payload)
+    assert isinstance(report, str)
+    assert "Excel file loaded" in report
+    print("\nAnalyze Excel Test Result:", report)
+def test_transcribe_audio():
+    """Test audio transcription functionality."""
+    audio_path = "test_files/CECIL-I-NEED-YOU-CECIL.mp3"
+    # mime_type is a required argument of the tool; the sample file is an MP3.
+    result = transcribe_audio.invoke(
+        {"audio_file_path": audio_path, "mime_type": "audio/mpeg"}
+    )
+    assert isinstance(result, str)
+    print("\nTranscribe Audio Test Result:", result)
+def test_transcribe_youtube():
+    """Transcribing a public YouTube video should return a string."""
+    video_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"  # Example video
+    transcript = transcribe_youtube.invoke({"url": video_url})
+    assert isinstance(transcript, str)
+    print("\nTranscribe YouTube Test Result:", transcript[:200])
+def test_video_frames_to_images():
+    """Frame extraction should return one path per image written to disk."""
+    video_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"  # Example video
+    with tempfile.TemporaryDirectory() as workdir:
+        payload = {
+            "url": video_url,
+            "folder_name": workdir,
+            "sample_interval_seconds": 5,
+        }
+        frame_paths = video_frames_to_images.invoke(payload)
+        assert isinstance(frame_paths, list)
+        for frame_path in frame_paths:
+            assert isinstance(frame_path, str)
+        # Frames are expected in the "frames" subfolder of the target folder
+        frames_dir = os.path.join(workdir, "frames")
+        assert os.path.exists(frames_dir)
+        assert len(os.listdir(frames_dir)) == len(frame_paths)
+        print(f"\nVideo Frames Test Result: Extracted {len(frame_paths)} frames")
+def test_use_vision_model():
+    """Test vision model functionality."""
+    image_paths = ["test_files/people.jpeg", "test_files/text_in_image.jpg"]
+    result = use_vision_model.invoke(
+        {
+            "question": "What do you see in these images?",
+            "image_paths": image_paths,
+            # The tool documents a mime_type argument; both samples are JPEGs.
+            "mime_type": "image/jpeg",
+        }
+    )
+    assert isinstance(result, str)
+    print("\nVision Model Test Result:", result)
+def test_review_youtube_video():
+    """Asking a question about a public YouTube video should return a string."""
+    payload = {
+        "url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ",  # Example video
+        "question": "What is the main topic of this video?",
+    }
+    answer = review_youtube_video.invoke(payload)
+    assert isinstance(answer, str)
+    print("\nReview YouTube Test Result:", answer)