Final_Assignment_Template_CURR

Sleeping

File size: 16,508 Bytes

import os
from dotenv import load_dotenv

# Import models from SmolaAgents
from smolagents import CodeAgent, LiteLLMModel, OpenAIServerModel

# Import SmolaAgents tools
from smolagents.default_tools import FinalAnswerTool, PythonInterpreterTool

# Import custom tools
from tools import (
    AddDocumentToVectorStoreTool,
    ArxivSearchTool,
    DownloadFileFromLinkTool,
    DuckDuckGoSearchTool,
    QueryVectorStoreTool,
    ReadFileContentTool,
    TranscibeVideoFileTool,
    TranscribeAudioTool,
    VisitWebpageTool,
    WikipediaSearchTool,
    image_question_answering,
)

# Import utility functions
from utils import extract_final_answer, replace_tool_mentions


class BoomBot:
    def __init__(self, provider="anthropic"):
        """
        Initialize the BoomBot with the specified provider.

        Args:
            provider (str): The model provider to use (e.g., "groq", "qwen", "gemma", "anthropic", "deepinfra", "meta")
        """
        load_dotenv()
        self.provider = provider
        self.model = self._initialize_model()
        self.agent = self._create_agent()

    def _initialize_model(self):
        """
        Initialize the appropriate model based on the provider.

        Returns:
            The initialized model object
        """
        if self.provider == "qwen":
            qwen_model = "ollama_chat/qwen3:8b"
            return LiteLLMModel(
                model_id=qwen_model,
                device="cuda",
                num_ctx=32768,
                temperature=0.6,
                top_p=0.95,
            )
        elif self.provider == "gemma":
            gemma_model = "ollama_chat/gemma3:12b-it-qat"
            return LiteLLMModel(
                model_id=gemma_model,
                num_ctx=65536,
                temperature=1.0,
                device="cuda",
                top_k=64,
                top_p=0.95,
                min_p=0.0,
            )
        elif self.provider == "anthropic":
            model_id = "anthropic/claude-3-5-haiku-latest"
            return LiteLLMModel(
                model_id=model_id,
                temperature=0.6,
                max_tokens=8192,
                api_key=os.getenv("ANTHROPIC_API_KEY"),
            )

        elif self.provider == "deepinfra":
            deepinfra_model = "Qwen/Qwen3-235B-A22B"
            # return OpenAIServerModel(
            #     model_id=deepinfra_model,
            #     api_base="https://api.deepinfra.com/v1/openai",
            #     api_key=os.getenv("ANTHROPIC_API_KEY"),
            #     flatten_messages_as_text=True,
            #     max_tokens=8192,
            #     temperature=0.1,
            # )
            return LiteLLMModel(
                 model_id="deepinfra/"+ deepinfra_model,
                api_base="https://api.deepinfra.com/v1/openai",
                api_key=os.getenv("DEEPINFRA_API_KEY"),
                flatten_messages_as_text=True,
                max_tokens=8192,
                temperature=0.7,
            )
        elif self.provider == "meta":
            meta_model = "meta-llama/Llama-3.3-70B-Instruct-Turbo"
            meta_model = "Qwen/Qwen2.5-72B-Instruct"
            # return OpenAIServerModel(
            #     model_id=meta_model,
            #     api_base="https://api.deepinfra.com/v1/openai",
            #     api_key=os.getenv("DEEPINFRA_API_KEY"),
            #     flatten_messages_as_text=True,
            #     max_tokens=8192,
            #     temperature=0.7,
            # )
            return LiteLLMModel(
                 model_id="deepinfra/"+ meta_model,
                api_base="https://api.deepinfra.com/v1/openai",
                api_key=os.getenv("DEEPINFRA_API_KEY"),
                flatten_messages_as_text=True,
                max_tokens=8192,
                temperature=0.7,
            )
        elif self.provider == "google":
            meta_model = "google/gemini-2.5-flash"
            # return OpenAIServerModel(
            #     model_id=meta_model,
            #     api_base="https://api.deepinfra.com/v1/openai",
            #     api_key=os.getenv("DEEPINFRA_API_KEY"),
            #     flatten_messages_as_text=True,
            #     max_tokens=8192,
            #     temperature=0.7,
            # )
            return LiteLLMModel(
                 model_id="deepinfra/"+ meta_model,
                api_base="https://api.deepinfra.com/v1/openai",
                api_key=os.getenv("DEEPINFRA_API_KEY"),
                flatten_messages_as_text=True,
                max_tokens=8192,
                temperature=0.7,
            )
        elif self.provider == "groq":
            # Default to use groq's claude-3-opus or llama-3
            model_id = "claude-3-opus-20240229"
            return LiteLLMModel(model_id=model_id, temperature=0.7, max_tokens=8192)
        else:
            raise ValueError(f"Unsupported provider: {self.provider}")

    def _create_agent(self):
        """
        Create and configure the agent with all necessary tools.

        Returns:
            The configured CodeAgent
        """
        # Initialize tools
        download_file = DownloadFileFromLinkTool()
        read_file_content = ReadFileContentTool()
        visit_webpage = VisitWebpageTool()
        # transcribe_video = TranscibeVideoFileTool()
        transcribe_audio = TranscribeAudioTool()
        get_wikipedia_info = WikipediaSearchTool()
        web_searcher = DuckDuckGoSearchTool()
        arxiv_search = ArxivSearchTool()
        add_doc_vectorstore = AddDocumentToVectorStoreTool()
        retrieve_doc_vectorstore = QueryVectorStoreTool()

        # SmolaAgents default tools
        python_interpreter = PythonInterpreterTool()
        final_answer = FinalAnswerTool()

        # Combine all tools
        agent_tools = [
            web_searcher,
            download_file,
            read_file_content,
            visit_webpage,
            # transcribe_video,
            transcribe_audio,
            get_wikipedia_info,
            arxiv_search,
            add_doc_vectorstore,
            retrieve_doc_vectorstore,
            # image_question_answering,
            python_interpreter,
            final_answer,
        ]

        # Additional imports for the Python interpreter
        additional_imports = [
            # Built-in / core Python
            "json",
            "os",
            "glob",
            "pathlib",
            "argparse",
            "pickle",
            "io",
            "re",
            "datetime",
            "collections",
            "math",
            "random",
            "csv",
            "zipfile",
            "itertools",
            "functools",
            "requests",
            "bs4",
            # Data handling
            "pandas",
            "numpy",
            "dask",        # For handling large datasets
            "polars",      # Fast DataFrame alternative
            "pyarrow",     # For Arrow/Parquet file formats
            "h5py",        # For HDF5 files
            "openpyxl",    # Excel reading/writing
            "yaml",        # Config file parsing
            # Basic plotting
            "matplotlib",
            "seaborn"
        ]

        # Create the agent
        agent = CodeAgent(
            tools=agent_tools,
            max_steps=15,
            model=self.model,
            add_base_tools=False,
            stream_outputs=True,
            additional_authorized_imports=additional_imports,
        )

        # Modify the system prompt
        modified_prompt = replace_tool_mentions(agent.system_prompt)
        agent.system_prompt = modified_prompt

        return agent

    def _get_system_prompt(self):
        """
        Return the system prompt for the agent.

        Returns:
            str: The system prompt
        """
        return """
        YOUR BEHAVIOR GUIDELINES:
        • Do NOT make unfounded assumptions—always ground answers in reliable sources or search results.
        • For math or puzzles: break the problem into code/math, then solve programmatically.

        RESEARCH WORKFLOW:
        1. SEARCH  
            - Begin with web_search, wikipedia_search, or arxiv_search.  
            - Refine your query if results are weak—don't just retry the same terms.  
            - If one search tool yields little, try another before moving on to downloads.

        2. VISIT  
            - Use visit_webpage to preview content from promising links.  
            - If the content is long, complex, spans multiple pages, or may be needed later, do NOT rely solely on visit_webpage.  
            - Move quickly to downloading: avoid repeated visits when the content should be archived.

        3. DOWNLOAD AND ADD TO VECTORSTORE (MANDATORY IF CONTENT IS LONG, DENSE, COMPLEX, MULTIPLE FILES OR LINKS TO VISIT)  
            - Use download_file_from_link on all valuable resources (including html pages or pdfs).
            - Especially when a page is detailed, technical, or multi-part, downloading is preferred.
            - You can (and should) download webpages as HTML. Do this whenever the site might be referenced again later.

        4. INDEX & QUERY  
            - Immediately add downloaded files to the vector store using add_document_to_vector_store.
            - For complex tasks or unclear answers, prefer querying vector store over re-visiting pages.
            - If you've downloaded a file, **always index it unless clearly irrelevant.**

        5. READ  
            - Use read_file_content to analyze file contents (html, pdf, text).  
            - You can also use query_downloaded_documents for deeper understanding.

        6. EVALUATE  
            - ✅ If the answer is clear from current sources, respond.  
            - ❌ If not, continue iterating and analyzing downloaded material.

        FALLBACK & ADAPTATION:
        • If a tool fails, reformulate or switch tools.
        • For arXiv: web_search might help you find the paper; follow with direct download of the PDF via download_file_from_link.

        MANDATORY DOWNLOAD & INDEX WHEN:
        • The page is lengthy or technical (e.g., research papers, government sites, legal docs, blog posts with code).
        • You suspect you'll need to return to the content.
        • You are working on multi-hop reasoning or long-term memory tasks.

        COMMON TOOL CHAINS:
        • FACTUAL Qs:  
            web_search → final_answer  
        • CURRENT EVENTS:  
            web_search → visit_webpage → (download + index if needed) → final_answer  
        • DOCUMENT-BASED Qs:  
            web_search → download_file_from_link → add_document_to_vector_store → query_downloaded_documents → final_answer  
        • ARXIV PAPERS:  
            arxiv_search → download_file_from_link → add_document_to_vector_store → query_downloaded_documents → final_answer  
        • MEDIA ANALYSIS:  
            download_file_from_link → transcribe_audio → final_answer  

        FINAL ANSWER FORMAT:
        - Begin with "FINAL ANSWER: "  
        - Number → digits only (e.g., 42)  
        - String → exact text (e.g., Pope Francis) without quotation marks  
        - List → comma-separated, no brackets unless specified (e.g., 2, 3, 4)  
        - End with: FINAL ANSWER: <your_answer>
        """


    def run(self, question: str, task_id: str, to_download) -> str:
        """
        Run the agent with the given question, task_id, and download flag.

        Args:
            question (str): The question or task for the agent to process
            task_id (str): A unique identifier for the task
            to_download (Bool): Flag indicating whether to download resources

        Returns:
            str: The agent's response
        """
        prompt = self._get_system_prompt()
        # Task introduction
        prompt += "\nHere is the Task you need to solve:\n\n"
        prompt += f"Task: {question}\n\n"

        # Include download instructions if applicable
        if to_download:
            link = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
            prompt += (
                "IMPORTANT: Before solving the task, you must download a required file.\n"
                f"Use the `download_file_from_link` tool with this link: {link}\n"
                "After downloading, use the appropriate tool to read or process the file "
                "before attempting to solve the task.\n\n"
            )

        # Run the agent with the given question
        result = self.agent.run(prompt)

        # Extract the final answer from the result
        final_answer = extract_final_answer(result)

        return final_answer



if __name__ == "__main__":
    import os
    import csv
    import time
    import requests
    from utils import load_online_qas, extract_final_answer

    CSV_FILE = "evals/llm_eval.csv"
    FIELDNAMES = ["model", "task_id", "question", "llm_answer", "processed_answer", "real_answer"]

    def ensure_csv():
        """Create the CSV file with header if it doesn't exist."""
        if not os.path.isfile(CSV_FILE):
            with open(CSV_FILE, mode="w", newline="", encoding="utf-8") as f:
                writer = csv.DictWriter(f, fieldnames=FIELDNAMES)
                writer.writeheader()

    def append_results(rows):
        """Append a list of dict rows to the CSV."""
        with open(CSV_FILE, mode="a", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=FIELDNAMES)
            for row in rows:
                writer.writerow(row)

    agent = BoomBot(provider="deepinfra")
    model_name = agent.provider  # e.g. "gemma"

    file_online   = load_online_qas(file_path=r"../../Final_Assignment_Template/allqas.jsonl", has_file=True)
    nofile_online = load_online_qas(file_path=r"../../Final_Assignment_Template/allqas.jsonl", has_file=False)

    excluded_keywords = ["youtube", "video", "chess"]
    rows_to_append = []

    # 1) With downloadable files
    for entry in file_online:
        task_id     = entry["task_id"]
        question    = entry["Question"]
        real_answer = entry["Final answer"]
        file_name   = entry.get("file_name", "")
        to_download = bool(file_name)
        link        = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"

        if any(kw in question.lower() for kw in excluded_keywords):
            llm_answer = processed = "NOT ATTEMPTED"
        else:
            try:
                resp = requests.get(link)
                if resp.status_code != 200:
                    llm_answer = processed = "NOT ATTEMPTED"
                else:
                    llm_answer = agent.run(question, task_id, to_download)
                    processed = extract_final_answer(llm_answer).strip()
                    # time.sleep(10)
            except Exception as e:
                llm_answer = processed = f"[Error] {e}"
                # time.sleep(6)

        rows_to_append.append({
            "model":            model_name,
            "task_id":          task_id,
            "question":         question,
            "llm_answer":       llm_answer,
            "processed_answer": processed,
            "real_answer":      real_answer,
        })
        print("REAL ANSWER:", real_answer)

    # 2) Without downloadable files
    for entry in nofile_online:
        task_id     = entry["task_id"]
        question    = entry["Question"]
        real_answer = entry["Final answer"]

        if any(kw in question.lower() for kw in excluded_keywords):
            llm_answer = processed = "NOT ATTEMPTED"
        else:
            try:
                llm_answer = agent.run(question, task_id, to_download=False)
                processed = extract_final_answer(llm_answer).strip()
                # time.sleep(10)
            except Exception as e:
                llm_answer = processed = f"[Error] {e}"
                # time.sleep(6)

        rows_to_append.append({
            "model":            model_name,
            "task_id":          task_id,
            "question":         question,
            "llm_answer":       llm_answer,
            "processed_answer": processed,
            "real_answer":      real_answer,
        })
        print("REAL ANSWER:", real_answer)

    # ensure CSV exists and append
    ensure_csv()
    append_results(rows_to_append)

    print(f"✅ Appended {len(rows_to_append)} rows to {CSV_FILE}")