Spaces:

agents-course
/

Final_Assignment_Template

Running

App Files Files Community

248

Upload 6 files

#67

by jimmy1024 - opened May 12

base: refs/heads/main

←

from: refs/pr/67

Discussion Files changed

+384

-133

Files changed (5) hide show

agent.py +156 -0
app.py +76 -131
gitattributes +35 -0
logic.py +108 -0
requirements.txt +9 -2

agent.py ADDED Viewed

	@@ -0,0 +1,156 @@

+from typing import Any, Dict, List, Optional
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from duckduckgo_search import DDGS
+import re
+import math
+class WebSearchTool:
+    """Web search tool backed by a DuckDuckGo text search client."""
+    def __init__(self):
+        self.search = DDGS()
+    def run(self, query: str, max_results: int = 3) -> str:
+        """Perform a web search and return formatted results.
+        Args:
+            query: Free-text search query.
+            max_results: Maximum number of results requested from the client.
+        Returns:
+            One "Title / Snippet / URL" entry per result, joined by newlines
+            (an empty string when there are no results), or a message
+            starting with "Error performing web search:" if the search fails.
+        """
+        try:
+            formatted_results = []
+            for r in self.search.text(query, max_results=max_results):
+                # Text results carry the URL under "href"; indexing "link"
+                # raised KeyError and turned every search into an error.
+                # "link" is still accepted as a fallback.
+                url = r.get("href") or r.get("link", "")
+                formatted_results.append(
+                    f"Title: {r.get('title', '')}\nSnippet: {r.get('body', '')}\nURL: {url}\n"
+                )
+            return "\n".join(formatted_results)
+        except Exception as e:
+            return f"Error performing web search: {str(e)}"
+class Calculator:
+    """Arithmetic tool that evaluates a sanitised expression string."""
+    def run(self, expression: str) -> str:
+        """Evaluate a mathematical expression and return the result as text.
+        Args:
+            expression: Text containing an arithmetic expression. Every
+                character other than digits, ``+ - * / ( ) .`` and spaces is
+                removed before evaluation.
+        Returns:
+            ``str()`` of the evaluated value, or a message starting with
+            "Error in calculation:" when nothing evaluable remains or the
+            evaluation fails.
+        """
+        try:
+            # Remove any characters that aren't numbers, operators, or parentheses
+            cleaned = re.sub(r'[^0-9+\-*/().\ ]', '', expression).strip()
+            if not cleaned:
+                # Without this guard eval('') reports an opaque SyntaxError
+                return "Error in calculation: no arithmetic expression found"
+            # SECURITY NOTE(review): eval() runs on model-produced text. The
+            # whitelist above strips every letter, so no names are reachable
+            # (which is why no `math` namespace is passed), but operators such
+            # as `**` remain and very large powers could be expensive.
+            result = eval(cleaned, {"__builtins__": {}}, {})
+            return str(result)
+        except Exception as e:
+            return f"Error in calculation: {str(e)}"
+class GaiaAgent:
+    """Question-answering agent pairing a causal language model with tools.
+    The model may request a tool by emitting ``<tool>tool_name|input</tool>``.
+    Requested tools are executed once and their output is appended to the
+    prompt for a second generation pass that produces the final answer.
+    """
+    def __init__(self):
+        # Initialize Qwen-7B model
+        self.model_name = "Qwen/Qwen-7B"
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            self.model_name,
+            trust_remote_code=True
+        )
+        self.model = AutoModelForCausalLM.from_pretrained(
+            self.model_name,
+            device_map="auto",
+            trust_remote_code=True
+        ).eval()
+        # Initialize tools
+        self.tools = {
+            "web_search": WebSearchTool(),
+            "calculator": Calculator()
+        }
+        # System prompt template
+        self.system_prompt = """You are a helpful AI assistant with access to the following tools:
+1. web_search: Search the internet for current information
+2. calculator: Perform mathematical calculations
+To use a tool, respond with: <tool>tool_name|input</tool>
+For example: <tool>calculator|2 + 2</tool> or <tool>web_search|latest news about AI</tool>
+If you don't need any tools to answer, just provide your response directly.
+Always explain your reasoning before using tools or providing final answers."""
+    def _generate_response(self, prompt: str, max_length: int = 2048) -> str:
+        """Generate a response using the Qwen model.
+        Args:
+            prompt: Full text prompt to continue.
+            max_length: Forwarded to ``generate`` as ``max_length``.
+                NOTE(review): this limit appears to count prompt tokens as
+                well, so long prompts leave little room for the answer;
+                consider ``max_new_tokens`` - confirm.
+        Returns:
+            The newly generated text with the prompt removed, or a message
+            starting with "Error generating response:" on failure.
+        """
+        try:
+            input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(self.model.device)
+            # Fall back to the EOS id when the tokenizer defines no pad token
+            pad_token_id = self.tokenizer.pad_token_id
+            if pad_token_id is None:
+                pad_token_id = self.tokenizer.eos_token_id
+            with torch.no_grad():
+                outputs = self.model.generate(
+                    input_ids,
+                    max_length=max_length,
+                    num_return_sequences=1,
+                    temperature=0.7,
+                    do_sample=True,
+                    pad_token_id=pad_token_id
+                )
+            # Extract only the assistant's response. The output sequence starts
+            # with the prompt tokens, so slice by token count: splitting the
+            # decoded text on the prompt fails when decoding does not
+            # reproduce the prompt exactly, and the <tool> examples of the
+            # system prompt would then be parsed as real tool calls.
+            new_tokens = outputs[0][input_ids.shape[-1]:]
+            response = self.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
+            return response
+        except Exception as e:
+            return f"Error generating response: {str(e)}"
+    def _extract_tool_calls(self, response: str) -> List[Dict[str, str]]:
+        """Extract tool calls from the response.
+        Args:
+            response: Model output that may contain ``<tool>name|input</tool>``.
+        Returns:
+            One ``{"name": ..., "input": ...}`` dict per tool tag, in order of
+            appearance, with surrounding whitespace stripped from both parts.
+        """
+        # The name may not contain "|" or "<", so a match cannot straddle two
+        # tags; DOTALL lets the tool input span several lines.
+        tool_pattern = r'<tool>\s*([^|<]+?)\s*\|(.*?)</tool>'
+        matches = re.finditer(tool_pattern, response, re.DOTALL)
+        tool_calls = []
+        for match in matches:
+            tool_name = match.group(1).strip()
+            tool_input = match.group(2).strip()
+            tool_calls.append({"name": tool_name, "input": tool_input})
+        return tool_calls
+    def _execute_tool_call(self, tool_call: Dict[str, str]) -> str:
+        """Execute a single tool call and return the result.
+        Args:
+            tool_call: Dict with the keys "name" and "input".
+        Returns:
+            The tool's output, or an "Error: ..." / "Error executing ..."
+            message when the tool is unknown or raises.
+        """
+        tool_name = tool_call["name"]
+        tool_input = tool_call["input"]
+        if tool_name not in self.tools:
+            return f"Error: Tool '{tool_name}' not found"
+        try:
+            result = self.tools[tool_name].run(tool_input)
+            return result
+        except Exception as e:
+            return f"Error executing {tool_name}: {str(e)}"
+    def process_question(self, question: str) -> str:
+        """Process a single question and return the answer.
+        Args:
+            question: The question text.
+        Returns:
+            The first model response when it requests no tools; otherwise the
+            response generated after the tool results were appended.
+        """
+        # Construct the full prompt
+        full_prompt = f"{self.system_prompt}\n\nQuestion: {question}\n\nAnswer:"
+        # Get initial response
+        response = self._generate_response(full_prompt)
+        # Extract and execute any tool calls
+        tool_calls = self._extract_tool_calls(response)
+        if tool_calls:
+            # Execute each tool call and collect results
+            tool_results = []
+            for tool_call in tool_calls:
+                result = self._execute_tool_call(tool_call)
+                tool_results.append(f"Tool {tool_call['name']} result: {result}")
+            # Generate final response with tool results (a single tool round only)
+            tool_results_str = "\n".join(tool_results)
+            final_prompt = f"{full_prompt}\n{response}\n\nTool Results:\n{tool_results_str}\n\nFinal Answer:"
+            final_response = self._generate_response(final_prompt)
+            return final_response
+        return response
+    def get_answer(self, question_data: Dict[str, Any]) -> Optional[str]:
+        """Process a question from the GAIA benchmark and return an answer.
+        Args:
+            question_data: Question record; its text is read from "question".
+        Returns:
+            The answer text, or None when the record has no question text or
+            processing raises.
+        """
+        try:
+            # Extract the actual question from the question data
+            question = question_data.get("question", "")
+            if not question:
+                return None
+            # Process the question and get the answer
+            answer = self.process_question(question)
+            return answer
+        except Exception as e:
+            print(f"Error processing question: {str(e)}")
+            return None

app.py CHANGED Viewed

@@ -1,160 +1,98 @@
 import os
 import gradio as gr
-import requests
-import inspect
 import pandas as pd
-# (Keep Constants as is)
-# --- Constants ---
-DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- Basic Agent Definition ---
-# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
-class BasicAgent:
-    def __init__(self):
-        print("BasicAgent initialized.")
-    def __call__(self, question: str) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        fixed_answer = "This is a default answer."
-        print(f"Agent returning fixed answer: {fixed_answer}")
-        return fixed_answer
-def run_and_submit_all( profile: gr.OAuthProfile | None):
-    """
-    Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
-    """
-    # --- Determine HF Space Runtime URL and Repo URL ---
-    space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
     if profile:
-        username= f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
         return "Please Login to Hugging Face with the button.", None
-    api_url = DEFAULT_API_URL
-    questions_url = f"{api_url}/questions"
-    submit_url = f"{api_url}/submit"
-    # 1. Instantiate Agent ( modify this part to create your agent)
     try:
-        agent = BasicAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
-    # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
-    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-    print(agent_code)
     # 2. Fetch Questions
-    print(f"Fetching questions from: {questions_url}")
     try:
-        response = requests.get(questions_url, timeout=15)
-        response.raise_for_status()
-        questions_data = response.json()
-        if not questions_data:
-             print("Fetched questions list is empty.")
-             return "Fetched questions list is empty or invalid format.", None
-        print(f"Fetched {len(questions_data)} questions.")
-    except requests.exceptions.RequestException as e:
-        print(f"Error fetching questions: {e}")
-        return f"Error fetching questions: {e}", None
-    except requests.exceptions.JSONDecodeError as e:
-         print(f"Error decoding JSON response from questions endpoint: {e}")
-         print(f"Response text: {response.text[:500]}")
-         return f"Error decoding server response for questions: {e}", None
     except Exception as e:
-        print(f"An unexpected error occurred fetching questions: {e}")
-        return f"An unexpected error occurred fetching questions: {e}", None
-    # 3. Run your Agent
-    results_log = []
-    answers_payload = []
-    print(f"Running agent on {len(questions_data)} questions...")
-    for item in questions_data:
-        task_id = item.get("task_id")
-        question_text = item.get("question")
-        if not task_id or question_text is None:
-            print(f"Skipping item with missing task_id or question: {item}")
-            continue
-        try:
-            submitted_answer = agent(question_text)
-            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
-        except Exception as e:
-             print(f"Error running agent on task {task_id}: {e}")
-             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-    # 4. Prepare Submission
-    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
-    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
-    print(status_update)
-    # 5. Submit
-    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
-    try:
-        response = requests.post(submit_url, json=submission_data, timeout=60)
-        response.raise_for_status()
-        result_data = response.json()
-        final_status = (
-            f"Submission Successful!\n"
-            f"User: {result_data.get('username')}\n"
-            f"Overall Score: {result_data.get('score', 'N/A')}% "
-            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
-            f"Message: {result_data.get('message', 'No message received.')}"
-        )
-        print("Submission successful.")
-        results_df = pd.DataFrame(results_log)
-        return final_status, results_df
-    except requests.exceptions.HTTPError as e:
-        error_detail = f"Server responded with status {e.response.status_code}."
-        try:
-            error_json = e.response.json()
-            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
-        except requests.exceptions.JSONDecodeError:
-            error_detail += f" Response: {e.response.text[:500]}"
-        status_message = f"Submission Failed: {error_detail}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
-    except requests.exceptions.Timeout:
-        status_message = "Submission Failed: The request timed out."
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
-    except requests.exceptions.RequestException as e:
-        status_message = f"Submission Failed: Network error - {e}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
-    except Exception as e:
-        status_message = f"An unexpected error occurred during submission: {e}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
 # --- Build Gradio Interface using Blocks ---
-with gr.Blocks() as demo:
     gr.Markdown("# Basic Agent Evaluation Runner")
     gr.Markdown(
         """
         **Instructions:**
-        1.  Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
-        2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
-        3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
         ---
         **Disclaimers:**
-        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
-        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
         """
     )
@@ -162,20 +100,21 @@ with gr.Blocks() as demo:
     run_button = gr.Button("Run Evaluation & Submit All Answers")
-    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
     # Removed max_rows=10 from DataFrame constructor
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
     run_button.click(
-        fn=run_and_submit_all,
-        outputs=[status_output, results_table]
     )
 if __name__ == "__main__":
-    print("\n" + "-"*30 + " App Starting " + "-"*30)
     # Check for SPACE_HOST and SPACE_ID at startup for information
     space_host_startup = os.getenv("SPACE_HOST")
-    space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
     if space_host_startup:
         print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -183,14 +122,20 @@ if __name__ == "__main__":
     else:
         print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
-    if space_id_startup: # Print repo URLs if SPACE_ID is found
         print(f"✅ SPACE_ID found: {space_id_startup}")
         print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
     else:
-        print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
-    print("-"*(60 + len(" App Starting ")) + "\n")
     print("Launching Gradio Interface for Basic Agent Evaluation...")
-    demo.launch(debug=True, share=False)

 import os
+import agent
 import gradio as gr
+import logic
 import pandas as pd
+from dotenv import load_dotenv
+load_dotenv()
+def run_and_submit_all(
+    profile: gr.OAuthProfile | None,
+) -> tuple[str, pd.DataFrame | None]:
+    """Fetches all questions, runs the GaiaAgent on them, submits all answers,
     and displays the results.
+    Args:
+        profile: An optional gr.OAuthProfile object containing user information
+            if the user is logged in. If None, the user is not logged in.
+    Returns:
+        tuple[str, pd.DataFrame | None]: A tuple containing:
+            - A string representing the status of the run and submission process.
+              This could be a success message, an error message, or a message
+              indicating that no answers were produced.
+            - A pandas DataFrame containing the results log. This DataFrame will
+              be displayed in the Gradio interface. It can be None if an error
+              occurred before the agent was run.
+    """
+    # 0. Get user details
+    # os.getenv returns None when SPACE_ID is unset, in which case the URL
+    # below reads ".../spaces/None/tree/main".
+    space_id = os.getenv("SPACE_ID")
+    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+    print(agent_code)
+    # A missing profile means nobody is logged in: stop before doing any work
     if profile:
+        username = f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
         return "Please Login to Hugging Face with the button.", None
+    # 1. Instantiate Agent
     try:
+        gaia_agent = agent.GaiaAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
     # 2. Fetch Questions
+    # Any fetch failure is reported as the status text, with no results table
     try:
+        questions_data = logic.fetch_all_questions()
     except Exception as e:
+        return str(e), None
+    # 3. Run the Agent
+    results_log, answers_payload = logic.run_agent(gaia_agent, questions_data)
+    # Nothing to submit: still show whatever was logged per question
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+    # 4. Prepare & Submit Answers
+    submission_data = {
+        "username": username.strip(),
+        "agent_code": agent_code,
+        "answers": answers_payload,
+    }
+    print(
+        f"Agent finished. Submitting {len(answers_payload)} answers for user '"
+        f"{username}'..."
+    )
+    # The (status, DataFrame) pair from logic.submit_answers is returned as-is
+    return logic.submit_answers(submission_data, results_log)
 # --- Build Gradio Interface using Blocks ---
+with gr.Blocks() as gaia_ui:
+    # Page title and usage instructions shown above the controls
     gr.Markdown("# Basic Agent Evaluation Runner")
     gr.Markdown(
         """
         **Instructions:**
+        1.  Please clone this space, then modify the code to define your agent's
+        logic, the tools, the necessary packages, etc ...
+        2.  Log in to your Hugging Face account using the button below. This uses
+        your HF username for submission.
+        3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your
+        agent, submit answers, and see the score.
         ---
         **Disclaimers:**
+        Once clicking on the "submit button, it can take quite some time ( this is
+        the time for the agent to go through all the questions).
+        This space provides a basic setup and is intentionally sub-optimal to
+        encourage you to develop your own, more robust solution. For instance for the
+        delay process of the submit button, a solution could be to cache the answers
+        and submit in a separate action or even to answer the questions in async.
         """
     )
+    # Single trigger button plus the two outputs filled by run_and_submit_all
     run_button = gr.Button("Run Evaluation & Submit All Answers")
+    status_output = gr.Textbox(
+        label="Run Status / Submission Result", lines=5, interactive=False
+    )
     # Removed max_rows=10 from DataFrame constructor
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+    # No explicit inputs are wired to the callback.
+    # NOTE(review): presumably Gradio supplies `profile` from the
+    # gr.OAuthProfile annotation on run_and_submit_all - confirm.
     run_button.click(
+        fn=run_and_submit_all, inputs=None, outputs=[status_output, results_table]
     )
 if __name__ == "__main__":
+    # Startup banner followed by environment diagnostics (informational only)
+    print("\n" + "-" * 30 + " App Starting " + "-" * 30)
     # Check for SPACE_HOST and SPACE_ID at startup for information
     space_host_startup = os.getenv("SPACE_HOST")
+    space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup
     if space_host_startup:
         print(f"✅ SPACE_HOST found: {space_host_startup}")
     else:
         print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
+    if space_id_startup:  # Print repo URLs if SPACE_ID is found
         print(f"✅ SPACE_ID found: {space_id_startup}")
         print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
+        print(
+            f"   Repo Tree URL: https://huggingface.co/spaces/"
+            f"{space_id_startup}/tree/main"
+        )
     else:
+        print(
+            "ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL "
+            "cannot be determined."
+        )
+    # Closing rule sized to match the banner: 60 dashes plus the title length
+    print("-" * (60 + len(" App Starting ")) + "\n")
     print("Launching Gradio Interface for Basic Agent Evaluation...")
+    # NOTE(review): the removed version of this call passed share=False.
+    # share=True presumably requests a public share link - confirm that this
+    # change is intended.
+    gaia_ui.launch(debug=True, share=True)

gitattributes ADDED Viewed

	@@ -0,0 +1,35 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

logic.py ADDED Viewed

	@@ -0,0 +1,108 @@

+import os
+from typing import Any, Dict, List, Tuple
+import pandas as pd
+import requests
+def fetch_all_questions() -> List[Dict[str, Any]]:
+    """Fetch all questions from the GAIA benchmark API.
+    The base URL is read from the GAIA_API_URL environment variable and the
+    questions are requested from its ``/questions`` endpoint.
+    Returns:
+        The decoded JSON payload: a non-empty list of question dictionaries.
+    Raises:
+        Exception: If GAIA_API_URL is unset, the request fails or times out,
+            or the payload is empty or not a list. The message starts with
+            "Failed to fetch questions:" and the underlying error is attached
+            as ``__cause__``.
+    """
+    try:
+        # The actual endpoint will be provided by the GAIA benchmark
+        api_url = os.getenv("GAIA_API_URL", "").strip().rstrip("/")
+        if not api_url:
+            raise ValueError("GAIA_API_URL environment variable not set")
+        # Bound the wait so an unresponsive server cannot hang the UI callback
+        response = requests.get(f"{api_url}/questions", timeout=15)
+        response.raise_for_status()
+        questions = response.json()
+        # Reject an empty or non-list payload here instead of letting the
+        # caller run the agent over nothing.
+        if not isinstance(questions, list) or not questions:
+            raise ValueError("Fetched questions list is empty or invalid format")
+        return questions
+    except Exception as e:
+        # Chain the cause so the original traceback is preserved
+        raise Exception(f"Failed to fetch questions: {str(e)}") from e
+def run_agent(agent: Any, questions: List[Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
+    """Run the agent on all questions and collect results.
+    Args:
+        agent: The GaiaAgent instance; only ``get_answer(question)`` is called
+        questions: List of question data from the API
+    Returns:
+        Tuple containing:
+        - List of result logs for display (one entry per question)
+        - List of answer payloads for submission (only questions that
+          produced a non-empty answer)
+    """
+    results_log = []
+    answers_payload = []
+    for question in questions:
+        # The scoring API keys questions by "task_id" and expects each answer
+        # as {"task_id", "submitted_answer"}, as the earlier app.py client of
+        # the same endpoints did; "id" is kept as a fallback.
+        question_id = question.get("task_id") or question.get("id") or "unknown"
+        question_text = question.get("question", "")
+        try:
+            # Get answer from agent
+            answer = agent.get_answer(question)
+        except Exception as e:
+            # Log the error and move on; one failure must not abort the run
+            results_log.append({
+                "Question ID": question_id,
+                "Question": question_text,
+                "Answer": f"Error: {str(e)}",
+                "Status": "Failed"
+            })
+            continue
+        # Log result
+        results_log.append({
+            "Question ID": question_id,
+            "Question": question_text,
+            "Answer": answer if answer else "No answer provided",
+            "Status": "Success" if answer else "Failed"
+        })
+        # Prepare submission payload if answer was generated
+        if answer:
+            answers_payload.append({
+                "task_id": question_id,
+                "submitted_answer": answer
+            })
+    return results_log, answers_payload
+def submit_answers(submission_data: Dict[str, Any], results_log: List[Dict[str, Any]]) -> Tuple[str, pd.DataFrame]:
+    """Submit answers to the GAIA benchmark API.
+    Args:
+        submission_data: Dictionary containing submission details; sent as the
+            JSON body of a POST to ``<GAIA_API_URL>/submit``
+        results_log: List of result logs for display
+    Returns:
+        Tuple containing:
+        - Status message string: "Answers submitted successfully!" (followed by
+          the score line when the server response includes one), or a message
+          starting with "Error submitting answers:" on failure
+        - DataFrame of results for display (built in both cases)
+    """
+    # Built up front so the results are shown whether or not submission works
+    results_df = pd.DataFrame(results_log)
+    try:
+        # The actual endpoint will be provided by the GAIA benchmark
+        api_url = os.getenv("GAIA_API_URL", "").strip().rstrip("/")
+        if not api_url:
+            raise ValueError("GAIA_API_URL environment variable not set")
+        # Submit answers; bound the wait so the UI callback cannot hang forever
+        response = requests.post(
+            f"{api_url}/submit",
+            json=submission_data,
+            timeout=60
+        )
+        response.raise_for_status()
+        status = "Answers submitted successfully!"
+        # Surface the score when the server reports one; a body that is not
+        # JSON is not treated as a failed submission.
+        try:
+            result_data = response.json()
+        except ValueError:
+            result_data = None
+        if isinstance(result_data, dict) and "score" in result_data:
+            status += (
+                f"\nOverall Score: {result_data.get('score', 'N/A')}% "
+                f"({result_data.get('correct_count', '?')}/"
+                f"{result_data.get('total_attempted', '?')} correct)"
+            )
+        return status, results_df
+    except Exception as e:
+        # If submission fails, still show results but with error message
+        return f"Error submitting answers: {str(e)}", results_df

requirements.txt CHANGED Viewed

@@ -1,2 +1,9 @@
-gradio
-requests

+gradio==5.25.2
+transformers
+torch
+accelerate
+duckduckgo-search
+python-dotenv
+pandas
+requests
+numpy