import logging

import pytest

from runner import AgentRunner
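
from typing import Any, Dict, Optional, Protocol


# The tests in this module rely only on the AgentRunner surface sketched below.
# This Protocol is an illustrative assumption inferred from the assertions in
# this file (it is not exported by runner.py): the runner is callable with a
# question string, returns a text response, and stores the final graph state on
# `last_state`, whose "step_logs" entries each carry a "step_number" plus at
# least one of "thought", "code", or "observation".
class _AgentRunnerLike(Protocol):
    last_state: Optional[Dict[str, Any]]

    def __call__(self, question: str) -> str: ...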


# Configure test logger
test_logger = logging.getLogger("test_agent")
test_logger.setLevel(logging.INFO)

# Suppress specific warnings
pytestmark = pytest.mark.filterwarnings("ignore::DeprecationWarning:httpx._models")

# Constants
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
QUESTIONS_URL = f"{DEFAULT_API_URL}/questions"
@pytest.fixture(scope="session")
def agent():
"""Fixture to create and return an AgentRunner instance."""
test_logger.info("Creating AgentRunner instance")
return AgentRunner()
# @pytest.fixture(scope="session")
# def questions_data():
# """Fixture to fetch questions from the API."""
# test_logger.info(f"Fetching questions from: {QUESTIONS_URL}")
# try:
# response = requests.get(QUESTIONS_URL, timeout=15)
# response.raise_for_status()
# data = response.json()
# if not data:
# test_logger.error("Fetched questions list is empty.")
# return []
# test_logger.info(f"Fetched {len(data)} questions.")
# return data
# except requests.exceptions.RequestException as e:
# test_logger.error(f"Error fetching questions: {e}")
# return []
# except requests.exceptions.JSONDecodeError as e:
# test_logger.error(f"Error decoding JSON response from questions endpoint: {e}")
# return []
# except Exception as e:
# test_logger.error(f"An unexpected error occurred fetching questions: {e}")
# return []
#
# class TestAppQuestions:
# """Test cases for questions from the app."""
#
# def test_first_app_question(self, agent, questions_data):
# """Test the agent's response to the first app question."""
# if not questions_data:
# pytest.skip("No questions available from API")
#
# first_question = questions_data[0]
# question_text = first_question.get("question")
# task_id = first_question.get("task_id")
#
# if not question_text or not task_id:
# pytest.skip("First question is missing required fields")
#
# test_logger.info(f"Testing with app question: {question_text}")
#
# response = agent(question_text)
# test_logger.info(f"Agent response: {response}")
#
# # Check that the response contains the expected information
# assert "Mercedes Sosa" in response, "Response should mention Mercedes Sosa"
# assert "studio albums" in response.lower(), "Response should mention studio albums"
# assert "2000" in response and "2009" in response, "Response should mention the year range"
#
# # Verify that a number is mentioned (either as word or digit)
# import re
# number_pattern = r'\b(one|two|three|four|five|six|seven|eight|nine|ten|\d+)\b'
# has_number = bool(re.search(number_pattern, response.lower()))
# assert has_number, "Response should include the number of albums"
#
# # Check for album names in the response
# known_albums = [
# "Corazón Libre",
# "Cantora",
# "Hermano",
# "Acústico",
# "Argentina quiere cantar"
# ]
# found_albums = [album for album in known_albums if album in response]
# assert len(found_albums) > 0, "Response should mention at least some of the known albums"
#
# # Check for a structured response
# assert re.search(r'\d+\.\s+[^(]+\(\d{4}\)', response), \
# "Response should list albums with years"


class TestBasicCodeAgentCapabilities:
"""Test basic capabilities of the code agent."""

    def setup_method(self):
"""Setup method to initialize the agent before each test."""
test_logger.info("Creating AgentRunner instance")
self.agent = AgentRunner()

    def test_simple_math_calculation_with_steps(self):
"""Test that the agent can perform basic math calculations and log steps."""
question = "What is the result of the following operation: 5 + 3 + 1294.678?"
test_logger.info(f"Testing math calculation with question: {question}")
# Run the agent and get the response
response = self.agent(question)
# Verify the response contains the correct result
expected_result = str(5 + 3 + 1294.678)
assert (
expected_result in response
), f"Response should contain the result {expected_result}"
# Verify step logs exist and have required fields
assert self.agent.last_state is not None, "Agent should store last state"
assert "step_logs" in self.agent.last_state, "State should contain step_logs"
assert (
len(self.agent.last_state["step_logs"]) > 0
), "Should have at least one step logged"
# Verify each step has required fields
for step in self.agent.last_state["step_logs"]:
assert "step_number" in step, "Each step should have a step_number"
assert any(
key in step for key in ["thought", "code", "observation"]
), "Each step should have at least one of: thought, code, or observation"
# Verify the final answer is indicated
assert (
"final_answer" in response.lower()
), "Response should indicate it's providing an answer"

    def test_document_qa_and_image_generation_with_steps(self):
"""Test that the agent can search for information and generate images, with step logging."""
question = (
"Search for information about the Mona Lisa and generate an image of it."
)
test_logger.info(
f"Testing document QA and image generation with question: {question}"
)
# Run the agent and get the response
response = self.agent(question)
# Verify the response contains both search and image generation
assert "mona lisa" in response.lower(), "Response should mention Mona Lisa"
assert "image" in response.lower(), "Response should mention image generation"
# Verify step logs exist and show logical progression
assert self.agent.last_state is not None, "Agent should store last state"
assert "step_logs" in self.agent.last_state, "State should contain step_logs"
assert (
len(self.agent.last_state["step_logs"]) > 1
), "Should have multiple steps logged"
# Verify steps show logical progression
steps = self.agent.last_state["step_logs"]
search_steps = [step for step in steps if "search" in str(step).lower()]
image_steps = [step for step in steps if "image" in str(step).lower()]
assert len(search_steps) > 0, "Should have search steps"
assert len(image_steps) > 0, "Should have image generation steps"
# Verify each step has required fields
for step in steps:
assert "step_number" in step, "Each step should have a step_number"
assert any(
key in step for key in ["thought", "code", "observation"]
), "Each step should have at least one of: thought, code, or observation"


def test_simple_math_calculation_with_steps():
"""Test that the agent can perform a simple math calculation and verify intermediate steps."""
agent = AgentRunner()
question = "What is the result of the following operation: 5 + 3 + 1294.678?"
# Process the question
response = agent(question)
# Verify step logs exist and have required fields
assert agent.last_state is not None, "Last state should be stored"
step_logs = agent.last_state.get("step_logs", [])
assert len(step_logs) > 0, "Should have recorded step logs"
for step in step_logs:
assert "step_number" in step, "Each step should have a step number"
assert any(
key in step for key in ["thought", "code", "observation"]
), "Each step should have at least one of thought/code/observation"
# Verify final answer
expected_result = 1302.678
    import re

    # Extract the numeric answer from the response; first check for a
    # LaTeX-boxed value such as \boxed{1302.678}
latex_match = re.search(r"\\boxed{([^}]+)}", response)
if latex_match:
# Extract number from LaTeX box
latex_content = latex_match.group(1)
numbers = re.findall(r"\d+\.?\d*", latex_content)
else:
# Extract all numbers from the response
numbers = re.findall(r"\d+\.?\d*", response)
assert numbers, "Response should contain at least one number"
# Check if any number matches the expected result
has_correct_result = any(abs(float(n) - expected_result) < 0.001 for n in numbers)
assert (
has_correct_result
), f"Response should contain the result {expected_result}, got {response}"
# Verify the response indicates it's a final answer
assert (
"final_answer" in response.lower()
), "Response should indicate it's using final_answer"


def test_document_qa_and_image_generation_with_steps():
"""Test document QA and image generation with step verification."""
agent = AgentRunner()
question = "Can you search for information about the Mona Lisa and generate an image inspired by it?"
# Process the question
response = agent(question)
# Verify step logs exist and demonstrate logical progression
assert agent.last_state is not None, "Last state should be stored"
step_logs = agent.last_state.get("step_logs", [])
assert len(step_logs) > 0, "Should have recorded step logs"
# Check for search and image generation steps
has_search_step = False
has_image_step = False
for step in step_logs:
assert "step_number" in step, "Each step should have a step number"
assert any(
key in step for key in ["thought", "code", "observation"]
), "Each step should have at least one of thought/code/observation"
# Look for search and image steps in thoughts or code
step_content = str(step.get("thought", "")) + str(step.get("code", ""))
if "search" in step_content.lower():
has_search_step = True
if "image" in step_content.lower() or "dalle" in step_content.lower():
has_image_step = True
assert has_search_step, "Should include a search step"
assert has_image_step, "Should include an image generation step"
assert (
"final_answer" in response.lower()
), "Response should indicate it's using final_answer"


if __name__ == "__main__":
pytest.main([__file__, "-s", "-v", "-x"])