Spaces:
Build error
Build error
Add configuration, graph, runner, and tools modules to enhance agent functionality. Introduce a Configuration class for managing parameters, implement an AgentRunner for executing the agent graph, and create tools for general search and mathematical calculations. Update test_agent.py to reflect new import paths and improve overall code organization.
13388e5
unverified
import logging | |
import pytest | |
from runner import AgentRunner | |
# Module-specific logger used by the tests below for progress messages;
# its own threshold is INFO.
test_logger = logging.getLogger(name="test_agent")
test_logger.setLevel(level=logging.INFO)
# Module-wide pytest mark: ignore DeprecationWarning coming from the
# ``httpx._models`` module (filter spec is action:message:category:module,
# with the message field left empty).
pytestmark = pytest.mark.filterwarnings(
    "ignore::DeprecationWarning:httpx._models"
)
# Base URL of the remote API and the questions endpoint built from it.
# In this file they are referenced only by the commented-out
# ``questions_data`` fixture.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
QUESTIONS_URL = DEFAULT_API_URL + "/questions"
def agent():
    """Log the creation of an ``AgentRunner`` and return the new instance.

    NOTE(review): the previous docstring described this as a fixture, but no
    ``@pytest.fixture`` decorator is applied here, so pytest will not inject
    it into tests by name. Confirm whether the decorator was dropped on
    purpose before relying on it as a fixture.
    """
    test_logger.info("Creating AgentRunner instance")
    runner_instance = AgentRunner()
    return runner_instance
# @pytest.fixture(scope="session") | |
# def questions_data(): | |
# """Fixture to fetch questions from the API.""" | |
# test_logger.info(f"Fetching questions from: {QUESTIONS_URL}") | |
# try: | |
# response = requests.get(QUESTIONS_URL, timeout=15) | |
# response.raise_for_status() | |
# data = response.json() | |
# if not data: | |
# test_logger.error("Fetched questions list is empty.") | |
# return [] | |
# test_logger.info(f"Fetched {len(data)} questions.") | |
# return data | |
# except requests.exceptions.RequestException as e: | |
# test_logger.error(f"Error fetching questions: {e}") | |
# return [] | |
# except requests.exceptions.JSONDecodeError as e: | |
# test_logger.error(f"Error decoding JSON response from questions endpoint: {e}") | |
# return [] | |
# except Exception as e: | |
# test_logger.error(f"An unexpected error occurred fetching questions: {e}") | |
# return [] | |
# | |
# class TestAppQuestions: | |
# """Test cases for questions from the app.""" | |
# | |
# def test_first_app_question(self, agent, questions_data): | |
# """Test the agent's response to the first app question.""" | |
# if not questions_data: | |
# pytest.skip("No questions available from API") | |
# | |
# first_question = questions_data[0] | |
# question_text = first_question.get("question") | |
# task_id = first_question.get("task_id") | |
# | |
# if not question_text or not task_id: | |
# pytest.skip("First question is missing required fields") | |
# | |
# test_logger.info(f"Testing with app question: {question_text}") | |
# | |
# response = agent(question_text) | |
# test_logger.info(f"Agent response: {response}") | |
# | |
# # Check that the response contains the expected information | |
# assert "Mercedes Sosa" in response, "Response should mention Mercedes Sosa" | |
# assert "studio albums" in response.lower(), "Response should mention studio albums" | |
# assert "2000" in response and "2009" in response, "Response should mention the year range" | |
# | |
# # Verify that a number is mentioned (either as word or digit) | |
# import re | |
# number_pattern = r'\b(one|two|three|four|five|six|seven|eight|nine|ten|\d+)\b' | |
# has_number = bool(re.search(number_pattern, response.lower())) | |
# assert has_number, "Response should include the number of albums" | |
# | |
# # Check for album names in the response | |
# known_albums = [ | |
# "Corazón Libre", | |
# "Cantora", | |
# "Hermano", | |
# "Acústico", | |
# "Argentina quiere cantar" | |
# ] | |
# found_albums = [album for album in known_albums if album in response] | |
# assert len(found_albums) > 0, "Response should mention at least some of the known albums" | |
# | |
# # Check for a structured response | |
# assert re.search(r'\d+\.\s+[^(]+\(\d{4}\)', response), \ | |
# "Response should list albums with years" | |
class TestBasicCodeAgentCapabilities:
    """Check the agent's answers and its recorded step logs on two prompts.

    A fresh ``AgentRunner`` is created for every test (see ``setup_method``).
    Each test inspects both the text returned by calling the agent and the
    ``step_logs`` entry of ``agent.last_state``.
    """

    # Every logged step must carry at least one of these keys.
    _STEP_CONTENT_KEYS = ("thought", "code", "observation")

    def setup_method(self):
        """Create a new ``AgentRunner`` on ``self.agent`` before each test."""
        test_logger.info("Creating AgentRunner instance")
        self.agent = AgentRunner()

    def _check_step_fields(self, step_logs):
        """Assert every entry has ``step_number`` plus one content key."""
        for entry in step_logs:
            assert "step_number" in entry, "Each step should have a step_number"
            assert any(
                key in entry for key in self._STEP_CONTENT_KEYS
            ), "Each step should have at least one of: thought, code, or observation"

    def test_simple_math_calculation_with_steps(self):
        """The agent answers a small addition and logs at least one step."""
        question = "What is the result of the following operation: 5 + 3 + 1294.678?"
        test_logger.info(f"Testing math calculation with question: {question}")

        response = self.agent(question)

        # The expected text is the str() rendering of the sum, and it must
        # appear verbatim somewhere in the response.
        expected_result = str(5 + 3 + 1294.678)
        assert (
            expected_result in response
        ), f"Response should contain the result {expected_result}"

        # The runner must have kept its final state, including step logs.
        state = self.agent.last_state
        assert state is not None, "Agent should store last state"
        assert "step_logs" in state, "State should contain step_logs"
        step_logs = state["step_logs"]
        assert len(step_logs) >= 1, "Should have at least one step logged"
        self._check_step_fields(step_logs)

        # The response text has to mention ``final_answer`` (case-insensitive).
        assert (
            "final_answer" in response.lower()
        ), "Response should indicate it's providing an answer"

    def test_document_qa_and_image_generation_with_steps(self):
        """The agent handles a search-plus-image prompt over several steps."""
        question = (
            "Search for information about the Mona Lisa and generate an image of it."
        )
        test_logger.info(
            f"Testing document QA and image generation with question: {question}"
        )

        response = self.agent(question)

        # Case-insensitive checks on the returned text.
        lowered_response = response.lower()
        assert "mona lisa" in lowered_response, "Response should mention Mona Lisa"
        assert "image" in lowered_response, "Response should mention image generation"

        # More than one step is required for this two-part task.
        state = self.agent.last_state
        assert state is not None, "Agent should store last state"
        assert "step_logs" in state, "State should contain step_logs"
        assert len(state["step_logs"]) >= 2, "Should have multiple steps logged"

        # A step counts as a search/image step when the keyword appears
        # anywhere in its string representation.
        steps = self.agent.last_state["step_logs"]
        search_steps = list(filter(lambda s: "search" in str(s).lower(), steps))
        image_steps = list(filter(lambda s: "image" in str(s).lower(), steps))
        assert len(search_steps) >= 1, "Should have search steps"
        assert len(image_steps) >= 1, "Should have image generation steps"

        self._check_step_fields(steps)
def _extract_numbers(text):
    """Return the numeric values found in *text* as a list of floats.

    Two passes are made over the text:

    * plain tokens such as ``1302`` or ``1302.678``;
    * comma-grouped tokens such as ``1,302.678``, converted after the
      commas are removed.

    The results are concatenated, so a comma-grouped number also contributes
    the fragments found by the plain pass. That keeps every value the plain
    pass alone would have produced.
    """
    import re

    plain_tokens = re.findall(r"\d+\.?\d*", text)
    grouped_tokens = re.findall(r"\d{1,3}(?:,\d{3})+(?:\.\d+)?", text)
    values = [float(token) for token in plain_tokens]
    values.extend(float(token.replace(",", "")) for token in grouped_tokens)
    return values


def test_simple_math_calculation_with_steps():
    """Test that the agent solves a simple sum and records its steps.

    The agent's response must contain a number within 0.001 of 1302.678.
    If the response has a LaTeX ``\\boxed{...}`` group, only its content is
    searched; otherwise the whole response is. Numbers written with thousands
    separators (``1,302.678``) are recognised as well. The response must also
    mention ``final_answer`` (case-insensitive).
    """
    import re

    agent = AgentRunner()
    question = "What is the result of the following operation: 5 + 3 + 1294.678?"

    response = agent(question)

    # Step logs must exist and every entry must carry the required fields.
    assert agent.last_state is not None, "Last state should be stored"
    step_logs = agent.last_state.get("step_logs", [])
    assert len(step_logs) > 0, "Should have recorded step logs"
    for step in step_logs:
        assert "step_number" in step, "Each step should have a step number"
        assert any(
            key in step for key in ["thought", "code", "observation"]
        ), "Each step should have at least one of thought/code/observation"

    expected_result = 1302.678

    # Prefer the content of a LaTeX box when one is present; the braces are
    # escaped so they are unambiguously literal in the pattern.
    latex_match = re.search(r"\\boxed\{([^}]+)\}", response)
    searched_text = latex_match.group(1) if latex_match else response
    numbers = _extract_numbers(searched_text)
    assert numbers, "Response should contain at least one number"

    # Tolerant comparison so that formatting/rounding noise does not matter.
    has_correct_result = any(abs(n - expected_result) < 0.001 for n in numbers)
    assert (
        has_correct_result
    ), f"Response should contain the result {expected_result}, got {response}"

    assert (
        "final_answer" in response.lower()
    ), "Response should indicate it's using final_answer"
def test_document_qa_and_image_generation_with_steps():
    """Check that a search-plus-image prompt yields search and image steps.

    Each logged step must have ``step_number`` and at least one of
    ``thought``/``code``/``observation``. Across all steps, the combined
    ``thought`` and ``code`` text must mention "search" at least once and
    "image" or "dalle" at least once (case-insensitive). The response must
    also mention ``final_answer``.
    """
    runner = AgentRunner()
    question = "Can you search for information about the Mona Lisa and generate an image inspired by it?"

    response = runner(question)

    assert runner.last_state is not None, "Last state should be stored"
    recorded_steps = runner.last_state.get("step_logs", [])
    assert len(recorded_steps) > 0, "Should have recorded step logs"

    saw_search = False
    saw_image = False
    for entry in recorded_steps:
        assert "step_number" in entry, "Each step should have a step number"
        assert any(
            field in entry for field in ("thought", "code", "observation")
        ), "Each step should have at least one of thought/code/observation"

        # Only the thought and code fields are scanned for the keywords.
        text = (str(entry.get("thought", "")) + str(entry.get("code", ""))).lower()
        saw_search = saw_search or "search" in text
        saw_image = saw_image or "image" in text or "dalle" in text

    assert saw_search, "Should include a search step"
    assert saw_image, "Should include an image generation step"
    assert (
        "final_answer" in response.lower()
    ), "Response should indicate it's using final_answer"
if __name__ == "__main__":
    # Run this file's tests directly: -s disables output capture, -v is
    # verbose, -x stops at the first failure. pytest.main() returns the exit
    # code rather than exiting, so raise SystemExit with it; otherwise the
    # script would exit 0 even when tests fail.
    raise SystemExit(pytest.main([__file__, "-s", "-v", "-x"]))