"""Test harness: fetch scoring-API questions, run a smolagents CodeAgent on a
random sample of them, and log each answer to a JSON-Lines file."""

import json
import os
import pprint
import random

import requests
import yaml
from dotenv import load_dotenv
from smolagents import CodeAgent, HfApiModel

from tools.final_answer import FinalAnswerTool
from tools.visit_webpage import VisitWebpageTool
from tools.web_search import DuckDuckGoSearchTool  # Note: app.py imports this from tools.web_search and smolagents

# Load environment variables from .env file
load_dotenv()
hf_token = os.getenv('HUGGINGFACE_TOKEN')
if not hf_token:
    raise ValueError("HUGGINGFACE_TOKEN not found in environment variables. Make sure a .env file exists.")

# --- Constants ---
API_URL = os.getenv("API_URL", "https://agents-course-unit4-scoring.hf.space")  # Use env var or default
QUESTIONS_URL = f"{API_URL}/questions"
QUESTIONS_FILE = "questions.json"
ANSWERS_LOG_FILE = "answer_log.jsonl"
PROMPTS_FILE = "prompts.yaml"


# --- Function to Fetch Questions ---
def fetch_and_save_questions(url: str, filename: str) -> bool:
    """Fetch questions from the API and save them to a local JSON file.

    Skips the download entirely when `filename` already exists, so repeated
    runs reuse the cached file.

    Args:
        url: Questions endpoint to GET.
        filename: Local path the JSON payload is written to.

    Returns:
        True when questions are available locally (cached or freshly
        downloaded), False on any fetch/decode error or an empty payload.
    """
    if os.path.exists(filename):
        print(f"Questions file '{filename}' already exists. Skipping download.")
        return True
    print(f"Fetching questions from: {url}")
    try:
        response = requests.get(url, timeout=30)  # Increased timeout
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return False
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(questions_data, f, indent=4, ensure_ascii=False)
        print(f"Successfully fetched {len(questions_data)} questions and saved to '{filename}'.")
        return True
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return False
    except requests.exceptions.JSONDecodeError as e:
        # .json() only raises after the GET succeeded, so `response` exists
        # here; the locals() check is defensive.
        print(f"Error decoding JSON response from questions endpoint: {e}")
        if 'response' in locals():
            print(f"Response text: {response.text[:500]}")
        return False
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return False


# --- Function to Load Questions ---
def load_questions(filename: str) -> list:
    """Load questions from a local JSON file.

    Args:
        filename: Path to the JSON file written by fetch_and_save_questions.

    Returns:
        The parsed list of question dicts, or an empty list on any error
        (missing file, malformed JSON, or other unexpected failure).
    """
    try:
        with open(filename, 'r', encoding='utf-8') as f:
            questions_data = json.load(f)
        print(f"Successfully loaded {len(questions_data)} questions from '{filename}'.")
        return questions_data
    except FileNotFoundError:
        print(f"Error: Questions file '{filename}' not found.")
        return []
    except json.JSONDecodeError:
        print(f"Error: Could not decode JSON from '{filename}'.")
        return []
    except Exception as e:
        print(f"An unexpected error occurred loading questions: {e}")
        return []


# --- Function to Instantiate Agent ---
def create_agent():
    """Instantiate the CodeAgent with configuration similar to app.py.

    Returns:
        A configured CodeAgent, or None if instantiation fails. A missing or
        unparsable prompts file is non-fatal: the agent falls back to its
        default prompts (prompt_templates=None).
    """
    try:
        # Load prompts
        with open(PROMPTS_FILE, 'r') as stream:
            prompt_templates = yaml.safe_load(stream)
    except FileNotFoundError:
        print(f"Error: Prompts file '{PROMPTS_FILE}' not found. Using default prompts.")
        prompt_templates = None  # Or handle differently
    except yaml.YAMLError as e:
        print(f"Error parsing prompts file '{PROMPTS_FILE}': {e}. Using default prompts.")
        prompt_templates = None

    # Configure model
    model = HfApiModel(
        max_tokens=2096,
        temperature=0.5,
        model_id=
        # 'Qwen/Qwen2.5-Coder-32B-Instruct',
        'Qwen/Qwen3-32B',
        # custom_role_conversions=None,  # Optional, kept default
        token=hf_token,
    )

    # Create agent instance
    try:
        agent = CodeAgent(
            model=model,
            tools=[
                FinalAnswerTool(),
                DuckDuckGoSearchTool(),
                VisitWebpageTool(),
            ],
            max_steps=6,
            verbosity_level=1,  # Set higher (e.g., 2 or 3) to potentially see reasoning in stdout
            # grammar=None,  # Optional, kept default
            # planning_interval=None,  # Optional, kept default
            name="SmolAgentTester",
            description="An AI coding assistant for testing.",
            prompt_templates=prompt_templates,
        )
        print("CodeAgent instantiated successfully.")
        return agent
    except Exception as e:
        print(f"Error instantiating CodeAgent: {e}")
        return None


# --- Main Execution Logic ---
if __name__ == "__main__":
    print("Starting test script...")

    # Step 1: Fetch and save questions
    if not fetch_and_save_questions(QUESTIONS_URL, QUESTIONS_FILE):
        print("Failed to fetch questions. Exiting.")
        exit(1)

    # Step 2: Load questions
    all_questions = load_questions(QUESTIONS_FILE)
    if not all_questions:
        print("Failed to load questions. Exiting.")
        exit(1)

    # Step 3: Randomly pick 2 questions
    if len(all_questions) < 2:
        print("Warning: Fewer than 2 questions available. Testing with all available questions.")
        selected_questions = all_questions
    else:
        selected_questions = random.sample(all_questions, 2)

    print(f"\nSelected {len(selected_questions)} questions for testing:")
    pprint.pprint(selected_questions)
    print("-" * 50)

    # Step 4: Instantiate agent
    agent = create_agent()
    if agent is None:
        print("Failed to create agent. Exiting.")
        exit(1)

    # Step 5: Run agent and log results
    print(f"Running agent on {len(selected_questions)} questions...")
    results_log = []

    # Clear or create the log file
    with open(ANSWERS_LOG_FILE, 'w', encoding='utf-8') as log_f:
        pass  # Just to clear the file initially

    for item in selected_questions:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue

        print(f"\n--- Running Task ID: {task_id} ---")
        print(f"Question: {question_text}")
        try:
            # Run the agent
            # Note: The agent call might print its own reasoning steps depending on verbosity
            # NOTE(review): smolagents' documented entry point is agent.run(task);
            # confirm CodeAgent instances are callable in the installed version.
            model_answer = agent(question_text)  # This now holds the CONCISE answer from FinalAnswerTool
            print(f"\nAgent Final Answer: {model_answer}")  # Renamed print for clarity

            # Prepare result for logging
            result = {
                "task_id": task_id,
                "question": question_text,
                "model_answer": model_answer,  # Directly use the concise answer
                # "reasoning_trace": "TODO"  # Add if agent provides trace separately
            }
            results_log.append(result)

            # Append result to log file (JSON Lines format)
            with open(ANSWERS_LOG_FILE, 'a', encoding='utf-8') as log_f:
                json.dump(result, log_f, ensure_ascii=False)
                log_f.write('\n')
        except Exception as e:
            print(f"\nAGENT ERROR on task {task_id}: {e}")
            # Optionally log errors too
            error_result = {"task_id": task_id, "model_answer": f"AGENT_ERROR: {e}"}
            results_log.append(error_result)
            with open(ANSWERS_LOG_FILE, 'a', encoding='utf-8') as log_f:
                json.dump(error_result, log_f, ensure_ascii=False)
                log_f.write('\n')
        print("-" * 50)

    print(f"\nTest script finished. {len(results_log)} results logged to '{ANSWERS_LOG_FILE}'.")
    print("Summary of results:")
    pprint.pprint(results_log)

# Ensure prompts.yaml and .env exist in the same directory or adjust paths.
# Ensure necessary packages are installed:
#   pip install requests pyyaml python-dotenv smolagents
# (pprint is part of the standard library; no separate install needed.)
# ... rest of the script to be added ...