|
import os |
|
import json |
|
import random |
|
import requests |
|
import yaml |
|
import pprint |
|
from dotenv import load_dotenv |
|
|
|
from smolagents import CodeAgent, HfApiModel |
|
from tools.final_answer import FinalAnswerTool |
|
from tools.visit_webpage import VisitWebpageTool |
|
from tools.web_search import DuckDuckGoSearchTool |
|
|
|
|
|
# Run python-dotenv's loader first so that values from a local .env file
# (mentioned in the error message below) can supply the token.
load_dotenv()

# Fail fast at import time: the model client built later needs this token.
hf_token = os.environ.get('HUGGINGFACE_TOKEN')
if hf_token is None or hf_token == '':
    raise ValueError("HUGGINGFACE_TOKEN not found in environment variables. Make sure a .env file exists.")
|
|
|
|
|
# Base URL of the scoring service; the API_URL environment variable, when
# set, overrides the built-in default.
API_URL = os.environ.get("API_URL", "https://agents-course-unit4-scoring.hf.space")

# Endpoint the question set is downloaded from.
QUESTIONS_URL = API_URL + "/questions"

# Local file names used by this script.
QUESTIONS_FILE = "questions.json"      # cached copy of the downloaded questions
ANSWERS_LOG_FILE = "answer_log.jsonl"  # one JSON record per attempted task
PROMPTS_FILE = "prompts.yaml"          # prompt templates handed to the agent
|
|
|
|
|
def fetch_and_save_questions(url: str, filename: str) -> bool:
    """Download the question set and cache it as a local JSON file.

    If ``filename`` already exists, nothing is downloaded and the existing
    file is left untouched.

    Args:
        url: Endpoint expected to return the questions as JSON.
        filename: Path of the local cache file to create.

    Returns:
        True if the cache file already existed or was written successfully;
        False if the request failed, the body was not valid JSON, the decoded
        payload was empty, or any other error occurred. Errors are reported
        on stdout rather than raised.
    """
    if os.path.exists(filename):
        print(f"Questions file '{filename}' already exists. Skipping download.")
        return True

    print(f"Fetching questions from: {url}")
    # Sentinel so the JSON handler can tell whether a response was received.
    response = None
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return False

        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(questions_data, f, indent=4, ensure_ascii=False)
        print(f"Successfully fetched {len(questions_data)} questions and saved to '{filename}'.")
        return True
    except requests.exceptions.JSONDecodeError as e:
        # This handler must come before RequestException: requests'
        # JSONDecodeError is a subclass of it, so in the opposite order this
        # branch (and its response-text diagnostic) would never run.
        print(f"Error decoding JSON response from questions endpoint: {e}")
        if response is not None:
            print(f"Response text: {response.text[:500]}")
        return False
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return False
    except Exception as e:
        # Last-resort boundary (e.g. the cache file could not be written).
        print(f"An unexpected error occurred fetching questions: {e}")
        return False
|
|
|
|
|
def load_questions(filename: str) -> list:
    """Read the cached questions back from a local JSON file.

    Args:
        filename: Path of the JSON file to read (decoded as UTF-8).

    Returns:
        The decoded JSON content on success; an empty list if the file is
        missing, is not valid JSON, or any other error occurs. Problems are
        reported on stdout rather than raised.
    """
    loaded = []
    try:
        with open(filename, 'r', encoding='utf-8') as handle:
            parsed = json.loads(handle.read())
        # len() stays inside the try so a payload without a length is
        # reported through the generic handler below.
        count = len(parsed)
        print(f"Successfully loaded {count} questions from '{filename}'.")
        loaded = parsed
    except FileNotFoundError:
        print(f"Error: Questions file '{filename}' not found.")
    except json.JSONDecodeError:
        print(f"Error: Could not decode JSON from '{filename}'.")
    except Exception as err:
        print(f"An unexpected error occurred loading questions: {err}")
    return loaded
|
|
|
|
|
def create_agent():
    """Build the CodeAgent used by this test script.

    Prompt templates are read from ``PROMPTS_FILE``. If that file is missing
    or is not valid YAML, a message is printed and ``None`` is passed as the
    agent's ``prompt_templates`` instead.

    Returns:
        The constructed CodeAgent, or None if constructing the agent raised.
        Errors raised while constructing the HfApiModel are not caught here.
    """
    try:
        # Explicit UTF-8, like every other file read in this script, so the
        # templates decode the same way regardless of the platform default.
        with open(PROMPTS_FILE, 'r', encoding='utf-8') as stream:
            prompt_templates = yaml.safe_load(stream)
    except FileNotFoundError:
        print(f"Error: Prompts file '{PROMPTS_FILE}' not found. Using default prompts.")
        prompt_templates = None
    except yaml.YAMLError as e:
        print(f"Error parsing prompts file '{PROMPTS_FILE}': {e}. Using default prompts.")
        prompt_templates = None

    model = HfApiModel(
        max_tokens=2096,
        temperature=0.5,
        model_id='Qwen/Qwen3-32B',
        token=hf_token,
    )

    try:
        agent = CodeAgent(
            model=model,
            tools=[
                FinalAnswerTool(),
                DuckDuckGoSearchTool(),
                VisitWebpageTool(),
            ],
            max_steps=6,
            verbosity_level=1,
            name="SmolAgentTester",
            description="An AI coding assistant for testing.",
            prompt_templates=prompt_templates,
        )
        print("CodeAgent instantiated successfully.")
        return agent
    except Exception as e:
        # The caller treats None as "could not create agent" and exits.
        print(f"Error instantiating CodeAgent: {e}")
        return None
|
|
|
|
|
def _append_jsonl(path: str, record: dict) -> None:
    """Append ``record`` to the JSON Lines file at ``path`` as one full line."""
    # Serialize completely before touching the file, so a serialization
    # problem can never leave a half-written line behind. default=str is
    # deliberate: the agent's answer may be an arbitrary Python object, and
    # it is logged by its string form instead of aborting the record.
    line = json.dumps(record, ensure_ascii=False, default=str)
    with open(path, 'a', encoding='utf-8') as log_f:
        log_f.write(line + '\n')


def main() -> None:
    """Run the agent on up to two randomly chosen questions and log answers.

    Steps: make sure the questions file exists (downloading it if needed),
    load it, pick the questions, build the agent, run it on each question,
    and append exactly one JSON record per attempted task to
    ``ANSWERS_LOG_FILE``. Every record carries ``task_id``, ``question`` and
    ``model_answer``; when the agent raises, ``model_answer`` holds an
    ``AGENT_ERROR: ...`` string.

    Raises:
        SystemExit: With exit code 1 if the questions cannot be fetched or
            loaded, or if the agent cannot be created.
    """
    print("Starting test script...")

    # SystemExit is raised directly: the bare exit() helper is injected by
    # the site module for interactive use and is not guaranteed to exist.
    if not fetch_and_save_questions(QUESTIONS_URL, QUESTIONS_FILE):
        print("Failed to fetch questions. Exiting.")
        raise SystemExit(1)

    all_questions = load_questions(QUESTIONS_FILE)
    if not all_questions:
        print("Failed to load questions. Exiting.")
        raise SystemExit(1)

    if len(all_questions) < 2:
        print("Warning: Fewer than 2 questions available. Testing with all available questions.")
        selected_questions = all_questions
    else:
        selected_questions = random.sample(all_questions, 2)

    print(f"\nSelected {len(selected_questions)} questions for testing:")
    pprint.pprint(selected_questions)
    print("-"*50)

    agent = create_agent()
    if agent is None:
        print("Failed to create agent. Exiting.")
        raise SystemExit(1)

    print(f"Running agent on {len(selected_questions)} questions...")
    results_log = []

    # Truncate the log so it only contains records from this run.
    with open(ANSWERS_LOG_FILE, 'w', encoding='utf-8'):
        pass

    for item in selected_questions:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue

        print(f"\n--- Running Task ID: {task_id} ---")
        print(f"Question: {question_text}")

        # Only the agent call is guarded; recording happens once afterwards,
        # so a task can never end up with both a success and an error record.
        try:
            model_answer = agent(question_text)
        except Exception as e:
            print(f"\nAGENT ERROR on task {task_id}: {e}")
            result = {
                "task_id": task_id,
                "question": question_text,
                "model_answer": f"AGENT_ERROR: {e}",
            }
        else:
            print(f"\nAgent Final Answer: {model_answer}")
            result = {
                "task_id": task_id,
                "question": question_text,
                "model_answer": model_answer,
            }

        results_log.append(result)
        _append_jsonl(ANSWERS_LOG_FILE, result)

    print("-"*50)
    print(f"\nTest script finished. {len(results_log)} results logged to '{ANSWERS_LOG_FILE}'.")
    print("Summary of results:")
    pprint.pprint(results_log)


if __name__ == "__main__":
    main()
|
|
|
|
|
|
|
|
|
|