import os
import sys
import json
import random
import requests
import yaml
import pprint
from dotenv import load_dotenv
from smolagents import CodeAgent, HfApiModel
from tools.final_answer import FinalAnswerTool
from tools.visit_webpage import VisitWebpageTool
from tools.web_search import DuckDuckGoSearchTool # Note: app.py imports this from tools.web_search and smolagents
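# The tool classes above are assumed to live in a local tools/ package next to
# this script, mirroring the layout used by app.py in this Space.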
# Load environment variables from .env file
load_dotenv()
hf_token = os.getenv('HUGGINGFACE_TOKEN')
if not hf_token:
raise ValueError("HUGGINGFACE_TOKEN not found in environment variables. Make sure a .env file exists.")
# --- Constants ---
API_URL = os.getenv("API_URL", "https://agents-course-unit4-scoring.hf.space") # Use env var or default
QUESTIONS_URL = f"{API_URL}/questions"
QUESTIONS_FILE = "questions.json"
ANSWERS_LOG_FILE = "answer_log.jsonl"
PROMPTS_FILE = "prompts.yaml"
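# Answers are appended to ANSWERS_LOG_FILE as JSON Lines (one JSON object per
# line), so each result is persisted as soon as the agent produces it.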
# --- Function to Fetch Questions ---
def fetch_and_save_questions(url: str, filename: str):
"""Fetches questions from the API and saves them to a local JSON file."""
if os.path.exists(filename):
print(f"Questions file '{filename}' already exists. Skipping download.")
return True
print(f"Fetching questions from: {url}")
try:
response = requests.get(url, timeout=30) # Increased timeout
response.raise_for_status()
questions_data = response.json()
if not questions_data:
print("Fetched questions list is empty.")
return False
with open(filename, 'w', encoding='utf-8') as f:
json.dump(questions_data, f, indent=4, ensure_ascii=False)
print(f"Successfully fetched {len(questions_data)} questions and saved to '{filename}'.")
return True
    except requests.exceptions.JSONDecodeError as e:
        # Must come before the RequestException handler: requests' JSONDecodeError
        # is a subclass of RequestException, so the general clause would otherwise
        # shadow this one.
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return False
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return False
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return False
# --- Function to Load Questions ---
def load_questions(filename: str) -> list:
"""Loads questions from a local JSON file."""
try:
with open(filename, 'r', encoding='utf-8') as f:
questions_data = json.load(f)
print(f"Successfully loaded {len(questions_data)} questions from '{filename}'.")
return questions_data
except FileNotFoundError:
print(f"Error: Questions file '{filename}' not found.")
return []
except json.JSONDecodeError:
print(f"Error: Could not decode JSON from '{filename}'.")
return []
except Exception as e:
print(f"An unexpected error occurred loading questions: {e}")
return []
# --- Function to Instantiate Agent ---
def create_agent():
"""Instantiates the CodeAgent with configuration similar to app.py."""
try:
# Load prompts
with open(PROMPTS_FILE, 'r') as stream:
prompt_templates = yaml.safe_load(stream)
except FileNotFoundError:
print(f"Error: Prompts file '{PROMPTS_FILE}' not found. Using default prompts.")
        prompt_templates = None  # CodeAgent falls back to its bundled default prompts
except yaml.YAMLError as e:
print(f"Error parsing prompts file '{PROMPTS_FILE}': {e}. Using default prompts.")
prompt_templates = None
# Configure model
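    # HfApiModel routes generation through the Hugging Face Inference API,
    # authenticated with the hf_token loaded above.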
model = HfApiModel(
max_tokens=2096,
temperature=0.5,
        model_id='Qwen/Qwen3-32B',  # previously 'Qwen/Qwen2.5-Coder-32B-Instruct'
        # custom_role_conversions=None,  # Optional, kept default
token=hf_token,
)
# Create agent instance
try:
agent = CodeAgent(
model=model,
tools=[
FinalAnswerTool(),
DuckDuckGoSearchTool(),
VisitWebpageTool(),
],
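            # max_steps bounds the agent's reason/act loop so a stuck task cannot run indefinitely.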
max_steps=6,
verbosity_level=1, # Set higher (e.g., 2 or 3) to potentially see reasoning in stdout
# grammar=None, # Optional, kept default
# planning_interval=None, # Optional, kept default
name="SmolAgentTester",
description="An AI coding assistant for testing.",
prompt_templates=prompt_templates,
)
print("CodeAgent instantiated successfully.")
return agent
except Exception as e:
print(f"Error instantiating CodeAgent: {e}")
return None
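# Note: create_agent() returns None on failure instead of raising, so callers
# must check its return value (the main block below does).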
# --- Main Execution Logic ---
if __name__ == "__main__":
print("Starting test script...")
# Step 1: Fetch and save questions
if not fetch_and_save_questions(QUESTIONS_URL, QUESTIONS_FILE):
print("Failed to fetch questions. Exiting.")
        sys.exit(1)
# Step 2: Load questions
all_questions = load_questions(QUESTIONS_FILE)
if not all_questions:
print("Failed to load questions. Exiting.")
        sys.exit(1)
# Step 3: Randomly pick 2 questions
if len(all_questions) < 2:
print("Warning: Fewer than 2 questions available. Testing with all available questions.")
selected_questions = all_questions
else:
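        # random.sample picks without replacement, so the selected questions are distinct.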
selected_questions = random.sample(all_questions, 2)
print(f"\nSelected {len(selected_questions)} questions for testing:")
pprint.pprint(selected_questions)
print("-"*50)
# Step 4: Instantiate agent
agent = create_agent()
if agent is None:
print("Failed to create agent. Exiting.")
        sys.exit(1)
# Step 5: Run agent and log results
print(f"Running agent on {len(selected_questions)} questions...")
results_log = []
# Clear or create the log file
with open(ANSWERS_LOG_FILE, 'w', encoding='utf-8') as log_f:
pass # Just to clear the file initially
for item in selected_questions:
task_id = item.get("task_id")
question_text = item.get("question")
if not task_id or question_text is None:
print(f"Skipping item with missing task_id or question: {item}")
continue
print(f"\n--- Running Task ID: {task_id} ---")
print(f"Question: {question_text}")
        try:
            # Run the agent; depending on verbosity_level it may print its own
            # reasoning steps along the way.
            model_answer = agent.run(question_text)  # concise final answer from FinalAnswerTool
            print(f"\nAgent Final Answer: {model_answer}")
# Prepare result for logging
result = {
"task_id": task_id,
"question": question_text,
"model_answer": model_answer, # Directly use the concise answer
# "reasoning_trace": "TODO" # Add if agent provides trace separately
}
results_log.append(result)
# Append result to log file (JSON Lines format)
with open(ANSWERS_LOG_FILE, 'a', encoding='utf-8') as log_f:
json.dump(result, log_f, ensure_ascii=False)
log_f.write('\n')
except Exception as e:
print(f"\nAGENT ERROR on task {task_id}: {e}")
# Optionally log errors too
error_result = {"task_id": task_id, "model_answer": f"AGENT_ERROR: {e}"}
results_log.append(error_result)
with open(ANSWERS_LOG_FILE, 'a', encoding='utf-8') as log_f:
json.dump(error_result, log_f, ensure_ascii=False)
log_f.write('\n')
print("-"*50)
print(f"\nTest script finished. {len(results_log)} results logged to '{ANSWERS_LOG_FILE}'.")
print("Summary of results:")
pprint.pprint(results_log)
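# A minimal sketch for inspecting the log afterwards (assumes answer_log.jsonl
# was written by the run above):
#
#     with open(ANSWERS_LOG_FILE, encoding='utf-8') as f:
#         for line in f:
#             entry = json.loads(line)
#             print(entry["task_id"], "->", entry["model_answer"])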
# Ensure prompts.yaml and .env exist in the same directory or adjust paths.
# Ensure necessary packages are installed: pip install requests pyyaml python-dotenv smolagents
# ... rest of the script to be added ...