File size: 8,089 Bytes
028b4c8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99b4265
 
 
028b4c8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
import os
import json
import random
import requests
import yaml
import pprint
from dotenv import load_dotenv

from smolagents import CodeAgent, HfApiModel
from tools.final_answer import FinalAnswerTool
from tools.visit_webpage import VisitWebpageTool
from tools.web_search import DuckDuckGoSearchTool # Note: app.py imports this from tools.web_search and smolagents

# Load environment variables from .env file
# (expects HUGGINGFACE_TOKEN; API_URL is optional and has a default below)
load_dotenv()
hf_token = os.getenv('HUGGINGFACE_TOKEN')
if not hf_token:
    # Fail fast: create_agent() passes this token to HfApiModel, which
    # cannot authenticate without it.
    raise ValueError("HUGGINGFACE_TOKEN not found in environment variables. Make sure a .env file exists.")

# --- Constants ---
API_URL = os.getenv("API_URL", "https://agents-course-unit4-scoring.hf.space") # Use env var or default
QUESTIONS_URL = f"{API_URL}/questions"  # endpoint that returns the question list as JSON
QUESTIONS_FILE = "questions.json"       # local cache written by fetch_and_save_questions()
ANSWERS_LOG_FILE = "answer_log.jsonl"   # JSON Lines log: one result object per answered question
PROMPTS_FILE = "prompts.yaml"           # prompt templates consumed by create_agent()

# --- Function to Fetch Questions ---
def fetch_and_save_questions(url: str, filename: str) -> bool:
    """Fetch questions from the API and save them to a local JSON file.

    Skips the download entirely when `filename` already exists (acts as a
    simple cache).

    Args:
        url: Questions endpoint to GET.
        filename: Path of the local JSON file to write.

    Returns:
        True if the file already exists or was fetched and saved
        successfully; False on any error (network failure, bad JSON,
        empty question list).
    """
    if os.path.exists(filename):
        print(f"Questions file '{filename}' already exists. Skipping download.")
        return True

    print(f"Fetching questions from: {url}")
    response = None  # sentinel so the JSON-decode handler can report the raw body
    try:
        response = requests.get(url, timeout=30)  # generous timeout for a slow Space
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return False

        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(questions_data, f, indent=4, ensure_ascii=False)
        print(f"Successfully fetched {len(questions_data)} questions and saved to '{filename}'.")
        return True
    # NOTE: JSONDecodeError must be caught BEFORE RequestException — in the
    # requests library it is a subclass of RequestException, so the original
    # ordering made this handler unreachable.
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        if response is not None:
            print(f"Response text: {response.text[:500]}")
        return False
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return False
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return False

# --- Function to Load Questions ---
def load_questions(filename: str) -> list:
    """Load questions from a local JSON file.

    Args:
        filename: Path to the JSON file produced by fetch_and_save_questions.

    Returns:
        The parsed list of question dicts, or [] when the file is missing,
        is not valid JSON, or any other error occurs.
    """
    try:
        with open(filename, 'r', encoding='utf-8') as f:
            questions_data = json.load(f)
        # Fixed: the original f-strings had their '{filename}' placeholder
        # corrupted to the literal text '(unknown)'.
        print(f"Successfully loaded {len(questions_data)} questions from '{filename}'.")
        return questions_data
    except FileNotFoundError:
        print(f"Error: Questions file '{filename}' not found.")
        return []
    except json.JSONDecodeError:
        print(f"Error: Could not decode JSON from '{filename}'.")
        return []
    except Exception as e:
        print(f"An unexpected error occurred loading questions: {e}")
        return []

# --- Function to Instantiate Agent ---
def create_agent():
    """Instantiate the CodeAgent with configuration similar to app.py.

    Loads prompt templates from PROMPTS_FILE, falling back to the library
    defaults (prompt_templates=None) when the file is missing or cannot be
    parsed. Authenticates the model with the module-level hf_token.

    Returns:
        The configured CodeAgent instance, or None if instantiation fails.
    """
    try:
        # Load prompts
        with open(PROMPTS_FILE, 'r') as stream:
            prompt_templates = yaml.safe_load(stream)
    except FileNotFoundError:
        print(f"Error: Prompts file '{PROMPTS_FILE}' not found. Using default prompts.")
        prompt_templates = None # Or handle differently
    except yaml.YAMLError as e:
        print(f"Error parsing prompts file '{PROMPTS_FILE}': {e}. Using default prompts.")
        prompt_templates = None

    # Configure model
    model = HfApiModel(
        max_tokens=2096,  # NOTE(review): unusual value — possibly meant 2048; confirm intent
        temperature=0.5,
        model_id=
            # 'Qwen/Qwen2.5-Coder-32B-Instruct',
            'Qwen/Qwen3-32B', 
        # custom_role_conversions=None, # Optional, kept default
        token=hf_token, 
    )

    # Create agent instance
    try:
        agent = CodeAgent(
            model=model,
            tools=[
                FinalAnswerTool(), 
                DuckDuckGoSearchTool(),
                VisitWebpageTool(),
            ],
            max_steps=6,
            verbosity_level=1, # Set higher (e.g., 2 or 3) to potentially see reasoning in stdout
            # grammar=None, # Optional, kept default
            # planning_interval=None, # Optional, kept default
            name="SmolAgentTester",
            description="An AI coding assistant for testing.",
            prompt_templates=prompt_templates, 
        )
        print("CodeAgent instantiated successfully.")
        return agent
    except Exception as e:
        # Broad catch is deliberate: any constructor failure should degrade
        # to a None return so the caller can exit cleanly.
        print(f"Error instantiating CodeAgent: {e}")
        return None

# --- Main Execution Logic ---
if __name__ == "__main__":
    print("Starting test script...")
    
    # Step 1: Fetch and save questions (no-op if the cache file exists)
    if not fetch_and_save_questions(QUESTIONS_URL, QUESTIONS_FILE):
        print("Failed to fetch questions. Exiting.")
        exit(1)

    # Step 2: Load questions
    all_questions = load_questions(QUESTIONS_FILE)
    if not all_questions:
        print("Failed to load questions. Exiting.")
        exit(1)

    # Step 3: Randomly pick 2 questions (sample without replacement)
    if len(all_questions) < 2:
        print("Warning: Fewer than 2 questions available. Testing with all available questions.")
        selected_questions = all_questions
    else:
        selected_questions = random.sample(all_questions, 2)

    print(f"\nSelected {len(selected_questions)} questions for testing:")
    pprint.pprint(selected_questions)
    print("-"*50)

    # Step 4: Instantiate agent
    agent = create_agent()
    if agent is None:
        print("Failed to create agent. Exiting.")
        exit(1)

    # Step 5: Run agent and log results
    print(f"Running agent on {len(selected_questions)} questions...")
    results_log = []
    
    # Clear or create the log file (truncate; results are appended per question below)
    with open(ANSWERS_LOG_FILE, 'w', encoding='utf-8') as log_f:
        pass # Just to clear the file initially

    for item in selected_questions:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        
        print(f"\n--- Running Task ID: {task_id} ---")
        print(f"Question: {question_text}")
        
        try:
            # Run the agent
            # Note: The agent call might print its own reasoning steps depending on verbosity
            # NOTE(review): invokes the agent via __call__; smolagents agents are
            # commonly run via agent.run(...) — confirm __call__ is supported in
            # the installed smolagents version.
            model_answer = agent(question_text) # This now holds the CONCISE answer from FinalAnswerTool
            print(f"\nAgent Final Answer: {model_answer}") # Renamed print for clarity

            # Prepare result for logging
            result = {
                "task_id": task_id,
                "question": question_text,
                "model_answer": model_answer, # Directly use the concise answer
                # "reasoning_trace": "TODO" # Add if agent provides trace separately
            }
            results_log.append(result)

            # Append result to log file (JSON Lines format: one object per line,
            # so partial progress survives a crash mid-run)
            with open(ANSWERS_LOG_FILE, 'a', encoding='utf-8') as log_f:
                json.dump(result, log_f, ensure_ascii=False)
                log_f.write('\n')
                
        except Exception as e:
             print(f"\nAGENT ERROR on task {task_id}: {e}")
             # Optionally log errors too
             error_result = {"task_id": task_id, "model_answer": f"AGENT_ERROR: {e}"}
             results_log.append(error_result)
             with open(ANSWERS_LOG_FILE, 'a', encoding='utf-8') as log_f:
                 json.dump(error_result, log_f, ensure_ascii=False)
                 log_f.write('\n')

    print("-"*50)
    print(f"\nTest script finished. {len(results_log)} results logged to '{ANSWERS_LOG_FILE}'.")
    print("Summary of results:")
    pprint.pprint(results_log)

# Ensure prompts.yaml and .env exist in the same directory or adjust paths.
# Ensure necessary packages are installed: pip install requests pyyaml python-dotenv smolagents
# (pprint is part of the Python standard library and needs no install.)

# ... rest of the script to be added ...