MasterOfHugs committed on
Commit
b6c4ba9
·
verified ·
1 Parent(s): 0a1558c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +194 -192
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import os
2
  import re
3
  import json
@@ -5,209 +6,235 @@ import logging
5
  import requests
6
  import pandas as pd
7
  import gradio as gr
 
8
  from transformers import AutoTokenizer, AutoModelForCausalLM
9
 
10
- # --- Logging ---
11
  logging.basicConfig(level=logging.INFO)
12
  logger = logging.getLogger(__name__)
13
 
14
  # --- Constants ---
15
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
16
- MODEL_NAME = "bigscience/bloomz-1b1"
 
17
 
18
- # --- Load model & tokenizer (Causal LM for BLOOM) ---
19
  logger.info(f"Loading tokenizer and model: {MODEL_NAME} ...")
20
  try:
21
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
22
- # Ensure pad token exists
 
23
  if tokenizer.pad_token_id is None:
24
  tokenizer.pad_token_id = tokenizer.eos_token_id
25
- model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
 
 
 
26
  logger.info("Model and tokenizer loaded successfully.")
27
  except Exception as e:
28
  logger.exception(f"Error loading model/tokenizer for '{MODEL_NAME}': {e}")
29
  raise
30
 
31
- # --- Dummy Tool Example (kept for compatibility) ---
32
- class AddTwoNumbers:
33
- """Tool that adds two integers"""
 
 
34
  @staticmethod
35
- def run(a: int, b: int) -> int:
36
- return a + b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
- tools_description = "Available tool: AddTwoNumbers.run(a, b)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
  # --- Reasoning Agent ---
41
  class ReasoningAgent:
42
  def __init__(self):
43
  self.tools_description = tools_description
44
- logger.info("ReasoningAgent initialized.")
45
-
46
- # Few-shot + strict instruction to try to get JSON-only output
47
  self.few_shot = (
48
- "Example:\n"
49
- "Question: What is 2 + 3?\n"
50
- "Answer in JSON:\n"
51
- '{\n'
52
- ' "thought": "I will add 2 and 3 step by step",\n'
53
- ' "action": "AddTwoNumbers.run(2, 3)",\n'
54
- ' "observation": "5",\n'
55
- ' "answer": "5"\n'
56
- '}\n\n'
57
- "Example:\n"
58
- "Question: Who discovered X (unknown)?\n"
59
- "Answer in JSON:\n"
60
- '{\n'
61
- ' "thought": "I do not know this fact",\n'
62
- ' "action": "None",\n'
63
- ' "observation": "",\n'
64
- ' "answer": "I do not know."\n'
65
- '}\n\n'
66
  )
 
67
 
68
- self.instruction = (
69
- "You are an AI reasoning agent. "
70
- "Available tool: AddTwoNumbers.run(a, b). "
71
- "Answer the question and respond ONLY with a SINGLE valid JSON object (no explanatory text, no code). "
72
- 'Format exactly as: {"thought":..., "action":..., "observation":..., "answer":...}. '
73
- 'If you are unsure, set "answer": "I do not know."'
 
 
 
 
74
  )
 
 
75
 
76
- def generate(self, prompt: str, max_new_tokens: int = 220) -> str:
77
- """Generate text with the causal model, returning only generated suffix (not prompt)."""
78
- inputs = tokenizer(prompt, return_tensors="pt")
79
- input_len = inputs["input_ids"].shape[1]
80
- try:
81
- out = model.generate(
82
- **inputs,
83
- max_new_tokens=max_new_tokens,
84
- do_sample=False,
85
- # greedy generation by default; adjust if you want beams/sampling
86
- pad_token_id=tokenizer.pad_token_id
87
- )
88
- # `out[0]` contains prompt + generated tokens for causal LM
89
- full_decoded = tokenizer.decode(out[0], skip_special_tokens=True)
90
- # Try to remove the prompt prefix from the decoded string to get only new text
91
- prompt_decoded = tokenizer.decode(inputs["input_ids"][0], skip_special_tokens=True)
92
- if full_decoded.startswith(prompt_decoded):
93
- generated = full_decoded[len(prompt_decoded):].strip()
94
- else:
95
- # fallback heuristic: try slicing tokens
96
- generated_tokens = out[0][input_len:]
97
- if generated_tokens.nelement() == 0:
98
- generated = ""
99
- else:
100
- generated = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
101
- return generated
102
- except Exception as e:
103
- logger.exception("Generation error: %s", e)
104
- raise
105
-
106
- def extract_first_json(self, text: str):
107
- """Extract the first JSON object found in text. Returns Python object or None."""
108
- if text is None:
109
- return None
110
- # Regex to find first balanced-ish JSON object (handles simple nested objects)
111
  m = re.search(r"\{(?:[^{}]|\{[^{}]*\})*\}", text, re.DOTALL)
112
  if not m:
113
  return None
114
  json_text = m.group(0)
115
  try:
116
- return json.loads(json_text)
 
117
  except json.JSONDecodeError:
118
- # Try to fix common issues: replace single quotes -> double quotes, None->null, trailing commas
119
  fixed = json_text.replace("'", '"')
120
- fixed = re.sub(r"\bNone\b", "null", fixed)
121
- fixed = re.sub(r",\s*}", "}", fixed)
122
- fixed = re.sub(r",\s*\]", "]", fixed)
123
  try:
124
- return json.loads(fixed)
 
125
  except Exception:
126
- logger.debug("Failed to decode JSON even after fixes. Raw: %s", json_text)
127
  return None
128
 
129
  def __call__(self, question: str) -> str:
130
- logger.info("\n=== Processing Question ===\n%s\n", question)
131
- prompt = (
132
- self.few_shot
133
- + "\n\n"
134
- + self.instruction
135
- + f"\n\nQuestion: {question}\nAnswer in JSON:"
136
- )
137
 
 
138
  try:
139
- generated = self.generate(prompt, max_new_tokens=300)
140
- # If generated is empty, try decoding entire output (fallback)
141
- if not generated:
142
- logger.info("Generated empty suffix, trying full decode fallback.")
143
- generated = self.generate(prompt, max_new_tokens=300)
144
-
145
- logger.info("=== Generated (raw) ===\n%s\n", generated[:4000])
 
 
 
 
146
  except Exception as e:
147
- logger.exception("Generation failed: %s", e)
148
  return f"AGENT ERROR: Generation failed: {e}"
149
 
150
- # Try to get first JSON object
151
- parsed = self.extract_first_json(generated)
152
- if parsed is None:
153
- # If no JSON found, try to interpret simple plain answers (single token/number/word)
154
- answer_guess = generated.strip().splitlines()[0] if generated.strip() else "I do not know."
155
- parsed = {"thought": "", "action": "None", "observation": "", "answer": answer_guess}
156
-
157
- # Normalize fields
158
- thought = parsed.get("thought", "").strip() if isinstance(parsed.get("thought", ""), str) else ""
159
- action = parsed.get("action", parsed.get("tool", "None")) or ""
160
- observation = parsed.get("observation", "") or ""
161
- answer = parsed.get("answer", "") or ""
162
-
163
- # If action is a string invoking AddTwoNumbers, execute it
164
- if isinstance(action, str) and action.strip().startswith("AddTwoNumbers"):
 
165
  try:
166
- args_text = action[action.find("(")+1:action.find(")")]
167
- args = [a.strip() for a in args_text.split(",") if a.strip() != ""]
168
- if len(args) == 2:
169
- a_val = int(args[0])
170
- b_val = int(args[1])
171
- obs = AddTwoNumbers.run(a_val, b_val)
172
- observation = str(obs)
173
- # If answer is placeholder or empty, set to observation
174
- if not answer or str(answer).strip().lower() in ["", "none", "null", "i do not know."]:
175
- answer = str(obs)
176
- logger.info("βœ… Executed tool: AddTwoNumbers.run(%s, %s) -> %s", a_val, b_val, obs)
177
- else:
178
- observation = "TOOL ERROR: wrong number of args"
179
- logger.warning("Tool call had wrong number of arguments: %s", action)
180
  except Exception as e:
181
- observation = f"TOOL ERROR: {e}"
182
- logger.exception("Tool execution error: %s", e)
183
-
184
- # Sanity checks and fallbacks
185
- if isinstance(answer, str):
186
- answer_str = answer.strip()
 
 
 
 
 
 
 
 
187
  else:
188
- answer_str = str(answer)
 
189
 
190
- # Heuristics to avoid returning the few-shot examples as the final answer
191
- if self.few_shot.strip()[:30] in answer_str:
192
- answer_str = "I do not know."
193
-
194
- if not answer_str or answer_str.lower() in ["none", "null", "i do not know.", "i do not know"]:
195
- answer_str = "I do not know."
196
 
197
  # Log internal state
198
  logger.info("πŸ’­ Thought: %s", thought)
199
  logger.info("πŸ”§ Action: %s", action)
200
- logger.info("πŸ‘€ Observation: %s", observation)
201
- logger.info("πŸ“ Answer: %s", answer_str)
202
  logger.info("-" * 60)
203
 
204
- return answer_str
 
 
205
 
206
- # --- Run & Submit function (keeps the same interface) ---
207
  def run_and_submit_all(profile: gr.OAuthProfile | None):
208
- """
209
- Fetch questions, run the agent on them, submit answers, and return status + results table.
210
- """
211
  if profile:
212
  username = profile.username
213
  logger.info("User logged in: %s", username)
@@ -218,36 +245,27 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
218
  questions_url = f"{DEFAULT_API_URL}/questions"
219
  submit_url = f"{DEFAULT_API_URL}/submit"
220
 
221
- # 1. Fetch questions
222
- logger.info("Fetching questions from: %s", questions_url)
223
  try:
224
  response = requests.get(questions_url, timeout=15)
225
  response.raise_for_status()
226
  questions_data = response.json()
227
- if not isinstance(questions_data, list) or len(questions_data) == 0:
228
- logger.warning("Fetched questions list is empty or invalid format.")
229
  return "Fetched questions list is empty or invalid format.", None
230
  except Exception as e:
231
- logger.exception("Error fetching questions: %s", e)
232
  return f"Error fetching questions: {e}", None
233
 
234
- # 2. Instantiate agent
235
- try:
236
- agent = ReasoningAgent()
237
- except Exception as e:
238
- logger.exception("Error instantiating agent: %s", e)
239
- return f"Error initializing agent: {e}", None
240
-
241
- # 3. Run agent on questions
242
  results_log = []
243
  answers_payload = []
244
- logger.info("Running agent on %d questions...", len(questions_data))
245
 
 
246
  for item in questions_data:
247
  task_id = item.get("task_id")
248
  question_text = item.get("question")
249
  if not task_id or question_text is None:
250
- logger.warning("Skipping item with missing task_id or question: %s", item)
251
  continue
252
  try:
253
  submitted_answer = agent(question_text)
@@ -258,7 +276,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
258
  "Submitted Answer": submitted_answer
259
  })
260
  except Exception as e:
261
- logger.exception("Error running agent on task %s: %s", task_id, e)
262
  results_log.append({
263
  "Task ID": task_id,
264
  "Question": question_text,
@@ -269,17 +287,17 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
269
  logger.warning("Agent did not produce any answers to submit.")
270
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
271
 
272
- # 4. Submit
273
  submission_data = {
274
  "username": username.strip(),
275
  "agent_code": f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main",
276
  "answers": answers_payload
277
  }
278
- logger.info("Submitting %d answers to: %s", len(answers_payload), submit_url)
 
279
  try:
280
- response = requests.post(submit_url, json=submission_data, timeout=60)
281
- response.raise_for_status()
282
- result_data = response.json()
283
  final_status = (
284
  f"Submission Successful!\n"
285
  f"User: {result_data.get('username')}\n"
@@ -287,33 +305,32 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
287
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
288
  f"Message: {result_data.get('message', 'No message received.')}"
289
  )
290
- logger.info("Submission successful.")
291
  results_df = pd.DataFrame(results_log)
 
292
  return final_status, results_df
293
  except requests.exceptions.HTTPError as e:
294
- logger.exception("Submission HTTP error: %s", e)
295
  try:
296
- err_json = e.response.json()
297
- detail = err_json.get("detail", e.response.text)
298
  except Exception:
299
  detail = str(e)
300
- status_message = f"Submission Failed: {detail}"
301
  results_df = pd.DataFrame(results_log)
302
- return status_message, results_df
303
  except Exception as e:
304
- logger.exception("Submission error: %s", e)
305
  results_df = pd.DataFrame(results_log)
306
  return f"Submission failed: {e}", results_df
307
 
 
308
  # --- Gradio Interface ---
309
  with gr.Blocks() as demo:
310
- gr.Markdown("# Reasoning Agent Runner (BLOOMZ Causal LM)")
311
  gr.Markdown(
312
  """
313
  Instructions:
314
- 1. Login with Hugging Face (use the Login button).
315
  2. Click 'Run Evaluation & Submit All Answers'.
316
- 3. The agent will attempt step-by-step reasoning and submit answers.
317
  """
318
  )
319
  gr.LoginButton()
@@ -327,20 +344,5 @@ with gr.Blocks() as demo:
327
  )
328
 
329
  if __name__ == "__main__":
330
- print("\n" + "-"*30 + " App Starting " + "-"*30)
331
- # Print environment hints
332
- space_host_startup = os.getenv("SPACE_HOST")
333
- space_id_startup = os.getenv("SPACE_ID")
334
- if space_host_startup:
335
- print(f"βœ… SPACE_HOST found: {space_host_startup}")
336
- print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
337
- else:
338
- print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
339
- if space_id_startup:
340
- print(f"βœ… SPACE_ID found: {space_id_startup}")
341
- print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
342
- else:
343
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
344
- print("-"*(60 + len(" App Starting ")) + "\n")
345
-
346
  demo.launch(debug=True, share=False)
 
1
+ # app.py
2
  import os
3
  import re
4
  import json
 
6
  import requests
7
  import pandas as pd
8
  import gradio as gr
9
+ import torch
10
  from transformers import AutoTokenizer, AutoModelForCausalLM
11
 
12
+ # --- Logging setup ---
13
  logging.basicConfig(level=logging.INFO)
14
  logger = logging.getLogger(__name__)
15
 
16
  # --- Constants ---
17
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
18
+ # Change MODEL_NAME if you want a smaller / different causal model
19
+ MODEL_NAME = os.getenv("MODEL_NAME", "bigscience/bloomz-1b1")
20
 
21
+ # --- Load tokenizer & model (causal LM) ---
22
  logger.info(f"Loading tokenizer and model: {MODEL_NAME} ...")
23
  try:
24
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
25
+ model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
26
+ # ensure pad_token_id set
27
  if tokenizer.pad_token_id is None:
28
  tokenizer.pad_token_id = tokenizer.eos_token_id
29
+ # move to device
30
+ device = "cuda" if torch.cuda.is_available() else "cpu"
31
+ model.to(device)
32
+ model.eval()
33
  logger.info("Model and tokenizer loaded successfully.")
34
  except Exception as e:
35
  logger.exception(f"Error loading model/tokenizer for '{MODEL_NAME}': {e}")
36
  raise
37
 
38
+ # --- Simple Wikipedia search tool (synchronous, HTTP requests) ---
39
+ class WikipediaTool:
40
+ """Simple helper to search Wikipedia and fetch page extracts."""
41
+ API_BASE = "https://en.wikipedia.org/w/api.php"
42
+
43
  @staticmethod
44
+ def search(query: str, limit: int = 3):
45
+ """Return a list of search results (title, snippet)."""
46
+ params = {
47
+ "action": "query",
48
+ "list": "search",
49
+ "srsearch": query,
50
+ "srlimit": limit,
51
+ "format": "json",
52
+ }
53
+ r = requests.get(WikipediaTool.API_BASE, params=params, timeout=10)
54
+ r.raise_for_status()
55
+ data = r.json()
56
+ results = []
57
+ for item in data.get("query", {}).get("search", []):
58
+ results.append({
59
+ "title": item.get("title"),
60
+ "snippet": re.sub("<.*?>", "", item.get("snippet", "")) # strip HTML tags
61
+ })
62
+ return results
63
 
64
+ @staticmethod
65
+ def get_extract(title: str, chars: int = 800):
66
+ """Return the extract (plain text) for a Wikipedia page title."""
67
+ params = {
68
+ "action": "query",
69
+ "prop": "extracts",
70
+ "explaintext": True,
71
+ "exchars": chars,
72
+ "titles": title,
73
+ "format": "json",
74
+ "redirects": 1
75
+ }
76
+ r = requests.get(WikipediaTool.API_BASE, params=params, timeout=10)
77
+ r.raise_for_status()
78
+ data = r.json()
79
+ pages = data.get("query", {}).get("pages", {})
80
+ for pid, page in pages.items():
81
+ return {"title": page.get("title"), "extract": page.get("extract", "")}
82
+ return {"title": title, "extract": ""}
83
+
84
+
85
+ # --- Tools description presented to the model ---
86
+ tools_description = (
87
+ "Available tool: Wikipedia.search(query) -> returns a short list of titles+snippets.\n"
88
+ " Wikipedia.get_extract(title) -> returns the page extract (plain text).\n"
89
+ "If you want the agent to use the web, call these tools by writing action like:\n"
90
+ " Search: Wikipedia.search(\"query string\")\n"
91
+ " Extract: Wikipedia.get_extract(\"Exact Page Title\")\n"
92
+ "If unsure or cannot answer from tools, set answer to \"I do not know.\""
93
+ )
94
 
95
  # --- Reasoning Agent ---
96
  class ReasoningAgent:
97
  def __init__(self):
98
  self.tools_description = tools_description
99
+ # small few-shot just to show JSON format (kept minimal)
 
 
100
  self.few_shot = (
101
+ "Format example (ONLY RETURN a single JSON object):\n"
102
+ '{"thought":"...","action":"...","observation":"...","answer":"..."}\n'
103
+ "Action should be a single tool call or 'None'.\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  )
105
+ logger.info("ReasoningAgent initialized.")
106
 
107
+ def build_prompt(self, question: str) -> str:
108
+ # Keep prompt compact and explicit: produce ONLY one JSON object.
109
+ instruction = (
110
+ "You are an AI reasoning agent. Use the available tools if needed.\n"
111
+ + self.tools_description + "\n"
112
+ "Answer ONLY with a SINGLE valid JSON object (no extra text, no code). "
113
+ "Use exactly the keys: thought, action, observation, answer.\n"
114
+ "If you are going to call a tool, set action to the tool call as a single string; "
115
+ "if not using tools set action to \"None\". "
116
+ "If unsure, set answer to \"I do not know.\""
117
  )
118
+ prompt = f"{self.few_shot}\n{instruction}\n\nQuestion: {question}\nAnswer in JSON:"
119
+ return prompt
120
 
121
+ def parse_action(self, action_str: str):
122
+ """
123
+ Recognize actions of the form:
124
+ Wikipedia.search("query")
125
+ Wikipedia.get_extract("Title")
126
+ Returns a tuple (tool_name, arg) or (None, None).
127
+ """
128
+ if not isinstance(action_str, str):
129
+ return None, None
130
+ action_str = action_str.strip()
131
+ # search pattern Wikipedia.search("...")
132
+ m = re.match(r'Wikipedia\.search\(\s*["\'](.+?)["\']\s*\)\s*$', action_str)
133
+ if m:
134
+ return "search", m.group(1)
135
+ m2 = re.match(r'Wikipedia\.get_extract\(\s*["\'](.+?)["\']\s*\)\s*$', action_str)
136
+ if m2:
137
+ return "extract", m2.group(1)
138
+ return None, None
139
+
140
+ def extract_json(self, text: str):
141
+ # Try to find the first JSON object in the generated text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  m = re.search(r"\{(?:[^{}]|\{[^{}]*\})*\}", text, re.DOTALL)
143
  if not m:
144
  return None
145
  json_text = m.group(0)
146
  try:
147
+ parsed = json.loads(json_text)
148
+ return parsed
149
  except json.JSONDecodeError:
150
+ # try to fix common issues: single quotes -> double quotes
151
  fixed = json_text.replace("'", '"')
 
 
 
152
  try:
153
+ parsed = json.loads(fixed)
154
+ return parsed
155
  except Exception:
 
156
  return None
157
 
158
  def __call__(self, question: str) -> str:
159
+ logger.info(f"\n=== Processing Question ===\n{question}\n")
160
+ prompt = self.build_prompt(question)
 
 
 
 
 
161
 
162
+ # Tokenize & generate
163
  try:
164
+ inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
165
+ out = model.generate(
166
+ **inputs,
167
+ max_new_tokens=220,
168
+ do_sample=False,
169
+ num_beams=3,
170
+ early_stopping=True,
171
+ pad_token_id=tokenizer.pad_token_id
172
+ )
173
+ generated = tokenizer.decode(out[0], skip_special_tokens=True).strip()
174
+ logger.info("=== Generated (raw) ===\n%s", generated[:2000])
175
  except Exception as e:
176
+ logger.exception("Generation error: %s", e)
177
  return f"AGENT ERROR: Generation failed: {e}"
178
 
179
+ # Extract JSON
180
+ parsed = self.extract_json(generated)
181
+ if not parsed:
182
+ # fallback: return "I do not know."
183
+ logger.warning("No valid JSON parsed from model output. Returning I do not know.")
184
+ return "I do not know."
185
+
186
+ # Ensure keys exist
187
+ thought = parsed.get("thought", "")
188
+ action = parsed.get("action", "None")
189
+ observation = parsed.get("observation", "")
190
+ answer = parsed.get("answer", "")
191
+
192
+ # If model asked to call Wikipedia tools, do it
193
+ tool_name, tool_arg = self.parse_action(action if action is not None else "")
194
+ if tool_name == "search":
195
  try:
196
+ results = WikipediaTool.search(tool_arg, limit=3)
197
+ observation = json.dumps(results, ensure_ascii=False)
198
+ # if answer empty, try to set it to a succinct message
199
+ if not answer or str(answer).strip() in ["", "I do not know.", "None"]:
200
+ answer = f"Found {len(results)} wiki search results for '{tool_arg}'."
201
+ logger.info("βœ… Executed tool: Wikipedia.search('%s') -> %d results", tool_arg, len(results))
 
 
 
 
 
 
 
 
202
  except Exception as e:
203
+ observation = f"Wikipedia search error: {e}"
204
+ logger.exception("Wikipedia search error")
205
+ answer = "I do not know."
206
+ elif tool_name == "extract":
207
+ try:
208
+ res = WikipediaTool.get_extract(tool_arg, chars=1500)
209
+ observation = json.dumps(res, ensure_ascii=False)
210
+ if not answer or str(answer).strip() in ["", "I do not know.", "None"]:
211
+ answer = f"Extract fetched for '{res.get('title')}'."
212
+ logger.info("βœ… Executed tool: Wikipedia.get_extract('%s')", tool_arg)
213
+ except Exception as e:
214
+ observation = f"Wikipedia extract error: {e}"
215
+ logger.exception("Wikipedia extract error")
216
+ answer = "I do not know."
217
  else:
218
+ # no tool or unrecognized action
219
+ logger.debug("No tool called or action unrecognized: %s", action)
220
 
221
+ # Final sanitization
222
+ if not answer or str(answer).strip() in ["", "None", "null"]:
223
+ answer = "I do not know."
 
 
 
224
 
225
  # Log internal state
226
  logger.info("πŸ’­ Thought: %s", thought)
227
  logger.info("πŸ”§ Action: %s", action)
228
+ logger.info("πŸ‘€ Observation: %s", observation if len(str(observation))<400 else str(observation)[:400]+"...")
229
+ logger.info("πŸ“ Answer: %s", answer)
230
  logger.info("-" * 60)
231
 
232
+ # Return only the answer string for submission (same behavior as before)
233
+ return answer
234
+
235
 
236
+ # --- Run & Submit ---
237
  def run_and_submit_all(profile: gr.OAuthProfile | None):
 
 
 
238
  if profile:
239
  username = profile.username
240
  logger.info("User logged in: %s", username)
 
245
  questions_url = f"{DEFAULT_API_URL}/questions"
246
  submit_url = f"{DEFAULT_API_URL}/submit"
247
 
 
 
248
  try:
249
  response = requests.get(questions_url, timeout=15)
250
  response.raise_for_status()
251
  questions_data = response.json()
252
+ if not isinstance(questions_data, list):
253
+ logger.error("Unexpected questions_data format: %s", type(questions_data))
254
  return "Fetched questions list is empty or invalid format.", None
255
  except Exception as e:
256
+ logger.exception("Error fetching questions")
257
  return f"Error fetching questions: {e}", None
258
 
259
+ agent = ReasoningAgent()
 
 
 
 
 
 
 
260
  results_log = []
261
  answers_payload = []
 
262
 
263
+ logger.info("Running agent on %d questions...", len(questions_data))
264
  for item in questions_data:
265
  task_id = item.get("task_id")
266
  question_text = item.get("question")
267
  if not task_id or question_text is None:
268
+ logger.warning("Skipping invalid item: %s", item)
269
  continue
270
  try:
271
  submitted_answer = agent(question_text)
 
276
  "Submitted Answer": submitted_answer
277
  })
278
  except Exception as e:
279
+ logger.exception("Agent run error on task %s: %s", task_id, e)
280
  results_log.append({
281
  "Task ID": task_id,
282
  "Question": question_text,
 
287
  logger.warning("Agent did not produce any answers to submit.")
288
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
289
 
 
290
  submission_data = {
291
  "username": username.strip(),
292
  "agent_code": f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main",
293
  "answers": answers_payload
294
  }
295
+ logger.info("Submitting %d answers for user '%s' to %s ...", len(answers_payload), username, submit_url)
296
+
297
  try:
298
+ resp = requests.post(submit_url, json=submission_data, timeout=60)
299
+ resp.raise_for_status()
300
+ result_data = resp.json()
301
  final_status = (
302
  f"Submission Successful!\n"
303
  f"User: {result_data.get('username')}\n"
 
305
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
306
  f"Message: {result_data.get('message', 'No message received.')}"
307
  )
 
308
  results_df = pd.DataFrame(results_log)
309
+ logger.info("Submission succeeded.")
310
  return final_status, results_df
311
  except requests.exceptions.HTTPError as e:
312
+ logger.exception("Submission HTTP error")
313
  try:
314
+ detail = e.response.json()
 
315
  except Exception:
316
  detail = str(e)
 
317
  results_df = pd.DataFrame(results_log)
318
+ return f"Submission Failed: {detail}", results_df
319
  except Exception as e:
320
+ logger.exception("Submission error")
321
  results_df = pd.DataFrame(results_log)
322
  return f"Submission failed: {e}", results_df
323
 
324
+
325
  # --- Gradio Interface ---
326
  with gr.Blocks() as demo:
327
+ gr.Markdown("# Reasoning Agent Runner")
328
  gr.Markdown(
329
  """
330
  Instructions:
331
+ 1. Login with Hugging Face.
332
  2. Click 'Run Evaluation & Submit All Answers'.
333
+ 3. The agent can call Wikipedia.search(...) and Wikipedia.get_extract(...).
334
  """
335
  )
336
  gr.LoginButton()
 
344
  )
345
 
346
  if __name__ == "__main__":
347
+ logger.info("Starting Gradio app...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
348
  demo.launch(debug=True, share=False)