GregPLigon committed on
Commit
f9e706d
·
verified ·
1 Parent(s): a53c288

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -11
app.py CHANGED
@@ -1,14 +1,63 @@
1
  import os
 
2
  import threading
3
  import gradio as gr
4
  import requests
5
  import pandas as pd
6
- from smolagents import CodeAgent, DuckDuckGoSearchTool, VisitWebpageTool, LiteLLMModel
7
 
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  # ============================================================================
13
  # AGENT DEFINITION
14
  # ============================================================================
@@ -19,9 +68,8 @@ class GAIAAgent:
19
  if not api_key:
20
  raise ValueError("GEMINI_API_KEY not set in Space secrets")
21
 
22
- # Use LiteLLMModel directly; it fully implements the smolagents Model
23
- # interface including ChatMessage, token_usage, code parsing, etc.
24
- # num_retries=0 prevents LiteLLM from hanging on rate limit errors.
25
  model = LiteLLMModel(
26
  model_id="gemini/gemini-2.5-flash",
27
  api_key=api_key,
@@ -30,13 +78,12 @@ class GAIAAgent:
30
  max_tokens=2048,
31
  )
32
 
33
- self.agent = CodeAgent(
34
  model=model,
35
  tools=[
36
  DuckDuckGoSearchTool(),
37
  VisitWebpageTool(),
38
  ],
39
- add_base_tools=True,
40
  max_steps=6,
41
  )
42
 
@@ -57,7 +104,7 @@ Q: What is 2 + 2?
57
  A: 4
58
 
59
  Q: How many studio albums did Mercedes Sosa release between 2000 and 2009 (inclusive)?
60
- A: 3
61
 
62
  Q: List the planets in our solar system.
63
  A: Earth, Jupiter, Mars, Mercury, Neptune, Saturn, Uranus, Venus
@@ -69,7 +116,7 @@ A: Earth, Jupiter, Mars, Mercury, Neptune, Saturn, Uranus, Venus
69
 
70
  def run_agent():
71
  try:
72
- result_container[0] = str(self.agent.run(question)).strip()
73
  except Exception as e:
74
  error_container[0] = str(e)
75
 
@@ -78,13 +125,17 @@ A: Earth, Jupiter, Mars, Mercury, Neptune, Saturn, Uranus, Venus
78
  thread.join(timeout=180) # 3 minutes max per question
79
 
80
  if thread.is_alive():
81
- print(f"Question timed out: {question[:80]}...")
82
  return "unknown"
83
  elif error_container[0]:
84
- print(f"Agent error: {error_container[0]}")
85
  return f"AGENT ERROR: {error_container[0]}"
86
  else:
87
- return result_container[0]
 
 
 
 
88
 
89
 
90
  # ============================================================================
 
1
  import os
2
+ import re
3
  import threading
4
  import gradio as gr
5
  import requests
6
  import pandas as pd
7
+ from smolagents import ToolCallingAgent, DuckDuckGoSearchTool, VisitWebpageTool, LiteLLMModel
8
 
9
  # --- Constants ---
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
12
 
13
+ # ============================================================================
14
+ # ANSWER CLEANUP
15
+ # Strips explanatory text so the submitted answer is bare and exact-match ready.
16
+ # ============================================================================
17
+
18
def clean_answer(raw: str) -> str:
    """Extract the bare answer from whatever the agent returned.

    Handles common patterns where the model wraps the answer in markdown
    emphasis, a fenced code block, an ``[ANSWER]`` tag, or preceding
    explanatory lines.

    Args:
        raw: The agent's raw textual output.

    Returns:
        The cleaned answer string, or ``"unknown"`` when ``raw`` is empty or
        contains nothing but whitespace/markup once cleaned.
    """
    if not raw:
        return "unknown"

    # Remove markdown bold/italic markers.
    text = re.sub(r'\*+', '', raw.strip())

    # If a fenced code block is present anywhere, keep only its content.
    code_fence = re.search(r'```(?:python)?\s*(.*?)\s*```', text, re.DOTALL)
    if code_fence:
        text = code_fence.group(1).strip()

    # Keep only what follows an [ANSWER] tag, if one is present.
    answer_tag = re.search(r'\[ANSWER\]\s*(.*)', text, re.DOTALL)
    if answer_tag:
        text = answer_tag.group(1).strip()

    lines = [line.strip() for line in text.splitlines() if line.strip()]

    # Nothing survived the cleanup (e.g. whitespace-only input, "**", or an
    # empty code fence): report the same sentinel used for empty input
    # instead of returning an empty string.
    if not lines:
        return "unknown"

    # A single line is already a bare answer.
    if len(lines) == 1:
        return lines[0]

    # "Thoughts: ... \n <answer>" pattern: prefer the last non-empty line,
    # but only when it looks like a bare answer rather than a long sentence.
    last_line = lines[-1]
    if len(last_line) < 100 and not last_line.endswith(('.', '?', '!')):
        return last_line
    # A short last line is accepted even if it ends with punctuation.
    if len(last_line) < 50:
        return last_line

    # Fallback: return the full cleaned text.
    return text.strip()
59
+
60
+
61
  # ============================================================================
62
  # AGENT DEFINITION
63
  # ============================================================================
 
68
  if not api_key:
69
  raise ValueError("GEMINI_API_KEY not set in Space secrets")
70
 
71
+ # ToolCallingAgent uses JSON tool calls, compatible with how
72
+ # Gemini 2.5 Flash responds (no code block requirement)
 
73
  model = LiteLLMModel(
74
  model_id="gemini/gemini-2.5-flash",
75
  api_key=api_key,
 
78
  max_tokens=2048,
79
  )
80
 
81
+ self.agent = ToolCallingAgent(
82
  model=model,
83
  tools=[
84
  DuckDuckGoSearchTool(),
85
  VisitWebpageTool(),
86
  ],
 
87
  max_steps=6,
88
  )
89
 
 
104
  A: 4
105
 
106
  Q: How many studio albums did Mercedes Sosa release between 2000 and 2009 (inclusive)?
107
+ A: 5
108
 
109
  Q: List the planets in our solar system.
110
  A: Earth, Jupiter, Mars, Mercury, Neptune, Saturn, Uranus, Venus
 
116
 
117
  def run_agent():
118
  try:
119
+ result_container[0] = self.agent.run(question)
120
  except Exception as e:
121
  error_container[0] = str(e)
122
 
 
125
  thread.join(timeout=180) # 3 minutes max per question
126
 
127
  if thread.is_alive():
128
+ print(f" Question timed out: {question[:80]}...")
129
  return "unknown"
130
  elif error_container[0]:
131
+ print(f" Agent error: {error_container[0]}")
132
  return f"AGENT ERROR: {error_container[0]}"
133
  else:
134
+ raw = str(result_container[0]).strip() if result_container[0] is not None else "unknown"
135
+ cleaned = clean_answer(raw)
136
+ if cleaned != raw:
137
+ print(f" Answer cleaned: {repr(raw[:80])} -> {repr(cleaned[:80])}")
138
+ return cleaned
139
 
140
 
141
  # ============================================================================