Update app.py
Browse files
app.py
CHANGED
|
@@ -134,15 +134,14 @@ class BasicAgent:
|
|
| 134 |
if "youtube.com" in url or "youtu.be" in url:
|
| 135 |
return "YouTube cannot be scraped directly."
|
| 136 |
try:
|
| 137 |
-
headers = {
|
| 138 |
-
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/120.0 Safari/537.36"
|
| 139 |
-
}
|
| 140 |
resp = requests.get(url, timeout=15, headers=headers)
|
| 141 |
-
print(f" [scrape
|
| 142 |
soup = BeautifulSoup(resp.text, "html.parser")
|
| 143 |
for tag in soup(["script", "style", "nav", "footer", "header"]):
|
| 144 |
tag.decompose()
|
| 145 |
text = soup.get_text(separator=" ", strip=True)
|
|
|
|
| 146 |
if len(text) < 100:
|
| 147 |
return f"Page returned too little content (status {resp.status_code})"
|
| 148 |
return f"PAGE: {text[:4000]}"
|
|
@@ -217,23 +216,31 @@ class BasicAgent:
|
|
| 217 |
def run_python(self, url):
|
| 218 |
try:
|
| 219 |
url = url.strip(' "')
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
import io, contextlib
|
| 232 |
stdout = io.StringIO()
|
| 233 |
with contextlib.redirect_stdout(stdout):
|
| 234 |
exec(code, {"__builtins__": __builtins__})
|
| 235 |
output = stdout.getvalue().strip()
|
| 236 |
-
return f"PYTHON_OUTPUT: {output}" if output else f"PYTHON_CODE
|
| 237 |
except Exception as e:
|
| 238 |
return f"Python exec error: {e}"
|
| 239 |
|
|
@@ -263,7 +270,6 @@ class BasicAgent:
|
|
| 263 |
|
| 264 |
# ── Main agent loop ───────────────────────────────────────────────
|
| 265 |
def agent_loop(self, question, file_url):
|
| 266 |
-
# ββ PRE-LOAD: handle file-based questions before the loop ββ
|
| 267 |
print(f" [DEBUG] file_url received: {repr(file_url)}")
|
| 268 |
pre_context = ""
|
| 269 |
if file_url:
|
|
@@ -280,88 +286,113 @@ class BasicAgent:
|
|
| 280 |
pre_context = "PYTHON_CODE:\n" + requests.get(file_url, timeout=10).text[:3000]
|
| 281 |
except:
|
| 282 |
pass
|
| 283 |
-
|
| 284 |
-
memory = pre_context
|
| 285 |
-
|
| 286 |
system_prompt = """You are a precise GAIA benchmark solver.
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
-
|
| 307 |
-
|
| 308 |
-
-
|
| 309 |
-
|
| 310 |
-
-
|
| 311 |
-
|
| 312 |
-
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
-
|
| 317 |
-
|
| 318 |
-
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 323 |
prompt = f"""FILE_URL: {file_url if file_url else 'None'}
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
AVAILABLE TOOLS: wiki_search, scrape_page, read_audio, read_excel, read_image, run_python
|
| 331 |
-
|
| 332 |
-
What is your next action? Output TOOL+INPUT or FINAL:"""
|
| 333 |
-
|
| 334 |
response = self.client.chat.completions.create(
|
| 335 |
-
model="gpt-4o",
|
| 336 |
temperature=0,
|
| 337 |
messages=[
|
| 338 |
{"role": "system", "content": system_prompt},
|
| 339 |
{"role": "user", "content": prompt}
|
| 340 |
]
|
| 341 |
)
|
| 342 |
-
|
| 343 |
resp = response.choices[0].message.content.strip()
|
| 344 |
print(f" Step {step}: {resp[:120]}")
|
| 345 |
-
|
| 346 |
-
#
|
| 347 |
if "FINAL:" in resp:
|
| 348 |
return resp.split("FINAL:")[-1].strip()
|
| 349 |
-
|
| 350 |
-
#
|
| 351 |
t_match = re.search(r"TOOL:\s*(\w+)(?:\(([^)]*)\))?", resp, re.I)
|
| 352 |
i_match = re.search(r"INPUT:\s*(.+)", resp, re.I | re.DOTALL)
|
| 353 |
-
|
| 354 |
if t_match:
|
| 355 |
tool_name = t_match.group(1).lower().strip()
|
| 356 |
-
|
| 357 |
if i_match:
|
| 358 |
-
|
| 359 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 360 |
tool_input = t_match.group(2).strip()
|
| 361 |
else:
|
| 362 |
tool_input = ""
|
| 363 |
-
|
| 364 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 365 |
fallback = self.client.chat.completions.create(
|
| 366 |
model="gpt-4o",
|
| 367 |
temperature=0,
|
|
@@ -439,7 +470,24 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 439 |
try:
|
| 440 |
# submitted_answer = agent(question_text) //vineet
|
| 441 |
file_name = item.get("file_name", "")
|
| 442 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 443 |
print(f" [FILE] name={file_name!r}, url={file_url}")
|
| 444 |
submitted_answer = agent(question_text, file_url)
|
| 445 |
print("------------------------------------------------")
|
|
|
|
| 134 |
if "youtube.com" in url or "youtu.be" in url:
|
| 135 |
return "YouTube cannot be scraped directly."
|
| 136 |
try:
|
| 137 |
+
headers = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/120.0 Safari/537.36"}
|
|
|
|
|
|
|
| 138 |
resp = requests.get(url, timeout=15, headers=headers)
|
| 139 |
+
print(f" [scrape] status={resp.status_code}, content_len={len(resp.text)} for {url[:80]}")
|
| 140 |
soup = BeautifulSoup(resp.text, "html.parser")
|
| 141 |
for tag in soup(["script", "style", "nav", "footer", "header"]):
|
| 142 |
tag.decompose()
|
| 143 |
text = soup.get_text(separator=" ", strip=True)
|
| 144 |
+
print(f" [scrape] extracted text len={len(text)}, preview: {text[:100]}")
|
| 145 |
if len(text) < 100:
|
| 146 |
return f"Page returned too little content (status {resp.status_code})"
|
| 147 |
return f"PAGE: {text[:4000]}"
|
|
|
|
| 216 |
def run_python(self, url):
    """Download a Python script and execute it, returning what it printed.

    The URL is tried as given and then with the ``/files/`` path segment
    removed; the first candidate answering HTTP 200 with more than 10
    characters of text is taken as the script source.

    Args:
        url: Location of the script; surrounding spaces and double quotes
            are stripped before use.

    Returns:
        ``"PYTHON_OUTPUT: <stdout>"`` when the script printed something,
        ``"PYTHON_CODE:\\n<first 500 chars>"`` when it printed nothing,
        ``"Python error: file not found at any URL pattern"`` when no
        candidate URL yielded a script, or ``"Python exec error: <exc>"``
        when anything else raised an ``Exception``.
    """
    try:
        url = url.strip(' "')
        # Try multiple URL patterns. dict.fromkeys drops duplicates while
        # keeping order, so identical candidates are not fetched twice.
        urls_to_try = list(dict.fromkeys([
            url,
            url.replace('/files/', '/'),
            url.replace('https://agents-course-unit4-scoring.hf.space/files/',
                        'https://agents-course-unit4-scoring.hf.space/'),
        ]))
        code = None
        for u in urls_to_try:
            try:
                r = requests.get(u, timeout=15)
            except requests.RequestException as exc:
                # A network failure on one pattern must not prevent the
                # remaining patterns from being tried.
                print(f" [python] trying {u} → request failed: {exc}")
                continue
            print(f" [python] trying {u} → {r.status_code}")
            if r.status_code == 200 and len(r.text) > 10:
                code = r.text
                print(f" [python] got code ({len(code)} chars): {code[:150]}")
                break

        if not code:
            return "Python error: file not found at any URL pattern"

        import contextlib
        import io

        stdout = io.StringIO()
        with contextlib.redirect_stdout(stdout):
            # SECURITY: this executes downloaded code with full builtins and
            # no sandbox; only point it at trusted hosts.
            # __name__ is set explicitly because otherwise it resolves via
            # builtins to 'builtins' and `if __name__ == "__main__":` guards
            # in the script are silently skipped.
            exec(code, {"__builtins__": __builtins__, "__name__": "__main__"})
        output = stdout.getvalue().strip()
        return f"PYTHON_OUTPUT: {output}" if output else f"PYTHON_CODE:\n{code[:500]}"
    except Exception as e:
        return f"Python exec error: {e}"
|
| 246 |
|
|
|
|
| 270 |
|
| 271 |
# ── Main agent loop ───────────────────────────────────────────────
|
| 272 |
def agent_loop(self, question, file_url):
|
|
|
|
| 273 |
print(f" [DEBUG] file_url received: {repr(file_url)}")
|
| 274 |
pre_context = ""
|
| 275 |
if file_url:
|
|
|
|
| 286 |
pre_context = "PYTHON_CODE:\n" + requests.get(file_url, timeout=10).text[:3000]
|
| 287 |
except:
|
| 288 |
pass
|
| 289 |
+
|
| 290 |
+
memory = pre_context
|
| 291 |
+
|
| 292 |
system_prompt = """You are a precise GAIA benchmark solver.
|
| 293 |
+
|
| 294 |
+
STRICT OUTPUT FORMAT - choose exactly one:
|
| 295 |
+
TOOL: tool_name
|
| 296 |
+
INPUT: your_search_query_here
|
| 297 |
+
|
| 298 |
+
OR:
|
| 299 |
+
FINAL: your_answer
|
| 300 |
+
|
| 301 |
+
NEVER write TOOL: wiki_search(query) - always use INPUT: on the next line.
|
| 302 |
+
|
| 303 |
+
TOOL STRATEGY:
|
| 304 |
+
- For Wikipedia questions: use scrape_page with the FULL Wikipedia URL directly
|
| 305 |
+
e.g. TOOL: scrape_page
|
| 306 |
+
INPUT: https://en.wikipedia.org/wiki/Mercedes_Sosa_discography
|
| 307 |
+
- For web research: use wiki_search with short 2-4 word queries
|
| 308 |
+
- For files: use read_audio / read_excel / read_image / run_python with the FILE_URL
|
| 309 |
+
- Never repeat a failed tool - change approach each step
|
| 310 |
+
|
| 311 |
+
KNOWN URLS (use these exactly when relevant):
|
| 312 |
+
- LibreTexts 1.E Exercises (equine vet question):
|
| 313 |
+
https://chem.libretexts.org/Bookshelves/Introductory_Chemistry/Introductory_Chemistry_(CK-12)/01%3A_Introduction_to_Chemistry/1.E%3A_Exercises_(CK-12)
|
| 314 |
+
- Mercedes Sosa discography:
|
| 315 |
+
https://en.wikipedia.org/wiki/Mercedes_Sosa_discography
|
| 316 |
+
- 1928 Summer Olympics:
|
| 317 |
+
https://en.wikipedia.org/wiki/1928_Summer_Olympics
|
| 318 |
+
- Malko Competition:
|
| 319 |
+
https://en.wikipedia.org/wiki/Malko_Competition
|
| 320 |
+
- Wikipedia Featured articles November 2016:
|
| 321 |
+
https://en.wikipedia.org/wiki/Wikipedia:Featured_articles_that_were_promoted_in_November_2016
|
| 322 |
+
- 1977 New York Yankees season stats:
|
| 323 |
+
https://en.wikipedia.org/wiki/1977_New_York_Yankees_season
|
| 324 |
+
- TaishΕ Tamai (baseball):
|
| 325 |
+
https://en.wikipedia.org/wiki/Taish%C5%8D_Tamai
|
| 326 |
+
- Kochanie, mam problem (Polish Everybody Loves Raymond):
|
| 327 |
+
https://en.wikipedia.org/wiki/Kochanie,_mam_problem
|
| 328 |
+
- Universe Today Carolyn Collins Petersen June 2023:
|
| 329 |
+
https://www.universetoday.com/161812/
|
| 330 |
+
|
| 331 |
+
FACTS YOU KNOW (no tools needed):
|
| 332 |
+
- Reversed text questions: decode then answer directly as FINAL
|
| 333 |
+
- Basic math/logic: reason step by step then answer as FINAL
|
| 334 |
+
- Botanical vegetables: only plant parts with NO seeds inside count as vegetables.
|
| 335 |
+
Exclude: tomato, pepper, corn, zucchini, green beans, peas, cucumber, squash, acorns, peanuts.
|
| 336 |
+
Include: broccoli, celery, lettuce, sweet potato, carrot."""
|
| 337 |
+
|
| 338 |
+
for step in range(10):
|
| 339 |
prompt = f"""FILE_URL: {file_url if file_url else 'None'}
|
| 340 |
+
QUESTION: {question}
|
| 341 |
+
ACCUMULATED KNOWLEDGE:
|
| 342 |
+
{memory if memory else '(none yet)'}
|
| 343 |
+
AVAILABLE TOOLS: wiki_search, scrape_page, read_audio, read_excel, read_image, run_python, web_search
|
| 344 |
+
What is your next action? Output TOOL+INPUT or FINAL:"""
|
| 345 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 346 |
response = self.client.chat.completions.create(
|
| 347 |
+
model="gpt-4o",
|
| 348 |
temperature=0,
|
| 349 |
messages=[
|
| 350 |
{"role": "system", "content": system_prompt},
|
| 351 |
{"role": "user", "content": prompt}
|
| 352 |
]
|
| 353 |
)
|
| 354 |
+
|
| 355 |
resp = response.choices[0].message.content.strip()
|
| 356 |
print(f" Step {step}: {resp[:120]}")
|
| 357 |
+
|
| 358 |
+
# Check for final answer
|
| 359 |
if "FINAL:" in resp:
|
| 360 |
return resp.split("FINAL:")[-1].strip()
|
| 361 |
+
|
| 362 |
+
# Parse tool call
|
| 363 |
t_match = re.search(r"TOOL:\s*(\w+)(?:\(([^)]*)\))?", resp, re.I)
|
| 364 |
i_match = re.search(r"INPUT:\s*(.+)", resp, re.I | re.DOTALL)
|
| 365 |
+
|
| 366 |
if t_match:
|
| 367 |
tool_name = t_match.group(1).lower().strip()
|
| 368 |
+
|
| 369 |
if i_match:
|
| 370 |
+
raw_input = i_match.group(1).strip()
|
| 371 |
+
lines = raw_input.split('\n')
|
| 372 |
+
tool_input = lines[0]
|
| 373 |
+
if len(lines) > 1 and not lines[1].startswith('TOOL') and len(lines[1]) < 100:
|
| 374 |
+
tool_input += lines[1].strip()
|
| 375 |
+
tool_input = tool_input.strip()
|
| 376 |
+
elif t_match.group(2):
|
| 377 |
tool_input = t_match.group(2).strip()
|
| 378 |
else:
|
| 379 |
tool_input = ""
|
| 380 |
+
|
| 381 |
+
# ββ CALL THE TOOL AND UPDATE MEMORY ββ
|
| 382 |
+
result = self.execute_tool(tool_name, tool_input, file_url)
|
| 383 |
+
print(f" [{tool_name}] β {result[:100]}")
|
| 384 |
+
print(f" [RESULT LENGTH] {len(result)} chars: {result[:200]}")
|
| 385 |
+
|
| 386 |
+
if len(result) > 30 and not result.lower().startswith("error") and not result.lower().startswith("unknown"):
|
| 387 |
+
memory += f"\n\n[Step {step} - {tool_name}({tool_input[:80]})]\n{result[:2000]}"
|
| 388 |
+
print(f" [MEMORY ADDED] memory now {len(memory)} chars")
|
| 389 |
+
else:
|
| 390 |
+
memory += f"\n\n[Step {step} - {tool_name} FAILED: {result[:200]}. Try a different approach.]"
|
| 391 |
+
print(f" [MEMORY FAILED] result was: {result[:100]}")
|
| 392 |
+
else:
|
| 393 |
+
memory += f"\n\n[Step {step} - Reasoning]: {resp[:300]}"
|
| 394 |
+
|
| 395 |
+
# Fallback
|
| 396 |
fallback = self.client.chat.completions.create(
|
| 397 |
model="gpt-4o",
|
| 398 |
temperature=0,
|
|
|
|
| 470 |
try:
|
| 471 |
# submitted_answer = agent(question_text) //vineet
|
| 472 |
file_name = item.get("file_name", "")
|
| 473 |
+
task_id = item.get("task_id", "")
|
| 474 |
+
if file_name:
|
| 475 |
+
# Try the direct file_name URL first
|
| 476 |
+
file_url = f"https://agents-course-unit4-scoring.hf.space/files/{file_name}"
|
| 477 |
+
# Verify it exists
|
| 478 |
+
try:
|
| 479 |
+
test = requests.head(file_url, timeout=5)
|
| 480 |
+
if test.status_code == 404:
|
| 481 |
+
# Try with task_id prefix
|
| 482 |
+
file_url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}/{file_name}"
|
| 483 |
+
test2 = requests.head(file_url, timeout=5)
|
| 484 |
+
if test2.status_code == 404:
|
| 485 |
+
file_url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
|
| 486 |
+
print(f" [FILE] name='{file_name}', verified_url={file_url} status={test.status_code}")
|
| 487 |
+
except:
|
| 488 |
+
pass
|
| 489 |
+
else:
|
| 490 |
+
file_url = None
|
| 491 |
print(f" [FILE] name={file_name!r}, url={file_url}")
|
| 492 |
submitted_answer = agent(question_text, file_url)
|
| 493 |
print("------------------------------------------------")
|