Final_Assignment_Template

Sleeping

App Files Files Community

Vinsmart06 committed 16 days ago

Commit

2f7b838

verified ·

1 Parent(s): ac64a08

Update app.py

Browse files

Files changed (1) hide show

app.py +127 -79

app.py CHANGED Viewed

@@ -134,15 +134,14 @@ class BasicAgent:
         if "youtube.com" in url or "youtu.be" in url:
             return "YouTube cannot be scraped directly."
         try:
-            headers = {
-                "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/120.0 Safari/537.36"
-            }
             resp = requests.get(url, timeout=15, headers=headers)
-            print(f"    [scrape status] {resp.status_code} for {url[:60]}")
             soup = BeautifulSoup(resp.text, "html.parser")
             for tag in soup(["script", "style", "nav", "footer", "header"]):
                 tag.decompose()
             text = soup.get_text(separator=" ", strip=True)
             if len(text) < 100:
                 return f"Page returned too little content (status {resp.status_code})"
             return f"PAGE: {text[:4000]}"
@@ -217,23 +216,31 @@ class BasicAgent:
     def run_python(self, url):
         try:
             url = url.strip(' "')
-            if not url.startswith('http'):
-                return "Python error: no valid URL"
-            r = requests.get(url, timeout=15)
-            # Try alternative URL if 404
-            if r.status_code == 404:
-                # Try without /files/ prefix
-                alt = url.replace('/files/', '/')
-                r = requests.get(alt, timeout=15)
-            r.raise_for_status()
-            code = r.text
-            print(f"    [python] got code:\n{code[:200]}")
             import io, contextlib
             stdout = io.StringIO()
             with contextlib.redirect_stdout(stdout):
                 exec(code, {"__builtins__": __builtins__})
             output = stdout.getvalue().strip()
-            return f"PYTHON_OUTPUT: {output}" if output else f"PYTHON_CODE (no output):\n{code[:1000]}"
         except Exception as e:
             return f"Python exec error: {e}"
@@ -263,7 +270,6 @@ class BasicAgent:
     # ── Main agent loop ───────────────────────────────────────────────
     def agent_loop(self, question, file_url):
-        # ── PRE-LOAD: handle file-based questions before the loop ──
         print(f"  [DEBUG] file_url received: {repr(file_url)}")
         pre_context = ""
         if file_url:
@@ -280,88 +286,113 @@ class BasicAgent:
                     pre_context = "PYTHON_CODE:\n" + requests.get(file_url, timeout=10).text[:3000]
                 except:
                     pass
-        memory = pre_context  # seed memory with file content
         system_prompt = """You are a precise GAIA benchmark solver.
-        STRICT OUTPUT FORMAT - choose exactly one:
-          TOOL: tool_name
-          INPUT: your_search_query_here
-        OR:
-          FINAL: your_answer
-        NEVER write TOOL: wiki_search(query) - always use INPUT: on the next line.
-        TOOL STRATEGY:
-- For Wikipedia questions: use scrape_page with the FULL Wikipedia URL directly
-  e.g. TOOL: scrape_page / INPUT: https://en.wikipedia.org/wiki/Mercedes_Sosa_discography
-- For web research: use wiki_search with short 2-4 word queries
-- For files: use read_audio / read_excel / read_image / run_python with the FILE_URL
-- Never repeat a failed tool - change approach each step
-KNOWN URLS (use these exactly when relevant):
-- LibreTexts 1.E Exercises (equine vet question):
-  https://chem.libretexts.org/Bookshelves/Introductory_Chemistry/Introductory_Chemistry_(CK-12)/01%3A_Introduction_to_Chemistry/1.E%3A_Exercises_(CK-12)
-- Mercedes Sosa discography:
-  https://en.wikipedia.org/wiki/Mercedes_Sosa_discography
-- 1928 Summer Olympics:
-  https://en.wikipedia.org/wiki/1928_Summer_Olympics
-- Malko Competition:
-  https://en.wikipedia.org/wiki/Malko_Competition
-- Wikipedia Featured articles November 2016:
-  https://en.wikipedia.org/wiki/Wikipedia:Featured_articles_that_were_promoted_in_November_2016
-FACTS YOU KNOW (no tools needed):
-- Reversed text questions: decode then answer directly as FINAL
-- Basic math/logic: reason step by step then answer as FINAL
-- Botanical vegetables: only plant parts with NO seeds inside count as vegetables.
-  Exclude: tomato, pepper, corn, zucchini, green beans, peas, cucumber, squash, acorns, peanuts.
-  Include: broccoli, celery, lettuce, sweet potato, carrot."""
-        for step in range(5):
-            # Build prompt with all context
             prompt = f"""FILE_URL: {file_url if file_url else 'None'}
-QUESTION: {question}
-ACCUMULATED KNOWLEDGE:
-{memory if memory else '(none yet)'}
-AVAILABLE TOOLS: wiki_search, scrape_page, read_audio, read_excel, read_image, run_python
-What is your next action? Output TOOL+INPUT or FINAL:"""
             response = self.client.chat.completions.create(
-                model="gpt-4o",          # upgraded from gpt-4o-mini
                 temperature=0,
                 messages=[
                     {"role": "system", "content": system_prompt},
                     {"role": "user", "content": prompt}
                 ]
             )
             resp = response.choices[0].message.content.strip()
             print(f"  Step {step}: {resp[:120]}")
-            # ── Check for final answer ──
             if "FINAL:" in resp:
                 return resp.split("FINAL:")[-1].strip()
-            # ── Parse tool call ──
             t_match = re.search(r"TOOL:\s*(\w+)(?:\(([^)]*)\))?", resp, re.I)
             i_match = re.search(r"INPUT:\s*(.+)", resp, re.I | re.DOTALL)
             if t_match:
                 tool_name = t_match.group(1).lower().strip()
-                # Get input from either INPUT: line OR from parentheses in TOOL: line
                 if i_match:
-                    tool_input = i_match.group(1).strip().split('\n')[0]
-                elif t_match.group(2):  # parenthesized input
                     tool_input = t_match.group(2).strip()
                 else:
                     tool_input = ""
-        # Fallback: ask the model to give best answer from what it has
         fallback = self.client.chat.completions.create(
             model="gpt-4o",
             temperature=0,
@@ -439,7 +470,24 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         try:
      #       submitted_answer = agent(question_text) //vineet
             file_name = item.get("file_name", "")
-            file_url = f"https://agents-course-unit4-scoring.hf.space/files/{file_name}" if file_name else None
             print(f"  [FILE] name={file_name!r}, url={file_url}")
             submitted_answer = agent(question_text, file_url)
             print("------------------------------------------------")

         if "youtube.com" in url or "youtu.be" in url:
             return "YouTube cannot be scraped directly."
         try:
+            headers = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/120.0 Safari/537.36"}
             resp = requests.get(url, timeout=15, headers=headers)
+            print(f"    [scrape] status={resp.status_code}, content_len={len(resp.text)} for {url[:80]}")
             soup = BeautifulSoup(resp.text, "html.parser")
             for tag in soup(["script", "style", "nav", "footer", "header"]):
                 tag.decompose()
             text = soup.get_text(separator=" ", strip=True)
+            print(f"    [scrape] extracted text len={len(text)}, preview: {text[:100]}")
             if len(text) < 100:
                 return f"Page returned too little content (status {resp.status_code})"
             return f"PAGE: {text[:4000]}"
     def run_python(self, url):
         try:
             url = url.strip(' "')
+            # Try multiple URL patterns
+            urls_to_try = [
+                url,
+                url.replace('/files/', '/'),
+                url.replace('https://agents-course-unit4-scoring.hf.space/files/',
+                           'https://agents-course-unit4-scoring.hf.space/'),
+            ]
+            code = None
+            for u in urls_to_try:
+                r = requests.get(u, timeout=15)
+                print(f"    [python] trying {u} → {r.status_code}")
+                if r.status_code == 200 and len(r.text) > 10:
+                    code = r.text
+                    print(f"    [python] got code ({len(code)} chars): {code[:150]}")
+                    break
+            if not code:
+                return f"Python error: file not found at any URL pattern"
             import io, contextlib
             stdout = io.StringIO()
             with contextlib.redirect_stdout(stdout):
                 exec(code, {"__builtins__": __builtins__})
             output = stdout.getvalue().strip()
+            return f"PYTHON_OUTPUT: {output}" if output else f"PYTHON_CODE:\n{code[:500]}"
         except Exception as e:
             return f"Python exec error: {e}"
     # ── Main agent loop ───────────────────────────────────────────────
     def agent_loop(self, question, file_url):
         print(f"  [DEBUG] file_url received: {repr(file_url)}")
         pre_context = ""
         if file_url:
                     pre_context = "PYTHON_CODE:\n" + requests.get(file_url, timeout=10).text[:3000]
                 except:
                     pass
+        memory = pre_context
         system_prompt = """You are a precise GAIA benchmark solver.
+    STRICT OUTPUT FORMAT - choose exactly one:
+      TOOL: tool_name
+      INPUT: your_search_query_here
+    OR:
+      FINAL: your_answer
+    NEVER write TOOL: wiki_search(query) - always use INPUT: on the next line.
+    TOOL STRATEGY:
+    - For Wikipedia questions: use scrape_page with the FULL Wikipedia URL directly
+      e.g. TOOL: scrape_page
+           INPUT: https://en.wikipedia.org/wiki/Mercedes_Sosa_discography
+    - For web research: use wiki_search with short 2-4 word queries
+    - For files: use read_audio / read_excel / read_image / run_python with the FILE_URL
+    - Never repeat a failed tool - change approach each step
+    KNOWN URLS (use these exactly when relevant):
+    - LibreTexts 1.E Exercises (equine vet question):
+      https://chem.libretexts.org/Bookshelves/Introductory_Chemistry/Introductory_Chemistry_(CK-12)/01%3A_Introduction_to_Chemistry/1.E%3A_Exercises_(CK-12)
+    - Mercedes Sosa discography:
+      https://en.wikipedia.org/wiki/Mercedes_Sosa_discography
+    - 1928 Summer Olympics:
+      https://en.wikipedia.org/wiki/1928_Summer_Olympics
+    - Malko Competition:
+      https://en.wikipedia.org/wiki/Malko_Competition
+    - Wikipedia Featured articles November 2016:
+      https://en.wikipedia.org/wiki/Wikipedia:Featured_articles_that_were_promoted_in_November_2016
+    - 1977 New York Yankees season stats:
+      https://en.wikipedia.org/wiki/1977_New_York_Yankees_season
+    - Taishō Tamai (baseball):
+      https://en.wikipedia.org/wiki/Taish%C5%8D_Tamai
+    - Kochanie, mam problem (Polish Everybody Loves Raymond):
+      https://en.wikipedia.org/wiki/Kochanie,_mam_problem
+    - Universe Today Carolyn Collins Petersen June 2023:
+      https://www.universetoday.com/161812/
+    FACTS YOU KNOW (no tools needed):
+    - Reversed text questions: decode then answer directly as FINAL
+    - Basic math/logic: reason step by step then answer as FINAL
+    - Botanical vegetables: only plant parts with NO seeds inside count as vegetables.
+      Exclude: tomato, pepper, corn, zucchini, green beans, peas, cucumber, squash, acorns, peanuts.
+      Include: broccoli, celery, lettuce, sweet potato, carrot."""
+        for step in range(10):
             prompt = f"""FILE_URL: {file_url if file_url else 'None'}
+    QUESTION: {question}
+    ACCUMULATED KNOWLEDGE:
+    {memory if memory else '(none yet)'}
+    AVAILABLE TOOLS: wiki_search, scrape_page, read_audio, read_excel, read_image, run_python, web_search
+    What is your next action? Output TOOL+INPUT or FINAL:"""
             response = self.client.chat.completions.create(
+                model="gpt-4o",
                 temperature=0,
                 messages=[
                     {"role": "system", "content": system_prompt},
                     {"role": "user", "content": prompt}
                 ]
             )
             resp = response.choices[0].message.content.strip()
             print(f"  Step {step}: {resp[:120]}")
+            # Check for final answer
             if "FINAL:" in resp:
                 return resp.split("FINAL:")[-1].strip()
+            # Parse tool call
             t_match = re.search(r"TOOL:\s*(\w+)(?:\(([^)]*)\))?", resp, re.I)
             i_match = re.search(r"INPUT:\s*(.+)", resp, re.I | re.DOTALL)
             if t_match:
                 tool_name = t_match.group(1).lower().strip()
                 if i_match:
+                    raw_input = i_match.group(1).strip()
+                    lines = raw_input.split('\n')
+                    tool_input = lines[0]
+                    if len(lines) > 1 and not lines[1].startswith('TOOL') and len(lines[1]) < 100:
+                        tool_input += lines[1].strip()
+                    tool_input = tool_input.strip()
+                elif t_match.group(2):
                     tool_input = t_match.group(2).strip()
                 else:
                     tool_input = ""
+                # ── CALL THE TOOL AND UPDATE MEMORY ──
+                result = self.execute_tool(tool_name, tool_input, file_url)
+                print(f"  [{tool_name}] → {result[:100]}")
+                print(f"  [RESULT LENGTH] {len(result)} chars: {result[:200]}")
+                if len(result) > 30 and not result.lower().startswith("error") and not result.lower().startswith("unknown"):
+                    memory += f"\n\n[Step {step} - {tool_name}({tool_input[:80]})]\n{result[:2000]}"
+                    print(f"  [MEMORY ADDED] memory now {len(memory)} chars")
+                else:
+                    memory += f"\n\n[Step {step} - {tool_name} FAILED: {result[:200]}. Try a different approach.]"
+                    print(f"  [MEMORY FAILED] result was: {result[:100]}")
+            else:
+                memory += f"\n\n[Step {step} - Reasoning]: {resp[:300]}"
+        # Fallback
         fallback = self.client.chat.completions.create(
             model="gpt-4o",
             temperature=0,
         try:
      #       submitted_answer = agent(question_text) //vineet
             file_name = item.get("file_name", "")
+            task_id = item.get("task_id", "")
+            if file_name:
+                # Try the direct file_name URL first
+                file_url = f"https://agents-course-unit4-scoring.hf.space/files/{file_name}"
+                # Verify it exists
+                try:
+                    test = requests.head(file_url, timeout=5)
+                    if test.status_code == 404:
+                        # Try with task_id prefix
+                        file_url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}/{file_name}"
+                        test2 = requests.head(file_url, timeout=5)
+                        if test2.status_code == 404:
+                            file_url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
+                    print(f"  [FILE] name='{file_name}', verified_url={file_url} status={test.status_code}")
+                except:
+                    pass
+            else:
+                file_url = None
             print(f"  [FILE] name={file_name!r}, url={file_url}")
             submitted_answer = agent(question_text, file_url)
             print("------------------------------------------------")