Spaces:
Sleeping
Sleeping
Commit ·
fec32f4
1
Parent(s): cbc2dad
test agent result
Browse files- test_agent.py +73 -0
- view_jsonfile.ipynb +154 -118
test_agent.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from agent import MyAgent
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def test_agent(
    metadata_path: str = "metadata.jsonl",
    max_tests: int = 5,
):
    """
    Smoke-test MyAgent against questions from a GAIA metadata JSONL file.

    Reads up to ``max_tests`` lines from ``metadata_path``, extracts the
    question (and optional attached file) from each JSON record, runs it
    through MyAgent, and prints the answer. A malformed line or a failing
    question is reported and skipped so one bad record does not abort the
    whole run.

    Parameters
    ----------
    metadata_path : str
        Path to the GAIA metadata JSONL file (one JSON object per line).
    max_tests : int
        Maximum number of questions to run.
    """
    # Initialize agent once and reuse it for every question.
    agent = MyAgent()

    metadata_file = Path(metadata_path)
    if not metadata_file.exists():
        print(f"Metadata file not found: {metadata_path}")
        return

    with open(metadata_file, "r", encoding="utf-8") as f:
        for i, line in enumerate(f):
            if i >= max_tests:
                break
            try:
                meta = json.loads(line)
            except json.JSONDecodeError:
                print(f"Invalid JSON on line {i+1}")
                continue

            # Support both 'task_id' and 'id'
            task_id = meta.get("task_id") or meta.get("id") or ""
            # Support 'Question' (GAIA casing), 'question', and 'text'.
            # Fix: the original comment promised 'question' support but the
            # code only checked the capitalized 'Question' key.
            question = (
                meta.get("Question")
                or meta.get("question")
                or meta.get("text")
                or ""
            )

            print(f"--- Test {i+1}/{max_tests}: Task ID {task_id} ---")
            print(f"Question: {question}")

            if not question:
                print("Skipping: no question found\n")
                continue

            try:
                # If there's a file_name field, pass it to agent.run
                # (single lookup instead of the original double .get()).
                file_arg = meta.get("file_name")
                if file_arg:
                    answer = agent.run(question, file_paths=[file_arg])
                else:
                    answer = agent.run(question)
                print(f"Answer: {answer}\n")
            except Exception as e:
                # Best-effort harness: report and move on to the next test.
                print(f"Error running agent on question '{question}': {e}\n")
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
if __name__ == "__main__":
    import argparse

    # CLI entry point: forward --metadata / --max straight to test_agent.
    cli = argparse.ArgumentParser(description="Test MyAgent with GAIA metadata.")
    cli.add_argument(
        "--metadata",
        type=str,
        default="metadata.jsonl",
        help="Path to GAIA metadata JSONL",
    )
    cli.add_argument(
        "--max",
        type=int,
        default=5,
        help="Maximum number of tests to run",
    )
    parsed = cli.parse_args()
    test_agent(parsed.metadata, parsed.max)
|
| 73 |
+
|
view_jsonfile.ipynb
CHANGED
|
@@ -166,6 +166,57 @@
|
|
| 166 |
"df['Annotator Metadata'][1]"
|
| 167 |
]
|
| 168 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
{
|
| 170 |
"cell_type": "code",
|
| 171 |
"execution_count": null,
|
|
@@ -194,7 +245,7 @@
|
|
| 194 |
},
|
| 195 |
{
|
| 196 |
"cell_type": "code",
|
| 197 |
-
"execution_count":
|
| 198 |
"id": "1f0a65e7",
|
| 199 |
"metadata": {},
|
| 200 |
"outputs": [
|
|
@@ -225,135 +276,120 @@
|
|
| 225 |
},
|
| 226 |
{
|
| 227 |
"cell_type": "code",
|
| 228 |
-
"execution_count":
|
| 229 |
-
"id": "
|
| 230 |
"metadata": {},
|
| 231 |
"outputs": [
|
| 232 |
{
|
| 233 |
"data": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
"text/plain": [
|
| 235 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 236 |
]
|
| 237 |
},
|
| 238 |
-
"execution_count":
|
| 239 |
"metadata": {},
|
| 240 |
"output_type": "execute_result"
|
| 241 |
}
|
| 242 |
],
|
| 243 |
"source": [
|
| 244 |
-
"
|
| 245 |
-
"import requests\n",
|
| 246 |
-
"import json\n",
|
| 247 |
-
"import base64\n",
|
| 248 |
-
" \n",
|
| 249 |
-
"import numpy as np\n",
|
| 250 |
-
"from smolagents import CodeAgent, HfApiModel, Tool\n",
|
| 251 |
-
"from langchain.agents import load_tools\n",
|
| 252 |
-
"\n",
|
| 253 |
-
"#load env variables\n",
|
| 254 |
-
"from dotenv import load_dotenv\n",
|
| 255 |
-
"load_dotenv()\n"
|
| 256 |
-
]
|
| 257 |
-
},
|
| 258 |
-
{
|
| 259 |
-
"cell_type": "code",
|
| 260 |
-
"execution_count": 52,
|
| 261 |
-
"id": "00f79e78",
|
| 262 |
-
"metadata": {},
|
| 263 |
-
"outputs": [
|
| 264 |
-
{
|
| 265 |
-
"ename": "PydanticUserError",
|
| 266 |
-
"evalue": "Field 'name' defined on a base class was overridden by a non-annotated attribute. All field definitions, including overrides, require a type annotation.\n\nFor further information visit https://errors.pydantic.dev/2.11/u/model-field-overridden",
|
| 267 |
-
"output_type": "error",
|
| 268 |
-
"traceback": [
|
| 269 |
-
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
| 270 |
-
"\u001b[1;31mPydanticUserError\u001b[0m Traceback (most recent call last)",
|
| 271 |
-
"Cell \u001b[1;32mIn[52], line 38\u001b[0m\n\u001b[0;32m 31\u001b[0m wikipedia_tool \u001b[38;5;241m=\u001b[39m Tool(\n\u001b[0;32m 32\u001b[0m name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mWikipedia\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 33\u001b[0m description\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSearch Wikipedia articles for information\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 34\u001b[0m func\u001b[38;5;241m=\u001b[39mwikipedia\u001b[38;5;241m.\u001b[39mrun\n\u001b[0;32m 35\u001b[0m )\n\u001b[0;32m 37\u001b[0m \u001b[38;5;66;03m# Basic Calculator Tool (free)\u001b[39;00m\n\u001b[1;32m---> 38\u001b[0m \u001b[38;5;28;01mclass\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mCalculatorTool\u001b[39;00m(BaseTool):\n\u001b[0;32m 39\u001b[0m name \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCalculator\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 40\u001b[0m description \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUseful for performing mathematical calculations\u001b[39m\u001b[38;5;124m\"\u001b[39m\n",
|
| 272 |
-
"File \u001b[1;32mc:\\Users\\mabel\\Anaconda3\\envs\\hf_agent\\lib\\site-packages\\pydantic\\_internal\\_model_construction.py:112\u001b[0m, in \u001b[0;36mModelMetaclass.__new__\u001b[1;34m(mcs, cls_name, bases, namespace, __pydantic_generic_metadata__, __pydantic_reset_parent_namespace__, _create_model_module, **kwargs)\u001b[0m\n\u001b[0;32m 110\u001b[0m config_wrapper \u001b[38;5;241m=\u001b[39m ConfigWrapper\u001b[38;5;241m.\u001b[39mfor_model(bases, namespace, kwargs)\n\u001b[0;32m 111\u001b[0m namespace[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel_config\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m config_wrapper\u001b[38;5;241m.\u001b[39mconfig_dict\n\u001b[1;32m--> 112\u001b[0m private_attributes \u001b[38;5;241m=\u001b[39m \u001b[43minspect_namespace\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 113\u001b[0m \u001b[43m \u001b[49m\u001b[43mnamespace\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig_wrapper\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mignored_types\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mclass_vars\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbase_field_names\u001b[49m\n\u001b[0;32m 114\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 115\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m private_attributes \u001b[38;5;129;01mor\u001b[39;00m base_private_attributes:\n\u001b[0;32m 116\u001b[0m original_model_post_init \u001b[38;5;241m=\u001b[39m get_model_post_init(namespace, bases)\n",
|
| 273 |
-
"File \u001b[1;32mc:\\Users\\mabel\\Anaconda3\\envs\\hf_agent\\lib\\site-packages\\pydantic\\_internal\\_model_construction.py:449\u001b[0m, in \u001b[0;36minspect_namespace\u001b[1;34m(namespace, ignored_types, base_class_vars, base_class_fields)\u001b[0m\n\u001b[0;32m 447\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m var_name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m raw_annotations:\n\u001b[0;32m 448\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m var_name \u001b[38;5;129;01min\u001b[39;00m base_class_fields:\n\u001b[1;32m--> 449\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m PydanticUserError(\n\u001b[0;32m 450\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mField \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mvar_name\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m defined on a base class was overridden by a non-annotated attribute. \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m 451\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mAll field definitions, including overrides, require a type annotation.\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 452\u001b[0m code\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel-field-overridden\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 453\u001b[0m )\n\u001b[0;32m 454\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(value, FieldInfo):\n\u001b[0;32m 455\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m PydanticUserError(\n\u001b[0;32m 456\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mField \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mvar_name\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m requires a type annotation\u001b[39m\u001b[38;5;124m'\u001b[39m, code\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel-field-missing-annotation\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m 457\u001b[0m )\n",
|
| 274 |
-
"\u001b[1;31mPydanticUserError\u001b[0m: Field 'name' defined on a base class was overridden by a non-annotated attribute. All field definitions, including overrides, require a type annotation.\n\nFor further information visit https://errors.pydantic.dev/2.11/u/model-field-overridden"
|
| 275 |
-
]
|
| 276 |
-
}
|
| 277 |
-
],
|
| 278 |
-
"source": [
|
| 279 |
-
"# build tools for the agent with the following functions: websearch,calculator,Image recognition tools, image viewer, pdf viewer, pdf accesser\n",
|
| 280 |
-
"# use langchain and other libraries to build the tools\n",
|
| 281 |
-
"\n",
|
| 282 |
-
"from smolagents import CodeAgent, HfApiModel, Tool\n",
|
| 283 |
-
"from langchain.agents import load_tools\n",
|
| 284 |
-
"from langchain.tools import Tool as LangchainTool\n",
|
| 285 |
-
"\n",
|
| 286 |
-
"# Import required libraries\n",
|
| 287 |
-
"from langchain.tools import DuckDuckGoSearchRun\n",
|
| 288 |
-
"from langchain.utilities import WikipediaAPIWrapper\n",
|
| 289 |
-
"from langchain.tools import BaseTool\n",
|
| 290 |
-
"from langchain.callbacks.manager import CallbackManagerForToolRun\n",
|
| 291 |
-
"from typing import Optional, Type\n",
|
| 292 |
-
"from langchain.tools import ShellTool\n",
|
| 293 |
-
"from PIL import Image\n",
|
| 294 |
-
"import pytesseract\n",
|
| 295 |
-
"import requests\n",
|
| 296 |
-
"from io import BytesIO\n",
|
| 297 |
-
"import fitz # PyMuPDF for PDF handling\n",
|
| 298 |
-
"\n",
|
| 299 |
-
"# Web Search Tool using DuckDuckGo (free)\n",
|
| 300 |
-
"search = DuckDuckGoSearchRun()\n",
|
| 301 |
-
"web_search_tool = Tool(\n",
|
| 302 |
-
" name=\"Web Search\",\n",
|
| 303 |
-
" description=\"Search the web for current information using DuckDuckGo\",\n",
|
| 304 |
-
" func=search.run\n",
|
| 305 |
-
")\n",
|
| 306 |
-
"\n",
|
| 307 |
-
"# Wikipedia Tool (free)\n",
|
| 308 |
-
"wikipedia = WikipediaAPIWrapper()\n",
|
| 309 |
-
"wikipedia_tool = Tool(\n",
|
| 310 |
-
" name=\"Wikipedia\",\n",
|
| 311 |
-
" description=\"Search Wikipedia articles for information\",\n",
|
| 312 |
-
" func=wikipedia.run\n",
|
| 313 |
-
")\n",
|
| 314 |
-
"\n",
|
| 315 |
-
"# Basic Calculator Tool (free)\n",
|
| 316 |
-
"class CalculatorTool(BaseTool):\n",
|
| 317 |
-
" name = \"Calculator\"\n",
|
| 318 |
-
" description = \"Useful for performing mathematical calculations\"\n",
|
| 319 |
-
" \n",
|
| 320 |
-
" def _run(self, query: str, run_manager: Optional[CallbackManagerForToolRun] = None) -> str:\n",
|
| 321 |
-
" client = wolframalpha.Client(WOLFRAM_ALPHA_APPID)\n",
|
| 322 |
-
" res = client.query(query)\n",
|
| 323 |
-
" return next(res.results).text\n",
|
| 324 |
-
"\n",
|
| 325 |
-
"# Image Recognition Tool (using local Tesseract OCR)\n",
|
| 326 |
-
"class ImageRecognitionTool(BaseTool):\n",
|
| 327 |
-
" name = \"Image Recognition\"\n",
|
| 328 |
-
" description = \"Analyze and extract text from images using OCR\"\n",
|
| 329 |
-
" \n",
|
| 330 |
-
" def _run(self, image_path: str, run_manager: Optional[CallbackManagerForToolRun] = None) -> str:\n",
|
| 331 |
-
" try:\n",
|
| 332 |
-
" img = Image.open(image_path)\n",
|
| 333 |
-
" text = pytesseract.image_to_string(img)\n",
|
| 334 |
-
" return text\n",
|
| 335 |
-
" except Exception as e:\n",
|
| 336 |
-
" return f\"Error processing image: {str(e)}\"\n",
|
| 337 |
-
"\n",
|
| 338 |
-
"# PDF Reader Tool\n",
|
| 339 |
-
"class PDFReaderTool(BaseTool):\n",
|
| 340 |
-
" name = \"PDF Reader\"\n",
|
| 341 |
-
" description = \"Read and extract text from PDF documents\"\n",
|
| 342 |
-
" \n",
|
| 343 |
-
" def _run(self, pdf_path: str, run_manager: Optional[CallbackManagerForToolRun] = None) -> str:\n",
|
| 344 |
-
" try:\n",
|
| 345 |
-
" doc = fitz.open(pdf_path)\n",
|
| 346 |
-
" text = \"\"\n",
|
| 347 |
-
" for page in doc:\n",
|
| 348 |
-
" text += page.get_text()\n",
|
| 349 |
-
" return text\n",
|
| 350 |
-
" except Exception as e:\n",
|
| 351 |
-
" return f\"Error reading PDF: {str(e)}\"\n",
|
| 352 |
-
"\n",
|
| 353 |
-
"\n",
|
| 354 |
-
"\n",
|
| 355 |
-
"# Test the agent (uncomment to run)\n",
|
| 356 |
-
"# response = agent.run(\"Calculate 234 * 789 and then search for information about the result\")\n"
|
| 357 |
]
|
| 358 |
}
|
| 359 |
],
|
|
|
|
| 166 |
"df['Annotator Metadata'][1]"
|
| 167 |
]
|
| 168 |
},
|
| 169 |
+
{
|
| 170 |
+
"cell_type": "code",
|
| 171 |
+
"execution_count": 65,
|
| 172 |
+
"id": "73c7bbaa",
|
| 173 |
+
"metadata": {},
|
| 174 |
+
"outputs": [
|
| 175 |
+
{
|
| 176 |
+
"name": "stdout",
|
| 177 |
+
"output_type": "stream",
|
| 178 |
+
"text": [
|
| 179 |
+
"In July 2, 1959 United States standards for grades of processed fruits,\n",
|
| 180 |
+
"vegetables, and certain other products listed as dehydrated, consider the items\n",
|
| 181 |
+
"in the \"dried and dehydrated section\" specifically marked as dehydrated along\n",
|
| 182 |
+
"with any items in the Frozen/Chilled section that contain the whole name of the\n",
|
| 183 |
+
"item, but not if they're marked Chilled. As of August 2023, what is the\n",
|
| 184 |
+
"percentage (to the nearest percent) of those standards that have been superseded\n",
|
| 185 |
+
"by a new version since the date given in the 1959 standards?\n"
|
| 186 |
+
]
|
| 187 |
+
}
|
| 188 |
+
],
|
| 189 |
+
"source": [
|
| 190 |
+
"#print the question and answer for the 11th row, print it out in wrapped text.\n",
|
| 191 |
+
"import textwrap\n",
|
| 192 |
+
"print(textwrap.fill(df['Question'][10], width=80))"
|
| 193 |
+
]
|
| 194 |
+
},
|
| 195 |
+
{
|
| 196 |
+
"cell_type": "code",
|
| 197 |
+
"execution_count": 62,
|
| 198 |
+
"id": "f30fb061",
|
| 199 |
+
"metadata": {},
|
| 200 |
+
"outputs": [
|
| 201 |
+
{
|
| 202 |
+
"data": {
|
| 203 |
+
"text/plain": [
|
| 204 |
+
"{'Steps': '1. Search the web for \"PDB ID 5wb7\"\\n2. Navigate to https://www.rcsb.org/structure/5wb7 from the search results page\\n3. Download the PDB file from the landing page.\\n4. Process the PDB file using Python and Biopython to calculate the distance between the first two atoms listed in the file. (1.4564234018325806 Å)\\nfrom Bio.PDB import PDBParser\\nparser = PDBParser()\\nstructure = parser.get_structure(\"5wb7\", \"5wb7.pdb\")\\nfor atom in structure.get_atoms():\\n atom1 = atom\\n break\\nfor atom in structure.get_atoms():\\n if atom != atom1:\\n atom2 = atom\\n break\\ndistance = atom1 - atom2\\nprint(f\"{distance}\")\\n5. Round the result to the nearest picometer (1.456)',\n",
|
| 205 |
+
" 'Number of steps': '5',\n",
|
| 206 |
+
" 'How long did this take?': '45 minutes',\n",
|
| 207 |
+
" 'Tools': '1. Web browser\\n2. Search engine\\n3. File handling\\n4. Python\\n5. Calculator ',\n",
|
| 208 |
+
" 'Number of tools': '5'}"
|
| 209 |
+
]
|
| 210 |
+
},
|
| 211 |
+
"execution_count": 62,
|
| 212 |
+
"metadata": {},
|
| 213 |
+
"output_type": "execute_result"
|
| 214 |
+
}
|
| 215 |
+
],
|
| 216 |
+
"source": [
|
| 217 |
+
"df['Annotator Metadata'][11]"
|
| 218 |
+
]
|
| 219 |
+
},
|
| 220 |
{
|
| 221 |
"cell_type": "code",
|
| 222 |
"execution_count": null,
|
|
|
|
| 245 |
},
|
| 246 |
{
|
| 247 |
"cell_type": "code",
|
| 248 |
+
"execution_count": 37,
|
| 249 |
"id": "1f0a65e7",
|
| 250 |
"metadata": {},
|
| 251 |
"outputs": [
|
|
|
|
| 276 |
},
|
| 277 |
{
|
| 278 |
"cell_type": "code",
|
| 279 |
+
"execution_count": 57,
|
| 280 |
+
"id": "a6f475af",
|
| 281 |
"metadata": {},
|
| 282 |
"outputs": [
|
| 283 |
{
|
| 284 |
"data": {
|
| 285 |
+
"text/html": [
|
| 286 |
+
"<div>\n",
|
| 287 |
+
"<style scoped>\n",
|
| 288 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 289 |
+
" vertical-align: middle;\n",
|
| 290 |
+
" }\n",
|
| 291 |
+
"\n",
|
| 292 |
+
" .dataframe tbody tr th {\n",
|
| 293 |
+
" vertical-align: top;\n",
|
| 294 |
+
" }\n",
|
| 295 |
+
"\n",
|
| 296 |
+
" .dataframe thead th {\n",
|
| 297 |
+
" text-align: right;\n",
|
| 298 |
+
" }\n",
|
| 299 |
+
"</style>\n",
|
| 300 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 301 |
+
" <thead>\n",
|
| 302 |
+
" <tr style=\"text-align: right;\">\n",
|
| 303 |
+
" <th></th>\n",
|
| 304 |
+
" <th>Tool</th>\n",
|
| 305 |
+
" <th>Count</th>\n",
|
| 306 |
+
" </tr>\n",
|
| 307 |
+
" </thead>\n",
|
| 308 |
+
" <tbody>\n",
|
| 309 |
+
" <tr>\n",
|
| 310 |
+
" <th>0</th>\n",
|
| 311 |
+
" <td>Web browser</td>\n",
|
| 312 |
+
" <td>95</td>\n",
|
| 313 |
+
" </tr>\n",
|
| 314 |
+
" <tr>\n",
|
| 315 |
+
" <th>1</th>\n",
|
| 316 |
+
" <td>Search engine</td>\n",
|
| 317 |
+
" <td>88</td>\n",
|
| 318 |
+
" </tr>\n",
|
| 319 |
+
" <tr>\n",
|
| 320 |
+
" <th>2</th>\n",
|
| 321 |
+
" <td>Calculator</td>\n",
|
| 322 |
+
" <td>30</td>\n",
|
| 323 |
+
" </tr>\n",
|
| 324 |
+
" <tr>\n",
|
| 325 |
+
" <th>3</th>\n",
|
| 326 |
+
" <td>Image recognition tools</td>\n",
|
| 327 |
+
" <td>11</td>\n",
|
| 328 |
+
" </tr>\n",
|
| 329 |
+
" <tr>\n",
|
| 330 |
+
" <th>4</th>\n",
|
| 331 |
+
" <td>search engine</td>\n",
|
| 332 |
+
" <td>9</td>\n",
|
| 333 |
+
" </tr>\n",
|
| 334 |
+
" <tr>\n",
|
| 335 |
+
" <th>...</th>\n",
|
| 336 |
+
" <td>...</td>\n",
|
| 337 |
+
" <td>...</td>\n",
|
| 338 |
+
" </tr>\n",
|
| 339 |
+
" <tr>\n",
|
| 340 |
+
" <th>88</th>\n",
|
| 341 |
+
" <td>Wikipedia</td>\n",
|
| 342 |
+
" <td>1</td>\n",
|
| 343 |
+
" </tr>\n",
|
| 344 |
+
" <tr>\n",
|
| 345 |
+
" <th>89</th>\n",
|
| 346 |
+
" <td>Video capability</td>\n",
|
| 347 |
+
" <td>1</td>\n",
|
| 348 |
+
" </tr>\n",
|
| 349 |
+
" <tr>\n",
|
| 350 |
+
" <th>90</th>\n",
|
| 351 |
+
" <td>Image processing tools</td>\n",
|
| 352 |
+
" <td>1</td>\n",
|
| 353 |
+
" </tr>\n",
|
| 354 |
+
" <tr>\n",
|
| 355 |
+
" <th>91</th>\n",
|
| 356 |
+
" <td>Image recognition software</td>\n",
|
| 357 |
+
" <td>1</td>\n",
|
| 358 |
+
" </tr>\n",
|
| 359 |
+
" <tr>\n",
|
| 360 |
+
" <th>92</th>\n",
|
| 361 |
+
" <td>YouTube</td>\n",
|
| 362 |
+
" <td>1</td>\n",
|
| 363 |
+
" </tr>\n",
|
| 364 |
+
" </tbody>\n",
|
| 365 |
+
"</table>\n",
|
| 366 |
+
"<p>93 rows × 2 columns</p>\n",
|
| 367 |
+
"</div>"
|
| 368 |
+
],
|
| 369 |
"text/plain": [
|
| 370 |
+
" Tool Count\n",
|
| 371 |
+
"0 Web browser 95\n",
|
| 372 |
+
"1 Search engine 88\n",
|
| 373 |
+
"2 Calculator 30\n",
|
| 374 |
+
"3 Image recognition tools 11\n",
|
| 375 |
+
"4 search engine 9\n",
|
| 376 |
+
".. ... ...\n",
|
| 377 |
+
"88 Wikipedia 1\n",
|
| 378 |
+
"89 Video capability 1\n",
|
| 379 |
+
"90 Image processing tools 1\n",
|
| 380 |
+
"91 Image recognition software 1\n",
|
| 381 |
+
"92 YouTube 1\n",
|
| 382 |
+
"\n",
|
| 383 |
+
"[93 rows x 2 columns]"
|
| 384 |
]
|
| 385 |
},
|
| 386 |
+
"execution_count": 57,
|
| 387 |
"metadata": {},
|
| 388 |
"output_type": "execute_result"
|
| 389 |
}
|
| 390 |
],
|
| 391 |
"source": [
|
| 392 |
+
"tool_counts_df"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 393 |
]
|
| 394 |
}
|
| 395 |
],
|