Spaces:
Sleeping
Sleeping
Commit ·
fec32f4
1
Parent(s): cbc2dad
test agent result
Browse files- test_agent.py +73 -0
- view_jsonfile.ipynb +154 -118
test_agent.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from agent import MyAgent
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def test_agent(
    metadata_path: str = "metadata.jsonl",
    max_tests: int = 5,
):
    """
    Smoke-test MyAgent against questions from a GAIA metadata JSONL file.

    Reads up to ``max_tests`` lines from ``metadata_path``, extracts the
    question (and optional attached file) from each JSON record, runs it
    through MyAgent, and prints the answer. A malformed line or a failing
    question is reported and skipped so one bad record does not abort the
    whole run.

    Parameters
    ----------
    metadata_path : str
        Path to the GAIA metadata JSONL file (one JSON object per line).
    max_tests : int
        Maximum number of questions to run.
    """
    # Initialize agent once and reuse it for every question.
    agent = MyAgent()

    metadata_file = Path(metadata_path)
    if not metadata_file.exists():
        print(f"Metadata file not found: {metadata_path}")
        return

    with open(metadata_file, "r", encoding="utf-8") as f:
        for i, line in enumerate(f):
            if i >= max_tests:
                break
            try:
                meta = json.loads(line)
            except json.JSONDecodeError:
                print(f"Invalid JSON on line {i+1}")
                continue

            # Support both 'task_id' and 'id'
            task_id = meta.get("task_id") or meta.get("id") or ""
            # Support 'Question' (GAIA casing), 'question', and 'text'.
            # Fix: the original comment promised 'question' support but the
            # code only checked the capitalized 'Question' key.
            question = (
                meta.get("Question")
                or meta.get("question")
                or meta.get("text")
                or ""
            )

            print(f"--- Test {i+1}/{max_tests}: Task ID {task_id} ---")
            print(f"Question: {question}")

            if not question:
                print("Skipping: no question found\n")
                continue

            try:
                # If there's a file_name field, pass it to agent.run
                # (single lookup instead of the original double .get()).
                file_arg = meta.get("file_name")
                if file_arg:
                    answer = agent.run(question, file_paths=[file_arg])
                else:
                    answer = agent.run(question)
                print(f"Answer: {answer}\n")
            except Exception as e:
                # Best-effort harness: report and move on to the next test.
                print(f"Error running agent on question '{question}': {e}\n")
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
if __name__ == "__main__":
    import argparse

    # CLI entry point: forward --metadata / --max straight to test_agent.
    cli = argparse.ArgumentParser(description="Test MyAgent with GAIA metadata.")
    cli.add_argument(
        "--metadata",
        type=str,
        default="metadata.jsonl",
        help="Path to GAIA metadata JSONL",
    )
    cli.add_argument(
        "--max",
        type=int,
        default=5,
        help="Maximum number of tests to run",
    )
    parsed = cli.parse_args()
    test_agent(parsed.metadata, parsed.max)
|
| 73 |
+
|
view_jsonfile.ipynb
CHANGED
|
@@ -166,6 +166,57 @@
|
|
| 166 |
"df['Annotator Metadata'][1]"
|
| 167 |
]
|
| 168 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
{
|
| 170 |
"cell_type": "code",
|
| 171 |
"execution_count": null,
|
|
@@ -194,7 +245,7 @@
|
|
| 194 |
},
|
| 195 |
{
|
| 196 |
"cell_type": "code",
|
| 197 |
-
"execution_count":
|
| 198 |
"id": "1f0a65e7",
|
| 199 |
"metadata": {},
|
| 200 |
"outputs": [
|
|
@@ -225,135 +276,120 @@
|
|
| 225 |
},
|
| 226 |
{
|
| 227 |
"cell_type": "code",
|
| 228 |
-
"execution_count":
|
| 229 |
-
"id": "
|
| 230 |
"metadata": {},
|
| 231 |
"outputs": [
|
| 232 |
{
|
| 233 |
"data": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
"text/plain": [
|
| 235 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 236 |
]
|
| 237 |
},
|
| 238 |
-
"execution_count":
|
| 239 |
"metadata": {},
|
| 240 |
"output_type": "execute_result"
|
| 241 |
}
|
| 242 |
],
|
| 243 |
"source": [
|
| 244 |
-
"
|
| 245 |
-
"import requests\n",
|
| 246 |
-
"import json\n",
|
| 247 |
-
"import base64\n",
|
| 248 |
-
" \n",
|
| 249 |
-
"import numpy as np\n",
|
| 250 |
-
"from smolagents import CodeAgent, HfApiModel, Tool\n",
|
| 251 |
-
"from langchain.agents import load_tools\n",
|
| 252 |
-
"\n",
|
| 253 |
-
"#load env variables\n",
|
| 254 |
-
"from dotenv import load_dotenv\n",
|
| 255 |
-
"load_dotenv()\n"
|
| 256 |
-
]
|
| 257 |
-
},
|
| 258 |
-
{
|
| 259 |
-
"cell_type": "code",
|
| 260 |
-
"execution_count": 52,
|
| 261 |
-
"id": "00f79e78",
|
| 262 |
-
"metadata": {},
|
| 263 |
-
"outputs": [
|
| 264 |
-
{
|
| 265 |
-
"ename": "PydanticUserError",
|
| 266 |
-
"evalue": "Field 'name' defined on a base class was overridden by a non-annotated attribute. All field definitions, including overrides, require a type annotation.\n\nFor further information visit https://errors.pydantic.dev/2.11/u/model-field-overridden",
|
| 267 |
-
"output_type": "error",
|
| 268 |
-
"traceback": [
|
| 269 |
-
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
| 270 |
-
"\u001b[1;31mPydanticUserError\u001b[0m Traceback (most recent call last)",
|
| 271 |
-
"Cell \u001b[1;32mIn[52], line 38\u001b[0m\n\u001b[0;32m 31\u001b[0m wikipedia_tool \u001b[38;5;241m=\u001b[39m Tool(\n\u001b[0;32m 32\u001b[0m name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mWikipedia\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 33\u001b[0m description\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSearch Wikipedia articles for information\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 34\u001b[0m func\u001b[38;5;241m=\u001b[39mwikipedia\u001b[38;5;241m.\u001b[39mrun\n\u001b[0;32m 35\u001b[0m )\n\u001b[0;32m 37\u001b[0m \u001b[38;5;66;03m# Basic Calculator Tool (free)\u001b[39;00m\n\u001b[1;32m---> 38\u001b[0m \u001b[38;5;28;01mclass\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mCalculatorTool\u001b[39;00m(BaseTool):\n\u001b[0;32m 39\u001b[0m name \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCalculator\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 40\u001b[0m description \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUseful for performing mathematical calculations\u001b[39m\u001b[38;5;124m\"\u001b[39m\n",
|
| 272 |
-
"File \u001b[1;32mc:\\Users\\mabel\\Anaconda3\\envs\\hf_agent\\lib\\site-packages\\pydantic\\_internal\\_model_construction.py:112\u001b[0m, in \u001b[0;36mModelMetaclass.__new__\u001b[1;34m(mcs, cls_name, bases, namespace, __pydantic_generic_metadata__, __pydantic_reset_parent_namespace__, _create_model_module, **kwargs)\u001b[0m\n\u001b[0;32m 110\u001b[0m config_wrapper \u001b[38;5;241m=\u001b[39m ConfigWrapper\u001b[38;5;241m.\u001b[39mfor_model(bases, namespace, kwargs)\n\u001b[0;32m 111\u001b[0m namespace[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel_config\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m config_wrapper\u001b[38;5;241m.\u001b[39mconfig_dict\n\u001b[1;32m--> 112\u001b[0m private_attributes \u001b[38;5;241m=\u001b[39m \u001b[43minspect_namespace\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 113\u001b[0m \u001b[43m \u001b[49m\u001b[43mnamespace\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig_wrapper\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mignored_types\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mclass_vars\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbase_field_names\u001b[49m\n\u001b[0;32m 114\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 115\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m private_attributes \u001b[38;5;129;01mor\u001b[39;00m base_private_attributes:\n\u001b[0;32m 116\u001b[0m original_model_post_init \u001b[38;5;241m=\u001b[39m get_model_post_init(namespace, bases)\n",
|
| 273 |
-
"File \u001b[1;32mc:\\Users\\mabel\\Anaconda3\\envs\\hf_agent\\lib\\site-packages\\pydantic\\_internal\\_model_construction.py:449\u001b[0m, in \u001b[0;36minspect_namespace\u001b[1;34m(namespace, ignored_types, base_class_vars, base_class_fields)\u001b[0m\n\u001b[0;32m 447\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m var_name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m raw_annotations:\n\u001b[0;32m 448\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m var_name \u001b[38;5;129;01min\u001b[39;00m base_class_fields:\n\u001b[1;32m--> 449\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m PydanticUserError(\n\u001b[0;32m 450\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mField \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mvar_name\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m defined on a base class was overridden by a non-annotated attribute. \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m 451\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mAll field definitions, including overrides, require a type annotation.\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 452\u001b[0m code\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel-field-overridden\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 453\u001b[0m )\n\u001b[0;32m 454\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(value, FieldInfo):\n\u001b[0;32m 455\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m PydanticUserError(\n\u001b[0;32m 456\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mField \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mvar_name\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m requires a type annotation\u001b[39m\u001b[38;5;124m'\u001b[39m, code\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel-field-missing-annotation\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m 457\u001b[0m )\n",
|
| 274 |
-
"\u001b[1;31mPydanticUserError\u001b[0m: Field 'name' defined on a base class was overridden by a non-annotated attribute. All field definitions, including overrides, require a type annotation.\n\nFor further information visit https://errors.pydantic.dev/2.11/u/model-field-overridden"
|
| 275 |
-
]
|
| 276 |
-
}
|
| 277 |
-
],
|
| 278 |
-
"source": [
|
| 279 |
-
"# build tools for the agent with the following functions: websearch,calculator,Image recognition tools, image viewer, pdf viewer, pdf accesser\n",
|
| 280 |
-
"# use langchain and other libraries to build the tools\n",
|
| 281 |
-
"\n",
|
| 282 |
-
"from smolagents import CodeAgent, HfApiModel, Tool\n",
|
| 283 |
-
"from langchain.agents import load_tools\n",
|
| 284 |
-
"from langchain.tools import Tool as LangchainTool\n",
|
| 285 |
-
"\n",
|
| 286 |
-
"# Import required libraries\n",
|
| 287 |
-
"from langchain.tools import DuckDuckGoSearchRun\n",
|
| 288 |
-
"from langchain.utilities import WikipediaAPIWrapper\n",
|
| 289 |
-
"from langchain.tools import BaseTool\n",
|
| 290 |
-
"from langchain.callbacks.manager import CallbackManagerForToolRun\n",
|
| 291 |
-
"from typing import Optional, Type\n",
|
| 292 |
-
"from langchain.tools import ShellTool\n",
|
| 293 |
-
"from PIL import Image\n",
|
| 294 |
-
"import pytesseract\n",
|
| 295 |
-
"import requests\n",
|
| 296 |
-
"from io import BytesIO\n",
|
| 297 |
-
"import fitz # PyMuPDF for PDF handling\n",
|
| 298 |
-
"\n",
|
| 299 |
-
"# Web Search Tool using DuckDuckGo (free)\n",
|
| 300 |
-
"search = DuckDuckGoSearchRun()\n",
|
| 301 |
-
"web_search_tool = Tool(\n",
|
| 302 |
-
" name=\"Web Search\",\n",
|
| 303 |
-
" description=\"Search the web for current information using DuckDuckGo\",\n",
|
| 304 |
-
" func=search.run\n",
|
| 305 |
-
")\n",
|
| 306 |
-
"\n",
|
| 307 |
-
"# Wikipedia Tool (free)\n",
|
| 308 |
-
"wikipedia = WikipediaAPIWrapper()\n",
|
| 309 |
-
"wikipedia_tool = Tool(\n",
|
| 310 |
-
" name=\"Wikipedia\",\n",
|
| 311 |
-
" description=\"Search Wikipedia articles for information\",\n",
|
| 312 |
-
" func=wikipedia.run\n",
|
| 313 |
-
")\n",
|
| 314 |
-
"\n",
|
| 315 |
-
"# Basic Calculator Tool (free)\n",
|
| 316 |
-
"class CalculatorTool(BaseTool):\n",
|
| 317 |
-
" name = \"Calculator\"\n",
|
| 318 |
-
" description = \"Useful for performing mathematical calculations\"\n",
|
| 319 |
-
" \n",
|
| 320 |
-
" def _run(self, query: str, run_manager: Optional[CallbackManagerForToolRun] = None) -> str:\n",
|
| 321 |
-
" client = wolframalpha.Client(WOLFRAM_ALPHA_APPID)\n",
|
| 322 |
-
" res = client.query(query)\n",
|
| 323 |
-
" return next(res.results).text\n",
|
| 324 |
-
"\n",
|
| 325 |
-
"# Image Recognition Tool (using local Tesseract OCR)\n",
|
| 326 |
-
"class ImageRecognitionTool(BaseTool):\n",
|
| 327 |
-
" name = \"Image Recognition\"\n",
|
| 328 |
-
" description = \"Analyze and extract text from images using OCR\"\n",
|
| 329 |
-
" \n",
|
| 330 |
-
" def _run(self, image_path: str, run_manager: Optional[CallbackManagerForToolRun] = None) -> str:\n",
|
| 331 |
-
" try:\n",
|
| 332 |
-
" img = Image.open(image_path)\n",
|
| 333 |
-
" text = pytesseract.image_to_string(img)\n",
|
| 334 |
-
" return text\n",
|
| 335 |
-
" except Exception as e:\n",
|
| 336 |
-
" return f\"Error processing image: {str(e)}\"\n",
|
| 337 |
-
"\n",
|
| 338 |
-
"# PDF Reader Tool\n",
|
| 339 |
-
"class PDFReaderTool(BaseTool):\n",
|
| 340 |
-
" name = \"PDF Reader\"\n",
|
| 341 |
-
" description = \"Read and extract text from PDF documents\"\n",
|
| 342 |
-
" \n",
|
| 343 |
-
" def _run(self, pdf_path: str, run_manager: Optional[CallbackManagerForToolRun] = None) -> str:\n",
|
| 344 |
-
" try:\n",
|
| 345 |
-
" doc = fitz.open(pdf_path)\n",
|
| 346 |
-
" text = \"\"\n",
|
| 347 |
-
" for page in doc:\n",
|
| 348 |
-
" text += page.get_text()\n",
|
| 349 |
-
" return text\n",
|
| 350 |
-
" except Exception as e:\n",
|
| 351 |
-
" return f\"Error reading PDF: {str(e)}\"\n",
|
| 352 |
-
"\n",
|
| 353 |
-
"\n",
|
| 354 |
-
"\n",
|
| 355 |
-
"# Test the agent (uncomment to run)\n",
|
| 356 |
-
"# response = agent.run(\"Calculate 234 * 789 and then search for information about the result\")\n"
|
| 357 |
]
|
| 358 |
}
|
| 359 |
],
|
|
|
|
| 166 |
"df['Annotator Metadata'][1]"
|
| 167 |
]
|
| 168 |
},
|
| 169 |
+
{
|
| 170 |
+
"cell_type": "code",
|
| 171 |
+
"execution_count": 65,
|
| 172 |
+
"id": "73c7bbaa",
|
| 173 |
+
"metadata": {},
|
| 174 |
+
"outputs": [
|
| 175 |
+
{
|
| 176 |
+
"name": "stdout",
|
| 177 |
+
"output_type": "stream",
|
| 178 |
+
"text": [
|
| 179 |
+
"In July 2, 1959 United States standards for grades of processed fruits,\n",
|
| 180 |
+
"vegetables, and certain other products listed as dehydrated, consider the items\n",
|
| 181 |
+
"in the \"dried and dehydrated section\" specifically marked as dehydrated along\n",
|
| 182 |
+
"with any items in the Frozen/Chilled section that contain the whole name of the\n",
|
| 183 |
+
"item, but not if they're marked Chilled. As of August 2023, what is the\n",
|
| 184 |
+
"percentage (to the nearest percent) of those standards that have been superseded\n",
|
| 185 |
+
"by a new version since the date given in the 1959 standards?\n"
|
| 186 |
+
]
|
| 187 |
+
}
|
| 188 |
+
],
|
| 189 |
+
"source": [
|
| 190 |
+
"#print the question and answer for the 11th row, print it out in wrapped text.\n",
|
| 191 |
+
"import textwrap\n",
|
| 192 |
+
"print(textwrap.fill(df['Question'][10], width=80))"
|
| 193 |
+
]
|
| 194 |
+
},
|
| 195 |
+
{
|
| 196 |
+
"cell_type": "code",
|
| 197 |
+
"execution_count": 62,
|
| 198 |
+
"id": "f30fb061",
|
| 199 |
+
"metadata": {},
|
| 200 |
+
"outputs": [
|
| 201 |
+
{
|
| 202 |
+
"data": {
|
| 203 |
+
"text/plain": [
|
| 204 |
+
"{'Steps': '1. Search the web for \"PDB ID 5wb7\"\\n2. Navigate to https://www.rcsb.org/structure/5wb7 from the search results page\\n3. Download the PDB file from the landing page.\\n4. Process the PDB file using Python and Biopython to calculate the distance between the first two atoms listed in the file. (1.4564234018325806 Å)\\nfrom Bio.PDB import PDBParser\\nparser = PDBParser()\\nstructure = parser.get_structure(\"5wb7\", \"5wb7.pdb\")\\nfor atom in structure.get_atoms():\\n atom1 = atom\\n break\\nfor atom in structure.get_atoms():\\n if atom != atom1:\\n atom2 = atom\\n break\\ndistance = atom1 - atom2\\nprint(f\"{distance}\")\\n5. Round the result to the nearest picometer (1.456)',\n",
|
| 205 |
+
" 'Number of steps': '5',\n",
|
| 206 |
+
" 'How long did this take?': '45 minutes',\n",
|
| 207 |
+
" 'Tools': '1. Web browser\\n2. Search engine\\n3. File handling\\n4. Python\\n5. Calculator ',\n",
|
| 208 |
+
" 'Number of tools': '5'}"
|
| 209 |
+
]
|
| 210 |
+
},
|
| 211 |
+
"execution_count": 62,
|
| 212 |
+
"metadata": {},
|
| 213 |
+
"output_type": "execute_result"
|
| 214 |
+
}
|
| 215 |
+
],
|
| 216 |
+
"source": [
|
| 217 |
+
"df['Annotator Metadata'][11]"
|
| 218 |
+
]
|
| 219 |
+
},
|
| 220 |
{
|
| 221 |
"cell_type": "code",
|
| 222 |
"execution_count": null,
|
|
|
|
| 245 |
},
|
| 246 |
{
|
| 247 |
"cell_type": "code",
|
| 248 |
+
"execution_count": 37,
|
| 249 |
"id": "1f0a65e7",
|
| 250 |
"metadata": {},
|
| 251 |
"outputs": [
|
|
|
|
| 276 |
},
|
| 277 |
{
|
| 278 |
"cell_type": "code",
|
| 279 |
+
"execution_count": 57,
|
| 280 |
+
"id": "a6f475af",
|
| 281 |
"metadata": {},
|
| 282 |
"outputs": [
|
| 283 |
{
|
| 284 |
"data": {
|
| 285 |
+
"text/html": [
|
| 286 |
+
"<div>\n",
|
| 287 |
+
"<style scoped>\n",
|
| 288 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 289 |
+
" vertical-align: middle;\n",
|
| 290 |
+
" }\n",
|
| 291 |
+
"\n",
|
| 292 |
+
" .dataframe tbody tr th {\n",
|
| 293 |
+
" vertical-align: top;\n",
|
| 294 |
+
" }\n",
|
| 295 |
+
"\n",
|
| 296 |
+
" .dataframe thead th {\n",
|
| 297 |
+
" text-align: right;\n",
|
| 298 |
+
" }\n",
|
| 299 |
+
"</style>\n",
|
| 300 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 301 |
+
" <thead>\n",
|
| 302 |
+
" <tr style=\"text-align: right;\">\n",
|
| 303 |
+
" <th></th>\n",
|
| 304 |
+
" <th>Tool</th>\n",
|
| 305 |
+
" <th>Count</th>\n",
|
| 306 |
+
" </tr>\n",
|
| 307 |
+
" </thead>\n",
|
| 308 |
+
" <tbody>\n",
|
| 309 |
+
" <tr>\n",
|
| 310 |
+
" <th>0</th>\n",
|
| 311 |
+
" <td>Web browser</td>\n",
|
| 312 |
+
" <td>95</td>\n",
|
| 313 |
+
" </tr>\n",
|
| 314 |
+
" <tr>\n",
|
| 315 |
+
" <th>1</th>\n",
|
| 316 |
+
" <td>Search engine</td>\n",
|
| 317 |
+
" <td>88</td>\n",
|
| 318 |
+
" </tr>\n",
|
| 319 |
+
" <tr>\n",
|
| 320 |
+
" <th>2</th>\n",
|
| 321 |
+
" <td>Calculator</td>\n",
|
| 322 |
+
" <td>30</td>\n",
|
| 323 |
+
" </tr>\n",
|
| 324 |
+
" <tr>\n",
|
| 325 |
+
" <th>3</th>\n",
|
| 326 |
+
" <td>Image recognition tools</td>\n",
|
| 327 |
+
" <td>11</td>\n",
|
| 328 |
+
" </tr>\n",
|
| 329 |
+
" <tr>\n",
|
| 330 |
+
" <th>4</th>\n",
|
| 331 |
+
" <td>search engine</td>\n",
|
| 332 |
+
" <td>9</td>\n",
|
| 333 |
+
" </tr>\n",
|
| 334 |
+
" <tr>\n",
|
| 335 |
+
" <th>...</th>\n",
|
| 336 |
+
" <td>...</td>\n",
|
| 337 |
+
" <td>...</td>\n",
|
| 338 |
+
" </tr>\n",
|
| 339 |
+
" <tr>\n",
|
| 340 |
+
" <th>88</th>\n",
|
| 341 |
+
" <td>Wikipedia</td>\n",
|
| 342 |
+
" <td>1</td>\n",
|
| 343 |
+
" </tr>\n",
|
| 344 |
+
" <tr>\n",
|
| 345 |
+
" <th>89</th>\n",
|
| 346 |
+
" <td>Video capability</td>\n",
|
| 347 |
+
" <td>1</td>\n",
|
| 348 |
+
" </tr>\n",
|
| 349 |
+
" <tr>\n",
|
| 350 |
+
" <th>90</th>\n",
|
| 351 |
+
" <td>Image processing tools</td>\n",
|
| 352 |
+
" <td>1</td>\n",
|
| 353 |
+
" </tr>\n",
|
| 354 |
+
" <tr>\n",
|
| 355 |
+
" <th>91</th>\n",
|
| 356 |
+
" <td>Image recognition software</td>\n",
|
| 357 |
+
" <td>1</td>\n",
|
| 358 |
+
" </tr>\n",
|
| 359 |
+
" <tr>\n",
|
| 360 |
+
" <th>92</th>\n",
|
| 361 |
+
" <td>YouTube</td>\n",
|
| 362 |
+
" <td>1</td>\n",
|
| 363 |
+
" </tr>\n",
|
| 364 |
+
" </tbody>\n",
|
| 365 |
+
"</table>\n",
|
| 366 |
+
"<p>93 rows × 2 columns</p>\n",
|
| 367 |
+
"</div>"
|
| 368 |
+
],
|
| 369 |
"text/plain": [
|
| 370 |
+
" Tool Count\n",
|
| 371 |
+
"0 Web browser 95\n",
|
| 372 |
+
"1 Search engine 88\n",
|
| 373 |
+
"2 Calculator 30\n",
|
| 374 |
+
"3 Image recognition tools 11\n",
|
| 375 |
+
"4 search engine 9\n",
|
| 376 |
+
".. ... ...\n",
|
| 377 |
+
"88 Wikipedia 1\n",
|
| 378 |
+
"89 Video capability 1\n",
|
| 379 |
+
"90 Image processing tools 1\n",
|
| 380 |
+
"91 Image recognition software 1\n",
|
| 381 |
+
"92 YouTube 1\n",
|
| 382 |
+
"\n",
|
| 383 |
+
"[93 rows x 2 columns]"
|
| 384 |
]
|
| 385 |
},
|
| 386 |
+
"execution_count": 57,
|
| 387 |
"metadata": {},
|
| 388 |
"output_type": "execute_result"
|
| 389 |
}
|
| 390 |
],
|
| 391 |
"source": [
|
| 392 |
+
"tool_counts_df"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 393 |
]
|
| 394 |
}
|
| 395 |
],
|