NeerajCodz committed on
Commit
fa40af9
·
1 Parent(s): 48f04de

feat: add comprehensive plugin registry with 71 tools across 12 plugins

Browse files
backend/app/api/routes/plugins.py CHANGED
@@ -5,6 +5,16 @@ from typing import Any
5
  from fastapi import APIRouter, HTTPException
6
  from pydantic import BaseModel, Field
7
 
 
 
 
 
 
 
 
 
 
 
8
  router = APIRouter(prefix="/plugins", tags=["plugins"])
9
 
10
  # Plugin registry - available plugins
@@ -402,8 +412,94 @@ async def get_categories() -> dict[str, Any]:
402
  }
403
 
404
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
405
  @router.get("/{plugin_id}")
406
- async def get_plugin(plugin_id: str) -> PluginResponse:
407
  """Get details about a specific plugin."""
408
  for plugins in PLUGIN_REGISTRY.values():
409
  for plugin in plugins:
@@ -499,5 +595,3 @@ async def uninstall_plugin(action: PluginAction) -> dict[str, Any]:
499
  "message": f"Plugin {plugin['name']} uninstalled successfully",
500
  "plugin": {**plugin, "installed": False},
501
  }
502
-
503
-
 
5
  from fastapi import APIRouter, HTTPException
6
  from pydantic import BaseModel, Field
7
 
8
+ from app.plugins.registry import (
9
+ get_all_plugins,
10
+ get_all_tools,
11
+ get_plugin,
12
+ get_plugin_summary,
13
+ get_tool,
14
+ get_tools_by_category,
15
+ PluginCategory,
16
+ )
17
+
18
  router = APIRouter(prefix="/plugins", tags=["plugins"])
19
 
20
  # Plugin registry - available plugins
 
412
  }
413
 
414
 
415
+ # ==============================================================================
416
+ # Tool Registry Endpoints (must be before /{plugin_id} catch-all)
417
+ # ==============================================================================
418
+
419
+
420
@router.get("/tools")
async def list_tools(category: str | None = None) -> dict[str, Any]:
    """List the tools exposed by the plugin registry.

    Args:
        category: Optional ``PluginCategory`` value used to filter the
            listing. A missing or empty value lists every tool; a value that
            is not a known category yields an empty listing.

    Returns:
        A dict with ``tools`` (one summary dict per tool) and ``count``
        (the number of tools listed).
    """
    if not category:
        tools = get_all_tools()
    else:
        # Guard only the enum conversion: a ValueError raised inside the
        # registry lookup is a genuine failure and must not be reported to
        # the client as "this category has no tools".
        try:
            cat = PluginCategory(category)
        except ValueError:
            tools = []
        else:
            tools = get_tools_by_category(cat)

    return {
        "tools": [
            {
                "name": t.name,
                "description": t.description,
                "category": t.category.value,
                "parameters": t.parameters,
                "returns": t.returns,
            }
            for t in tools
        ],
        "count": len(tools),
    }
445
+
446
+
447
@router.get("/tools/{tool_name:path}")
async def get_tool_details(tool_name: str) -> dict[str, Any]:
    """Return the full description of a single registered tool.

    Responds with HTTP 404 when the registry has no tool named ``tool_name``.
    """
    match = get_tool(tool_name)
    if match:
        details: dict[str, Any] = {
            "name": match.name,
            "description": match.description,
            "category": match.category.value,
            "parameters": match.parameters,
            "returns": match.returns,
            "examples": match.examples,
        }
        return details

    raise HTTPException(status_code=404, detail=f"Tool not found: {tool_name}")
462
+
463
+
464
@router.get("/registry")
async def get_registry_endpoint() -> dict[str, Any]:
    """Return every registered plugin with its tools, plus the registry summary.

    The response has two keys: ``plugins`` (one dict per plugin, each carrying
    its serialized tools and a ``tools_count``) and ``summary`` (the value of
    ``get_plugin_summary()``).
    """
    serialized: list[dict[str, Any]] = []
    for plugin in get_all_plugins():
        tool_entries = [
            {
                "name": tool.name,
                "description": tool.description,
                "parameters": tool.parameters,
                "returns": tool.returns,
            }
            for tool in plugin.tools
        ]
        serialized.append(
            {
                "id": plugin.id,
                "name": plugin.name,
                "description": plugin.description,
                "category": plugin.category.value,
                "version": plugin.version,
                "enabled": plugin.enabled,
                "tools": tool_entries,
                "tools_count": len(plugin.tools),
            }
        )

    return {"plugins": serialized, "summary": get_plugin_summary()}
493
+
494
+
495
@router.get("/summary")
async def get_summary_endpoint() -> dict[str, Any]:
    """Return the registry's plugin/tool summary exactly as the registry reports it."""
    summary = get_plugin_summary()
    return summary
499
+
500
+
501
  @router.get("/{plugin_id}")
502
+ async def get_plugin_by_id(plugin_id: str) -> PluginResponse:
503
  """Get details about a specific plugin."""
504
  for plugins in PLUGIN_REGISTRY.values():
505
  for plugin in plugins:
 
595
  "message": f"Plugin {plugin['name']} uninstalled successfully",
596
  "plugin": {**plugin, "installed": False},
597
  }
 
 
backend/app/api/routes/scrape.py CHANGED
@@ -270,6 +270,43 @@ def _list_session_artifacts(session: dict[str, Any]) -> list[str]:
270
  return sorted([file.name for file in base.iterdir() if file.is_file()])
271
 
272
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
  def _record_step(session: dict[str, Any], step: ScrapeStep) -> dict[str, Any]:
274
  """Store and return a step event payload."""
275
 
@@ -1115,6 +1152,7 @@ async def _scrape_github_trending(
1115
  stars = "0"
1116
  if stars_elem:
1117
  stars_text = stars_elem.get_text(strip=True)
 
1118
  stars = re.sub(r"[^\d,.]", "", stars_text)
1119
 
1120
  # Extract forks
@@ -1122,6 +1160,7 @@ async def _scrape_github_trending(
1122
  forks = "0"
1123
  if forks_elem:
1124
  forks_text = forks_elem.get_text(strip=True)
 
1125
  forks = re.sub(r"[^\d,.]", "", forks_text)
1126
 
1127
  trending_repos.append({
@@ -2143,6 +2182,39 @@ async def scrape_stream(
2143
  else:
2144
  session["errors"].append(planner_sandbox.error or "Planner sandbox execution failed")
2145
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2146
  for idx, url in enumerate(resolved_assets):
2147
  session["current_url_index"] = idx
2148
  url_navigation_plan = _create_intelligent_navigation_plan(request.instructions, [url])
@@ -2377,6 +2449,62 @@ async def scrape_stream(
2377
  if isinstance(payload, dict) and isinstance(payload.get("content"), str):
2378
  html_samples[str(source)] = payload.get("content", "")
2379
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2380
  analysis_payload = {
2381
  "instructions": request.instructions,
2382
  "output_instructions": request.output_instructions,
 
270
  return sorted([file.name for file in base.iterdir() if file.is_file()])
271
 
272
 
273
def _create_tool_call_step(
    session: dict[str, Any],
    tool_name: str,
    description: str,
    parameters: dict[str, Any],
    status: str = "running",
    result: dict[str, Any] | None = None,
    reward: float = 0.0,
    url: str | None = None,
) -> dict[str, Any]:
    """Record a ``tool_call`` step on the session and return its event payload.

    Args:
        session: Session state; the step number is one more than the length
            of ``session["steps"]`` (treated as empty when the key is absent).
        tool_name: Name of the tool being reported, e.g. ``"url.parse"``.
        description: Human-readable description stored with the step.
        parameters: Arguments the tool was called with.
        status: Step status; ``"completed"`` switches the message to a
            result preview when a non-empty ``result`` is given.
        result: Result of the call, if any. Stored under ``"result"``
            whenever it is not ``None``.
        reward: Reward attached to the step.
        url: URL the step relates to, if any.

    Returns:
        The value returned by ``_record_step`` for the new step.
    """
    step_number = len(session.get("steps", [])) + 1

    if status == "completed" and result:
        # Completed calls are summarised by their first two result entries,
        # capped at 50 characters.
        result_preview = ", ".join(f"{k}={v}" for k, v in list(result.items())[:2])
        message = f"{tool_name}() → {result_preview[:50]}"
    else:
        # Each argument's repr is cut to 20 characters to keep the line short.
        rendered_args = ", ".join(f"{k}={repr(v)[:20]}" for k, v in parameters.items())
        message = f"{tool_name}({rendered_args})"

    extracted_data: dict[str, Any] = {
        "tool_name": tool_name,
        "tool_description": description,
        "parameters": parameters,
    }
    # Test against None rather than truthiness: an empty result dict is still
    # a result and must not be indistinguishable from "no result yet".
    if result is not None:
        extracted_data["result"] = result

    return _record_step(
        session,
        ScrapeStep(
            step_number=step_number,
            action="tool_call",
            url=url,
            status=status,
            message=message,
            reward=reward,
            extracted_data=extracted_data,
            timestamp=_now_iso(),
        ),
    )
308
+
309
+
310
  def _record_step(session: dict[str, Any], step: ScrapeStep) -> dict[str, Any]:
311
  """Store and return a step event payload."""
312
 
 
1152
  stars = "0"
1153
  if stars_elem:
1154
  stars_text = stars_elem.get_text(strip=True)
1155
+ # Tool call: regex.sub (inline, no separate step for efficiency)
1156
  stars = re.sub(r"[^\d,.]", "", stars_text)
1157
 
1158
  # Extract forks
 
1160
  forks = "0"
1161
  if forks_elem:
1162
  forks_text = forks_elem.get_text(strip=True)
1163
+ # Tool call: regex.sub (inline, no separate step for efficiency)
1164
  forks = re.sub(r"[^\d,.]", "", forks_text)
1165
 
1166
  trending_repos.append({
 
2182
  else:
2183
  session["errors"].append(planner_sandbox.error or "Planner sandbox execution failed")
2184
 
2185
+ # Tool call: url.parse (validate and parse URLs)
2186
+ url_parse_event = _create_tool_call_step(
2187
+ session,
2188
+ "url.parse",
2189
+ "Parse and validate target URLs",
2190
+ {"urls": resolved_assets, "count": len(resolved_assets)},
2191
+ status="running",
2192
+ )
2193
+ await manager.broadcast(url_parse_event, session_id)
2194
+ yield _sse_event(url_parse_event)
2195
+
2196
+ parsed_urls = []
2197
+ for url in resolved_assets:
2198
+ parsed = urlparse(url)
2199
+ parsed_urls.append({
2200
+ "url": url,
2201
+ "scheme": parsed.scheme,
2202
+ "domain": parsed.netloc,
2203
+ "path": parsed.path,
2204
+ })
2205
+
2206
+ url_parse_result = _create_tool_call_step(
2207
+ session,
2208
+ "url.parse",
2209
+ "Parse and validate target URLs",
2210
+ {"urls": resolved_assets},
2211
+ status="completed",
2212
+ result={"parsed": len(parsed_urls), "domains": list(set(p["domain"] for p in parsed_urls))},
2213
+ reward=0.05,
2214
+ )
2215
+ await manager.broadcast(url_parse_result, session_id)
2216
+ yield _sse_event(url_parse_result)
2217
+
2218
  for idx, url in enumerate(resolved_assets):
2219
  session["current_url_index"] = idx
2220
  url_navigation_plan = _create_intelligent_navigation_plan(request.instructions, [url])
 
2449
  if isinstance(payload, dict) and isinstance(payload.get("content"), str):
2450
  html_samples[str(source)] = payload.get("content", "")
2451
 
2452
+ # Tool call: extract.urls (find URLs in content)
2453
+ if html_samples:
2454
+ extract_urls_event = _create_tool_call_step(
2455
+ session,
2456
+ "extract.urls",
2457
+ "Extract URLs from HTML content",
2458
+ {"sources": len(html_samples), "total_bytes": sum(len(h) for h in html_samples.values())},
2459
+ status="running",
2460
+ )
2461
+ await manager.broadcast(extract_urls_event, session_id)
2462
+ yield _sse_event(extract_urls_event)
2463
+
2464
+ all_urls = []
2465
+ for html in html_samples.values():
2466
+ all_urls.extend(re.findall(r'href=["\']([^"\']+)["\']', html[:50000])) # Limit search
2467
+
2468
+ extract_urls_result = _create_tool_call_step(
2469
+ session,
2470
+ "extract.urls",
2471
+ "Extract URLs from HTML content",
2472
+ {"sources": len(html_samples)},
2473
+ status="completed",
2474
+ result={"urls_found": len(all_urls), "unique": len(set(all_urls))},
2475
+ reward=0.05,
2476
+ )
2477
+ await manager.broadcast(extract_urls_result, session_id)
2478
+ yield _sse_event(extract_urls_result)
2479
+
2480
+ # Tool call: extract.emails (find emails in content)
2481
+ extract_emails_event = _create_tool_call_step(
2482
+ session,
2483
+ "extract.emails",
2484
+ "Extract email addresses from HTML content",
2485
+ {"sources": len(html_samples)},
2486
+ status="running",
2487
+ )
2488
+ await manager.broadcast(extract_emails_event, session_id)
2489
+ yield _sse_event(extract_emails_event)
2490
+
2491
+ all_emails = []
2492
+ email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
2493
+ for html in html_samples.values():
2494
+ all_emails.extend(re.findall(email_pattern, html[:50000]))
2495
+
2496
+ extract_emails_result = _create_tool_call_step(
2497
+ session,
2498
+ "extract.emails",
2499
+ "Extract email addresses from HTML content",
2500
+ {"sources": len(html_samples)},
2501
+ status="completed",
2502
+ result={"emails_found": len(all_emails), "unique": len(set(all_emails))},
2503
+ reward=0.02,
2504
+ )
2505
+ await manager.broadcast(extract_emails_result, session_id)
2506
+ yield _sse_event(extract_emails_result)
2507
+
2508
  analysis_payload = {
2509
  "instructions": request.instructions,
2510
  "output_instructions": request.output_instructions,
backend/app/plugins/registry.py ADDED
@@ -0,0 +1,774 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Plugin registry for scrapeRL - manages all available plugins and tools."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Any, Callable, Optional
7
+ from enum import Enum
8
+
9
+
10
class PluginCategory(str, Enum):
    """Categories used to group plugins and tools.

    Members mix in ``str``, so each one is also a string equal to its
    lowercase value (for example ``PluginCategory.BROWSER == "browser"``),
    and ``PluginCategory("browser")`` looks a member up by that value.
    """
    BROWSER = "browser"
    PARSER = "parser"
    DATA = "data"
    NETWORK = "network"
    MEDIA = "media"
    ANALYSIS = "analysis"
    EXTRACTION = "extraction"
    VALIDATION = "validation"
    STORAGE = "storage"
    AI = "ai"
22
+
23
+
24
@dataclass
class ToolDefinition:
    """Definition of a tool that can be called by agents.

    Holds descriptive metadata only; the three collection fields default to
    fresh empty containers per instance.
    """
    # Tool identifier; entries in this module use dotted names such as
    # "browser.navigate".
    name: str
    # Human-readable summary of what the tool does.
    description: str
    # Category the tool is listed under.
    category: PluginCategory
    # Parameter name -> type description (entries in this module use strings
    # such as "string" or "int (ms)").
    parameters: dict[str, Any] = field(default_factory=dict)
    # Returned field name -> type description, in the same string form.
    returns: dict[str, Any] = field(default_factory=dict)
    # Usage examples; empty unless provided.
    examples: list[str] = field(default_factory=list)
33
+
34
+
35
@dataclass
class PluginDefinition:
    """Definition of a plugin with its tools."""
    # Plugin identifier.
    id: str
    # Display name of the plugin.
    name: str
    # Human-readable summary of the plugin.
    description: str
    # Category the plugin is listed under.
    category: PluginCategory
    # Tools provided by the plugin; a fresh empty list per instance by default.
    tools: list[ToolDefinition] = field(default_factory=list)
    # Whether the plugin is enabled; defaults to enabled.
    enabled: bool = True
    # Version string; defaults to "1.0.0".
    version: str = "1.0.0"
45
+
46
+
47
+ # ==============================================================================
48
+ # BROWSER TOOLS
49
+ # ==============================================================================
50
+
51
+ BROWSER_TOOLS = [
52
+ ToolDefinition(
53
+ name="browser.navigate",
54
+ description="Navigate browser to a URL and wait for page load",
55
+ category=PluginCategory.BROWSER,
56
+ parameters={"url": "string", "wait_for": "string (page_load|network_idle)"},
57
+ returns={"success": "bool", "html_length": "int", "status_code": "int"},
58
+ ),
59
+ ToolDefinition(
60
+ name="browser.click",
61
+ description="Click on an element matching the selector",
62
+ category=PluginCategory.BROWSER,
63
+ parameters={"selector": "string", "wait_after": "int (ms)"},
64
+ returns={"clicked": "bool", "element_found": "bool"},
65
+ ),
66
+ ToolDefinition(
67
+ name="browser.type",
68
+ description="Type text into an input field",
69
+ category=PluginCategory.BROWSER,
70
+ parameters={"selector": "string", "text": "string", "clear_first": "bool"},
71
+ returns={"typed": "bool", "element_found": "bool"},
72
+ ),
73
+ ToolDefinition(
74
+ name="browser.scroll",
75
+ description="Scroll the page or element",
76
+ category=PluginCategory.BROWSER,
77
+ parameters={"direction": "string (up|down|top|bottom)", "amount": "int (px)"},
78
+ returns={"scrolled": "bool", "new_position": "int"},
79
+ ),
80
+ ToolDefinition(
81
+ name="browser.screenshot",
82
+ description="Capture a screenshot of the page or element",
83
+ category=PluginCategory.BROWSER,
84
+ parameters={"selector": "string (optional)", "full_page": "bool"},
85
+ returns={"captured": "bool", "size_bytes": "int", "dimensions": "dict"},
86
+ ),
87
+ ToolDefinition(
88
+ name="browser.wait",
89
+ description="Wait for an element or condition",
90
+ category=PluginCategory.BROWSER,
91
+ parameters={"selector": "string", "timeout_ms": "int", "state": "string"},
92
+ returns={"found": "bool", "waited_ms": "int"},
93
+ ),
94
+ ToolDefinition(
95
+ name="browser.execute_js",
96
+ description="Execute JavaScript in browser context",
97
+ category=PluginCategory.BROWSER,
98
+ parameters={"script": "string", "args": "list"},
99
+ returns={"result": "any", "error": "string|null"},
100
+ ),
101
+ ToolDefinition(
102
+ name="browser.get_cookies",
103
+ description="Get cookies for current domain",
104
+ category=PluginCategory.BROWSER,
105
+ parameters={"domain": "string (optional)"},
106
+ returns={"cookies": "list[dict]", "count": "int"},
107
+ ),
108
+ ]
109
+
110
+ # ==============================================================================
111
+ # HTML/DOM PARSING TOOLS
112
+ # ==============================================================================
113
+
114
+ HTML_TOOLS = [
115
+ ToolDefinition(
116
+ name="html.parse",
117
+ description="Parse HTML document into structured DOM",
118
+ category=PluginCategory.PARSER,
119
+ parameters={"parser": "string (html.parser|lxml)", "content_length": "int"},
120
+ returns={"parsed": "bool", "soup_type": "string"},
121
+ ),
122
+ ToolDefinition(
123
+ name="html.select",
124
+ description="Select elements using CSS selector",
125
+ category=PluginCategory.PARSER,
126
+ parameters={"selector": "string", "limit": "int (optional)"},
127
+ returns={"elements_found": "int", "selector_used": "string"},
128
+ ),
129
+ ToolDefinition(
130
+ name="html.select_one",
131
+ description="Select first element matching CSS selector",
132
+ category=PluginCategory.PARSER,
133
+ parameters={"selector": "string"},
134
+ returns={"found": "bool", "tag": "string", "text": "string"},
135
+ ),
136
+ ToolDefinition(
137
+ name="html.find_all",
138
+ description="Find all elements by tag and attributes (bs4)",
139
+ category=PluginCategory.PARSER,
140
+ parameters={"tag": "string", "attrs": "dict", "recursive": "bool"},
141
+ returns={"elements_found": "int", "tags": "list[string]"},
142
+ ),
143
+ ToolDefinition(
144
+ name="html.get_text",
145
+ description="Extract text content from element or page",
146
+ category=PluginCategory.PARSER,
147
+ parameters={"selector": "string (optional)", "separator": "string"},
148
+ returns={"text": "string", "length": "int"},
149
+ ),
150
+ ToolDefinition(
151
+ name="html.get_attribute",
152
+ description="Get attribute value from element",
153
+ category=PluginCategory.PARSER,
154
+ parameters={"selector": "string", "attribute": "string"},
155
+ returns={"value": "string|null", "found": "bool"},
156
+ ),
157
+ ToolDefinition(
158
+ name="html.extract_links",
159
+ description="Extract all links from page",
160
+ category=PluginCategory.PARSER,
161
+ parameters={"base_url": "string", "filter_pattern": "string (optional)"},
162
+ returns={"links": "list[dict]", "count": "int"},
163
+ ),
164
+ ToolDefinition(
165
+ name="html.extract_images",
166
+ description="Extract all images with src and alt",
167
+ category=PluginCategory.PARSER,
168
+ parameters={"include_lazy": "bool"},
169
+ returns={"images": "list[dict]", "count": "int"},
170
+ ),
171
+ ToolDefinition(
172
+ name="html.extract_tables",
173
+ description="Extract HTML tables as structured data",
174
+ category=PluginCategory.PARSER,
175
+ parameters={"selector": "string (optional)"},
176
+ returns={"tables": "list[list[list]]", "count": "int"},
177
+ ),
178
+ ToolDefinition(
179
+ name="html.extract_forms",
180
+ description="Extract form structure and fields",
181
+ category=PluginCategory.PARSER,
182
+ parameters={"selector": "string (optional)"},
183
+ returns={"forms": "list[dict]", "count": "int"},
184
+ ),
185
+ ]
186
+
187
+ # ==============================================================================
188
+ # DATA PROCESSING TOOLS
189
+ # ==============================================================================
190
+
191
+ DATA_TOOLS = [
192
+ ToolDefinition(
193
+ name="json.parse",
194
+ description="Parse JSON string into object",
195
+ category=PluginCategory.DATA,
196
+ parameters={"text": "string"},
197
+ returns={"data": "any", "valid": "bool"},
198
+ ),
199
+ ToolDefinition(
200
+ name="json.dumps",
201
+ description="Convert object to JSON string",
202
+ category=PluginCategory.DATA,
203
+ parameters={"data": "any", "indent": "int", "sort_keys": "bool"},
204
+ returns={"output": "string", "length": "int"},
205
+ ),
206
+ ToolDefinition(
207
+ name="csv.generate",
208
+ description="Generate CSV from data",
209
+ category=PluginCategory.DATA,
210
+ parameters={"data": "list[dict]", "fields": "list[string]"},
211
+ returns={"csv": "string", "rows": "int", "columns": "int"},
212
+ ),
213
+ ToolDefinition(
214
+ name="csv.parse",
215
+ description="Parse CSV string into records",
216
+ category=PluginCategory.DATA,
217
+ parameters={"text": "string", "delimiter": "string", "has_header": "bool"},
218
+ returns={"records": "list[dict]", "rows": "int", "columns": "int"},
219
+ ),
220
+ ToolDefinition(
221
+ name="pandas.DataFrame",
222
+ description="Create pandas DataFrame from data",
223
+ category=PluginCategory.DATA,
224
+ parameters={"data": "list[dict]|dict"},
225
+ returns={"shape": "tuple", "columns": "list[string]", "dtypes": "dict"},
226
+ ),
227
+ ToolDefinition(
228
+ name="pandas.read_csv",
229
+ description="Read CSV data into DataFrame",
230
+ category=PluginCategory.DATA,
231
+ parameters={"content": "string", "sep": "string"},
232
+ returns={"shape": "tuple", "columns": "list[string]"},
233
+ ),
234
+ ToolDefinition(
235
+ name="pandas.to_csv",
236
+ description="Convert DataFrame to CSV",
237
+ category=PluginCategory.DATA,
238
+ parameters={"index": "bool"},
239
+ returns={"csv": "string", "rows": "int"},
240
+ ),
241
+ ToolDefinition(
242
+ name="pandas.describe",
243
+ description="Generate descriptive statistics",
244
+ category=PluginCategory.DATA,
245
+ parameters={"include": "string (all|numeric)"},
246
+ returns={"statistics": "dict", "columns": "list[string]"},
247
+ ),
248
+ ToolDefinition(
249
+ name="pandas.groupby",
250
+ description="Group data by columns and aggregate",
251
+ category=PluginCategory.DATA,
252
+ parameters={"by": "list[string]", "agg": "dict"},
253
+ returns={"groups": "int", "result": "dict"},
254
+ ),
255
+ ToolDefinition(
256
+ name="pandas.filter",
257
+ description="Filter DataFrame rows by condition",
258
+ category=PluginCategory.DATA,
259
+ parameters={"condition": "string"},
260
+ returns={"filtered_rows": "int", "original_rows": "int"},
261
+ ),
262
+ ]
263
+
264
+ # ==============================================================================
265
+ # REGEX/TEXT TOOLS
266
+ # ==============================================================================
267
+
268
+ REGEX_TOOLS = [
269
+ ToolDefinition(
270
+ name="regex.match",
271
+ description="Match pattern at start of string",
272
+ category=PluginCategory.EXTRACTION,
273
+ parameters={"pattern": "string", "text": "string", "flags": "string"},
274
+ returns={"matched": "bool", "groups": "list[string]"},
275
+ ),
276
+ ToolDefinition(
277
+ name="regex.search",
278
+ description="Search for pattern anywhere in string",
279
+ category=PluginCategory.EXTRACTION,
280
+ parameters={"pattern": "string", "text": "string"},
281
+ returns={"found": "bool", "position": "int", "match": "string"},
282
+ ),
283
+ ToolDefinition(
284
+ name="regex.findall",
285
+ description="Find all matches of pattern",
286
+ category=PluginCategory.EXTRACTION,
287
+ parameters={"pattern": "string", "text": "string"},
288
+ returns={"matches": "list[string]", "count": "int"},
289
+ ),
290
+ ToolDefinition(
291
+ name="regex.sub",
292
+ description="Replace pattern matches in string",
293
+ category=PluginCategory.EXTRACTION,
294
+ parameters={"pattern": "string", "replacement": "string", "text": "string"},
295
+ returns={"result": "string", "replacements": "int"},
296
+ ),
297
+ ToolDefinition(
298
+ name="regex.split",
299
+ description="Split string by pattern",
300
+ category=PluginCategory.EXTRACTION,
301
+ parameters={"pattern": "string", "text": "string", "maxsplit": "int"},
302
+ returns={"parts": "list[string]", "count": "int"},
303
+ ),
304
+ ]
305
+
306
+ # ==============================================================================
307
+ # NETWORK/API TOOLS
308
+ # ==============================================================================
309
+
310
+ NETWORK_TOOLS = [
311
+ ToolDefinition(
312
+ name="http.get",
313
+ description="Make HTTP GET request",
314
+ category=PluginCategory.NETWORK,
315
+ parameters={"url": "string", "headers": "dict", "timeout": "int"},
316
+ returns={"status_code": "int", "content_length": "int", "headers": "dict"},
317
+ ),
318
+ ToolDefinition(
319
+ name="http.post",
320
+ description="Make HTTP POST request",
321
+ category=PluginCategory.NETWORK,
322
+ parameters={"url": "string", "data": "dict", "json": "dict", "headers": "dict"},
323
+ returns={"status_code": "int", "response": "any"},
324
+ ),
325
+ ToolDefinition(
326
+ name="http.head",
327
+ description="Make HTTP HEAD request to get headers",
328
+ category=PluginCategory.NETWORK,
329
+ parameters={"url": "string", "timeout": "int"},
330
+ returns={"status_code": "int", "headers": "dict"},
331
+ ),
332
+ ToolDefinition(
333
+ name="url.parse",
334
+ description="Parse URL into components",
335
+ category=PluginCategory.NETWORK,
336
+ parameters={"url": "string"},
337
+ returns={"scheme": "string", "domain": "string", "path": "string", "params": "dict"},
338
+ ),
339
+ ToolDefinition(
340
+ name="url.join",
341
+ description="Join base URL with relative path",
342
+ category=PluginCategory.NETWORK,
343
+ parameters={"base": "string", "path": "string"},
344
+ returns={"url": "string"},
345
+ ),
346
+ ]
347
+
348
+ # ==============================================================================
349
+ # MEDIA TOOLS
350
+ # ==============================================================================
351
+
352
+ MEDIA_TOOLS = [
353
+ ToolDefinition(
354
+ name="image.download",
355
+ description="Download image from URL",
356
+ category=PluginCategory.MEDIA,
357
+ parameters={"url": "string", "timeout": "int"},
358
+ returns={"size_bytes": "int", "format": "string", "dimensions": "dict"},
359
+ ),
360
+ ToolDefinition(
361
+ name="image.analyze",
362
+ description="Analyze image properties",
363
+ category=PluginCategory.MEDIA,
364
+ parameters={"url": "string"},
365
+ returns={"width": "int", "height": "int", "format": "string", "has_transparency": "bool"},
366
+ ),
367
+ ToolDefinition(
368
+ name="pdf.extract_text",
369
+ description="Extract text content from PDF",
370
+ category=PluginCategory.MEDIA,
371
+ parameters={"url": "string", "pages": "list[int]"},
372
+ returns={"text": "string", "pages": "int", "words": "int"},
373
+ ),
374
+ ToolDefinition(
375
+ name="video.metadata",
376
+ description="Extract video metadata",
377
+ category=PluginCategory.MEDIA,
378
+ parameters={"url": "string"},
379
+ returns={"duration": "int", "resolution": "string", "format": "string"},
380
+ ),
381
+ ]
382
+
383
+ # ==============================================================================
384
+ # ANALYSIS TOOLS
385
+ # ==============================================================================
386
+
387
+ ANALYSIS_TOOLS = [
388
+ ToolDefinition(
389
+ name="stats.describe",
390
+ description="Calculate descriptive statistics",
391
+ category=PluginCategory.ANALYSIS,
392
+ parameters={"data": "list[number]"},
393
+ returns={"mean": "float", "median": "float", "std": "float", "min": "float", "max": "float"},
394
+ ),
395
+ ToolDefinition(
396
+ name="stats.correlation",
397
+ description="Calculate correlation between datasets",
398
+ category=PluginCategory.ANALYSIS,
399
+ parameters={"x": "list[number]", "y": "list[number]"},
400
+ returns={"correlation": "float", "p_value": "float"},
401
+ ),
402
+ ToolDefinition(
403
+ name="text.sentiment",
404
+ description="Analyze sentiment of text",
405
+ category=PluginCategory.ANALYSIS,
406
+ parameters={"text": "string"},
407
+ returns={"score": "float", "label": "string (positive|negative|neutral)"},
408
+ ),
409
+ ToolDefinition(
410
+ name="text.entities",
411
+ description="Extract named entities from text",
412
+ category=PluginCategory.ANALYSIS,
413
+ parameters={"text": "string", "types": "list[string]"},
414
+ returns={"entities": "list[dict]", "count": "int"},
415
+ ),
416
+ ToolDefinition(
417
+ name="text.keywords",
418
+ description="Extract keywords from text",
419
+ category=PluginCategory.ANALYSIS,
420
+ parameters={"text": "string", "top_k": "int"},
421
+ returns={"keywords": "list[string]", "scores": "list[float]"},
422
+ ),
423
+ ]
424
+
425
+ # ==============================================================================
426
+ # EXTRACTION TOOLS
427
+ # ==============================================================================
428
+
429
+ EXTRACTION_TOOLS = [
430
+ ToolDefinition(
431
+ name="extract.emails",
432
+ description="Extract email addresses from text",
433
+ category=PluginCategory.EXTRACTION,
434
+ parameters={"text": "string"},
435
+ returns={"emails": "list[string]", "count": "int"},
436
+ ),
437
+ ToolDefinition(
438
+ name="extract.phones",
439
+ description="Extract phone numbers from text",
440
+ category=PluginCategory.EXTRACTION,
441
+ parameters={"text": "string", "country_code": "string"},
442
+ returns={"phones": "list[string]", "count": "int"},
443
+ ),
444
+ ToolDefinition(
445
+ name="extract.urls",
446
+ description="Extract URLs from text",
447
+ category=PluginCategory.EXTRACTION,
448
+ parameters={"text": "string"},
449
+ returns={"urls": "list[string]", "count": "int"},
450
+ ),
451
+ ToolDefinition(
452
+ name="extract.dates",
453
+ description="Extract and parse dates from text",
454
+ category=PluginCategory.EXTRACTION,
455
+ parameters={"text": "string", "format": "string"},
456
+ returns={"dates": "list[string]", "count": "int"},
457
+ ),
458
+ ToolDefinition(
459
+ name="extract.prices",
460
+ description="Extract prices and currencies from text",
461
+ category=PluginCategory.EXTRACTION,
462
+ parameters={"text": "string"},
463
+ returns={"prices": "list[dict]", "count": "int"},
464
+ ),
465
+ ToolDefinition(
466
+ name="extract.addresses",
467
+ description="Extract physical addresses from text",
468
+ category=PluginCategory.EXTRACTION,
469
+ parameters={"text": "string"},
470
+ returns={"addresses": "list[dict]", "count": "int"},
471
+ ),
472
+ ToolDefinition(
473
+ name="extract.social_handles",
474
+ description="Extract social media handles",
475
+ category=PluginCategory.EXTRACTION,
476
+ parameters={"text": "string", "platforms": "list[string]"},
477
+ returns={"handles": "dict[string, list]", "count": "int"},
478
+ ),
479
+ ]
480
+
481
+ # ==============================================================================
482
+ # VALIDATION TOOLS
483
+ # ==============================================================================
484
+
485
# Tool definitions attached to the plugin with id="validation" in PLUGINS.
# In each entry, `parameters` and `returns` map a field name to a string
# describing that field's type.
VALIDATION_TOOLS: list[ToolDefinition] = [
    ToolDefinition(
        name="validate.url",
        description="Validate URL format and accessibility",
        category=PluginCategory.VALIDATION,
        parameters={"url": "string", "check_accessibility": "bool"},
        returns={"valid": "bool", "accessible": "bool", "status_code": "int"},
    ),
    ToolDefinition(
        name="validate.email",
        description="Validate email format",
        category=PluginCategory.VALIDATION,
        parameters={"email": "string"},
        returns={"valid": "bool", "normalized": "string"},
    ),
    ToolDefinition(
        name="validate.json",
        description="Validate JSON format",
        category=PluginCategory.VALIDATION,
        parameters={"text": "string"},
        returns={"valid": "bool", "error": "string|null"},
    ),
    ToolDefinition(
        name="validate.html",
        description="Validate HTML structure",
        category=PluginCategory.VALIDATION,
        parameters={"html": "string"},
        returns={"valid": "bool", "errors": "list[string]"},
    ),
    ToolDefinition(
        name="validate.schema",
        description="Validate data against JSON schema",
        category=PluginCategory.VALIDATION,
        parameters={"data": "any", "schema": "dict"},
        returns={"valid": "bool", "errors": "list[string]"},
    ),
]
522
+
523
+ # ==============================================================================
524
+ # STORAGE TOOLS
525
+ # ==============================================================================
526
+
527
# Tool definitions attached to the plugin with id="storage" in PLUGINS.
# Two name prefixes appear here: `memory.*` and `cache.*`.
# In each entry, `parameters` and `returns` map a field name to a string
# describing that field's type.
STORAGE_TOOLS: list[ToolDefinition] = [
    ToolDefinition(
        name="memory.store",
        description="Store data in long-term memory",
        category=PluginCategory.STORAGE,
        parameters={"key": "string", "value": "any", "ttl": "int"},
        returns={"stored": "bool", "key": "string"},
    ),
    ToolDefinition(
        name="memory.retrieve",
        description="Retrieve data from memory",
        category=PluginCategory.STORAGE,
        parameters={"key": "string"},
        returns={"found": "bool", "value": "any"},
    ),
    ToolDefinition(
        name="memory.search",
        description="Search memory by semantic similarity",
        category=PluginCategory.STORAGE,
        parameters={"query": "string", "limit": "int"},
        returns={"results": "list[dict]", "count": "int"},
    ),
    ToolDefinition(
        name="cache.get",
        description="Get value from session cache",
        category=PluginCategory.STORAGE,
        parameters={"key": "string"},
        returns={"found": "bool", "value": "any"},
    ),
    ToolDefinition(
        name="cache.set",
        description="Set value in session cache",
        category=PluginCategory.STORAGE,
        parameters={"key": "string", "value": "any"},
        returns={"stored": "bool"},
    ),
]
564
+
565
+ # ==============================================================================
566
+ # SANDBOX TOOLS
567
+ # ==============================================================================
568
+
569
# Tool definitions attached to the plugin with id="sandbox" in PLUGINS.
# NOTE(review): every tool here is tagged PluginCategory.AI, the same category
# the "sandbox" plugin entry uses — confirm that sharing the AI category
# (rather than a sandbox-specific one) is intended.
SANDBOX_TOOLS: list[ToolDefinition] = [
    ToolDefinition(
        name="sandbox.execute",
        description="Execute Python code in sandboxed environment",
        category=PluginCategory.AI,
        parameters={"code": "string", "payload": "dict", "timeout": "int"},
        returns={"success": "bool", "output": "any", "stdout": "string"},
    ),
    ToolDefinition(
        name="sandbox.analyze",
        description="Run data analysis in sandbox",
        category=PluginCategory.AI,
        parameters={"data": "list[dict]", "analysis_type": "string"},
        returns={"result": "dict", "visualizations": "list"},
    ),
    ToolDefinition(
        name="sandbox.transform",
        description="Transform data using sandbox code",
        category=PluginCategory.AI,
        parameters={"data": "any", "transform_code": "string"},
        returns={"transformed": "any", "success": "bool"},
    ),
]
592
+
593
+ # ==============================================================================
594
+ # AI TOOLS
595
+ # ==============================================================================
596
+
597
# Tool definitions attached to the plugin with id="ai" in PLUGINS.
# In each entry, `parameters` and `returns` map a field name to a string
# describing that field's type.
AI_TOOLS: list[ToolDefinition] = [
    ToolDefinition(
        name="ai.complete",
        description="Generate text completion using AI model",
        category=PluginCategory.AI,
        parameters={"prompt": "string", "model": "string", "max_tokens": "int"},
        returns={"text": "string", "tokens_used": "int"},
    ),
    ToolDefinition(
        name="ai.embed",
        description="Generate embeddings for text",
        category=PluginCategory.AI,
        parameters={"text": "string", "model": "string"},
        returns={"embedding": "list[float]", "dimensions": "int"},
    ),
    ToolDefinition(
        name="ai.classify",
        description="Classify text into categories",
        category=PluginCategory.AI,
        parameters={"text": "string", "labels": "list[string]"},
        returns={"label": "string", "confidence": "float"},
    ),
    ToolDefinition(
        name="ai.summarize",
        description="Summarize text content",
        category=PluginCategory.AI,
        parameters={"text": "string", "max_length": "int"},
        returns={"summary": "string", "reduction_ratio": "float"},
    ),
]
627
+
628
+ # ==============================================================================
629
+ # PLUGIN DEFINITIONS
630
+ # ==============================================================================
631
+
632
# Module-level plugin registry. Each entry pairs plugin metadata (id, name,
# description, category) with one of the *_TOOLS lists defined earlier in this
# module. The lookup helpers in this module scan this list in order, so the
# first entry with a given id (or containing a given tool name) wins.
PLUGINS: list[PluginDefinition] = [
    PluginDefinition(
        id="browser",
        name="Browser Automation",
        description="Control browser navigation, clicks, typing, and screenshots",
        category=PluginCategory.BROWSER,
        tools=BROWSER_TOOLS,
    ),
    PluginDefinition(
        id="html-parser",
        name="HTML/DOM Parser",
        description="Parse and query HTML documents using BeautifulSoup",
        category=PluginCategory.PARSER,
        tools=HTML_TOOLS,
    ),
    PluginDefinition(
        id="data-processing",
        name="Data Processing",
        description="JSON, CSV, and Pandas data processing tools",
        category=PluginCategory.DATA,
        tools=DATA_TOOLS,
    ),
    # Shares PluginCategory.EXTRACTION with the "extraction" plugin below.
    PluginDefinition(
        id="regex",
        name="Regular Expressions",
        description="Pattern matching and text extraction using regex",
        category=PluginCategory.EXTRACTION,
        tools=REGEX_TOOLS,
    ),
    PluginDefinition(
        id="network",
        name="Network/HTTP",
        description="HTTP requests and URL handling",
        category=PluginCategory.NETWORK,
        tools=NETWORK_TOOLS,
    ),
    PluginDefinition(
        id="media",
        name="Media Processing",
        description="Image, PDF, and video processing tools",
        category=PluginCategory.MEDIA,
        tools=MEDIA_TOOLS,
    ),
    PluginDefinition(
        id="analysis",
        name="Analysis",
        description="Statistical analysis and NLP tools",
        category=PluginCategory.ANALYSIS,
        tools=ANALYSIS_TOOLS,
    ),
    PluginDefinition(
        id="extraction",
        name="Data Extraction",
        description="Extract structured data like emails, phones, addresses",
        category=PluginCategory.EXTRACTION,
        tools=EXTRACTION_TOOLS,
    ),
    PluginDefinition(
        id="validation",
        name="Validation",
        description="Validate URLs, emails, JSON, HTML, and schemas",
        category=PluginCategory.VALIDATION,
        tools=VALIDATION_TOOLS,
    ),
    PluginDefinition(
        id="storage",
        name="Storage/Memory",
        description="Long-term memory and session cache",
        category=PluginCategory.STORAGE,
        tools=STORAGE_TOOLS,
    ),
    # Shares PluginCategory.AI with the "ai" plugin below.
    PluginDefinition(
        id="sandbox",
        name="Python Sandbox",
        description="Execute Python code in isolated sandbox",
        category=PluginCategory.AI,
        tools=SANDBOX_TOOLS,
    ),
    PluginDefinition(
        id="ai",
        name="AI/LLM",
        description="AI completion, embeddings, and classification",
        category=PluginCategory.AI,
        tools=AI_TOOLS,
    ),
]
718
+
719
+
720
def get_all_plugins() -> list[PluginDefinition]:
    """Return every registered plugin, in registration order.

    Returns:
        A new list containing the entries of ``PLUGINS``. The list is a
        shallow copy, so callers may sort, filter, or extend it without
        altering the module-level registry; the ``PluginDefinition`` objects
        themselves are shared, not copied.
    """
    # Hand out a copy: returning PLUGINS itself would let any caller that
    # mutates the result silently change the registry for everyone else.
    return list(PLUGINS)
723
+
724
+
725
def get_plugin(plugin_id: str) -> Optional[PluginDefinition]:
    """Look up a plugin by its identifier.

    Args:
        plugin_id: Value compared for equality against each plugin's ``id``.

    Returns:
        The first entry of ``PLUGINS`` whose ``id`` equals ``plugin_id``, or
        ``None`` when no entry matches.
    """
    matching = (candidate for candidate in PLUGINS if candidate.id == plugin_id)
    return next(matching, None)
731
+
732
+
733
def get_all_tools() -> list[ToolDefinition]:
    """Collect the tools of every plugin into one flat list.

    Returns:
        A new list with each plugin's ``tools`` concatenated in the order the
        plugins appear in ``PLUGINS``.
    """
    return [tool for plugin in PLUGINS for tool in plugin.tools]
739
+
740
+
741
def get_tool(tool_name: str) -> Optional[ToolDefinition]:
    """Look up a tool definition by its name.

    Args:
        tool_name: Value compared for equality against each tool's ``name``.

    Returns:
        The first matching tool, scanning plugins in ``PLUGINS`` order and
        each plugin's ``tools`` in order, or ``None`` when nothing matches.
    """
    candidates = (
        tool
        for plugin in PLUGINS
        for tool in plugin.tools
        if tool.name == tool_name
    )
    return next(candidates, None)
748
+
749
+
750
def get_tools_by_category(category: PluginCategory) -> list[ToolDefinition]:
    """Return every tool whose own category equals ``category``.

    Args:
        category: Category to select; compared for equality against each
            tool's ``category`` attribute.

    Returns:
        A new list of matching tools, ordered by plugin position in
        ``PLUGINS`` and then by position within the plugin's ``tools``.
        Empty when no tool matches.
    """
    # Match on the tool's declared category, not the owning plugin's: a tool
    # must never be returned under a category it does not itself declare, and
    # a tool bundled in a differently-categorised plugin must still be found.
    return [
        tool
        for plugin in PLUGINS
        for tool in plugin.tools
        if tool.category == category
    ]
757
+
758
+
759
def get_plugin_summary() -> dict[str, Any]:
    """Summarise the plugin registry.

    Returns:
        A dict with the keys:

        * ``plugins_count`` - number of entries in ``PLUGINS``.
        * ``tools_count`` - total number of tools across all plugins.
        * ``categories`` - distinct plugin category values, in the order
          they first appear in ``PLUGINS``.
        * ``plugins`` - one dict per plugin with its ``id``, ``name``,
          ``category`` value and ``tools_count``.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # result is stable across calls and interpreter runs; iterating a set
    # gives an arbitrary order.
    categories = list(dict.fromkeys(p.category.value for p in PLUGINS))
    return {
        "plugins_count": len(PLUGINS),
        "tools_count": sum(len(p.tools) for p in PLUGINS),
        "categories": categories,
        "plugins": [
            {
                "id": p.id,
                "name": p.name,
                "category": p.category.value,
                "tools_count": len(p.tools),
            }
            for p in PLUGINS
        ],
    }