NeerajCodz committed on
Commit
48f04de
·
1 Parent(s): 64ae2f9

feat: add comprehensive tool call visibility in step accordion

Browse files
Files changed (1) hide show
  1. backend/app/api/routes/scrape.py +292 -0
backend/app/api/routes/scrape.py CHANGED
@@ -2063,6 +2063,30 @@ async def scrape_stream(
2063
  "'site_strategy': (payload.get('site_template') or {}).get('default_strategy')"
2064
  "}"
2065
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2066
  try:
2067
  planner_sandbox = await asyncio.to_thread(
2068
  execute_python_sandbox,
@@ -2078,6 +2102,29 @@ async def scrape_stream(
2078
  error=f"Planner sandbox setup failed: {exc}",
2079
  )
2080
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2081
  if planner_sandbox.success and planner_sandbox.output is not None:
2082
  planner_python_event = _record_step(
2083
  session,
@@ -2163,6 +2210,31 @@ async def scrape_stream(
2163
  "'strategy': payload.get('navigation_strategy')"
2164
  "}"
2165
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2166
  try:
2167
  navigator_sandbox = await asyncio.to_thread(
2168
  execute_python_sandbox,
@@ -2178,6 +2250,29 @@ async def scrape_stream(
2178
  error=f"Navigator sandbox setup failed: {exc}",
2179
  )
2180
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2181
  if navigator_sandbox.success and navigator_sandbox.output is not None:
2182
  navigator_python_event = _record_step(
2183
  session,
@@ -2292,6 +2387,77 @@ async def scrape_stream(
2292
  }
2293
 
2294
  sandbox_code = request.python_code or DEFAULT_ANALYSIS_CODE
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2295
  try:
2296
  sandbox_result = await asyncio.to_thread(
2297
  execute_python_sandbox,
@@ -2307,6 +2473,29 @@ async def scrape_stream(
2307
  error=f"Sandbox setup failed: {exc}",
2308
  stderr="",
2309
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2310
 
2311
  if sandbox_result.success and sandbox_result.output is not None:
2312
  if isinstance(session["extracted_data"], dict):
@@ -2348,16 +2537,84 @@ async def scrape_stream(
2348
  yield _sse_event(sandbox_event)
2349
 
2350
  duration = time.time() - start_time
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2351
  output = await format_output(
2352
  session["extracted_data"],
2353
  request.output_format,
2354
  request.output_instructions,
2355
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2356
  output_ext = request.output_format.value
2357
  _write_session_artifact(session, f"final_output.{output_ext}", output)
2358
  _write_session_json_artifact(session, "final_extracted_data.json", session["extracted_data"])
2359
 
2360
  if request.enable_memory:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2361
  try:
2362
  await memory_manager.store(
2363
  key=f"scrape:{session_id}:summary",
@@ -2371,8 +2628,43 @@ async def scrape_stream(
2371
  },
2372
  )
2373
  _write_session_artifact(session, "memory_summary.txt", output)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2374
  except Exception as exc:
2375
  session["errors"].append(f"Failed to store summary memory: {exc}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2376
 
2377
  response = ScrapeResponse(
2378
  session_id=session_id,
 
2063
  "'site_strategy': (payload.get('site_template') or {}).get('default_strategy')"
2064
  "}"
2065
  )
2066
+
2067
+ # Tool call: sandbox.execute (planner)
2068
+ sandbox_tool_event = _record_step(
2069
+ session,
2070
+ ScrapeStep(
2071
+ step_number=len(session["steps"]) + 1,
2072
+ action="tool_call",
2073
+ status="running",
2074
+ message="sandbox.execute(code='planner_analysis')",
2075
+ extracted_data={
2076
+ "tool_name": "sandbox.execute",
2077
+ "tool_description": "Execute Python code in isolated sandbox environment",
2078
+ "parameters": {
2079
+ "code_type": "planner_analysis",
2080
+ "imports": ["json"],
2081
+ "payload_keys": list(planner_payload.keys()),
2082
+ },
2083
+ },
2084
+ timestamp=_now_iso(),
2085
+ ),
2086
+ )
2087
+ await manager.broadcast(sandbox_tool_event, session_id)
2088
+ yield _sse_event(sandbox_tool_event)
2089
+
2090
  try:
2091
  planner_sandbox = await asyncio.to_thread(
2092
  execute_python_sandbox,
 
2102
  error=f"Planner sandbox setup failed: {exc}",
2103
  )
2104
 
2105
+ # Tool call result
2106
+ sandbox_result_event = _record_step(
2107
+ session,
2108
+ ScrapeStep(
2109
+ step_number=len(session["steps"]),
2110
+ action="tool_call",
2111
+ status="completed" if planner_sandbox.success else "failed",
2112
+ message=f"sandbox.execute() → {'success' if planner_sandbox.success else 'failed'}",
2113
+ reward=0.05 if planner_sandbox.success else 0.0,
2114
+ extracted_data={
2115
+ "tool_name": "sandbox.execute",
2116
+ "result": {
2117
+ "success": planner_sandbox.success,
2118
+ "output_keys": list(planner_sandbox.output.keys()) if planner_sandbox.output else [],
2119
+ "error": planner_sandbox.error,
2120
+ },
2121
+ },
2122
+ timestamp=_now_iso(),
2123
+ ),
2124
+ )
2125
+ await manager.broadcast(sandbox_result_event, session_id)
2126
+ yield _sse_event(sandbox_result_event)
2127
+
2128
  if planner_sandbox.success and planner_sandbox.output is not None:
2129
  planner_python_event = _record_step(
2130
  session,
 
2210
  "'strategy': payload.get('navigation_strategy')"
2211
  "}"
2212
  )
2213
+
2214
+ # Tool call: sandbox.execute (navigator)
2215
+ nav_sandbox_tool_event = _record_step(
2216
+ session,
2217
+ ScrapeStep(
2218
+ step_number=len(session["steps"]) + 1,
2219
+ action="tool_call",
2220
+ url=url,
2221
+ status="running",
2222
+ message="sandbox.execute(code='navigator_analysis')",
2223
+ extracted_data={
2224
+ "tool_name": "sandbox.execute",
2225
+ "tool_description": "Execute navigator analysis in sandbox",
2226
+ "parameters": {
2227
+ "code_type": "navigator_analysis",
2228
+ "imports": ["json"],
2229
+ "url": url,
2230
+ },
2231
+ },
2232
+ timestamp=_now_iso(),
2233
+ ),
2234
+ )
2235
+ await manager.broadcast(nav_sandbox_tool_event, session_id)
2236
+ yield _sse_event(nav_sandbox_tool_event)
2237
+
2238
  try:
2239
  navigator_sandbox = await asyncio.to_thread(
2240
  execute_python_sandbox,
 
2250
  error=f"Navigator sandbox setup failed: {exc}",
2251
  )
2252
 
2253
+ # Tool call result
2254
+ nav_sandbox_result_event = _record_step(
2255
+ session,
2256
+ ScrapeStep(
2257
+ step_number=len(session["steps"]),
2258
+ action="tool_call",
2259
+ url=url,
2260
+ status="completed" if navigator_sandbox.success else "failed",
2261
+ message=f"sandbox.execute() → {'success' if navigator_sandbox.success else 'failed'}",
2262
+ reward=0.05 if navigator_sandbox.success else 0.0,
2263
+ extracted_data={
2264
+ "tool_name": "sandbox.execute",
2265
+ "result": {
2266
+ "success": navigator_sandbox.success,
2267
+ "output_keys": list(navigator_sandbox.output.keys()) if navigator_sandbox.output else [],
2268
+ },
2269
+ },
2270
+ timestamp=_now_iso(),
2271
+ ),
2272
+ )
2273
+ await manager.broadcast(nav_sandbox_result_event, session_id)
2274
+ yield _sse_event(nav_sandbox_result_event)
2275
+
2276
  if navigator_sandbox.success and navigator_sandbox.output is not None:
2277
  navigator_python_event = _record_step(
2278
  session,
 
2387
  }
2388
 
2389
  sandbox_code = request.python_code or DEFAULT_ANALYSIS_CODE
2390
+
2391
+ # Tool call: pandas.DataFrame (data analysis)
2392
+ pandas_tool_event = _record_step(
2393
+ session,
2394
+ ScrapeStep(
2395
+ step_number=len(session["steps"]) + 1,
2396
+ action="tool_call",
2397
+ status="running",
2398
+ message="pandas.DataFrame(rows)",
2399
+ extracted_data={
2400
+ "tool_name": "pandas.DataFrame",
2401
+ "tool_description": "Create DataFrame from extracted dataset rows",
2402
+ "parameters": {
2403
+ "row_count": len(dataset_rows),
2404
+ "source_count": len(source_links),
2405
+ },
2406
+ },
2407
+ timestamp=_now_iso(),
2408
+ ),
2409
+ )
2410
+ await manager.broadcast(pandas_tool_event, session_id)
2411
+ yield _sse_event(pandas_tool_event)
2412
+
2413
+ # Tool call: bs4.BeautifulSoup (HTML analysis)
2414
+ if html_samples:
2415
+ bs4_tool_event = _record_step(
2416
+ session,
2417
+ ScrapeStep(
2418
+ step_number=len(session["steps"]) + 1,
2419
+ action="tool_call",
2420
+ status="running",
2421
+ message=f"bs4.BeautifulSoup(html, 'html.parser') × {len(html_samples)}",
2422
+ extracted_data={
2423
+ "tool_name": "bs4.BeautifulSoup",
2424
+ "tool_description": "Parse HTML samples for link analysis",
2425
+ "parameters": {
2426
+ "parser": "html.parser",
2427
+ "sample_count": len(html_samples),
2428
+ "total_bytes": sum(len(h) for h in html_samples.values()),
2429
+ },
2430
+ },
2431
+ timestamp=_now_iso(),
2432
+ ),
2433
+ )
2434
+ await manager.broadcast(bs4_tool_event, session_id)
2435
+ yield _sse_event(bs4_tool_event)
2436
+
2437
+ # Tool call: sandbox.execute (analysis)
2438
+ analysis_sandbox_event = _record_step(
2439
+ session,
2440
+ ScrapeStep(
2441
+ step_number=len(session["steps"]) + 1,
2442
+ action="tool_call",
2443
+ status="running",
2444
+ message="sandbox.execute(code='data_analysis')",
2445
+ extracted_data={
2446
+ "tool_name": "sandbox.execute",
2447
+ "tool_description": "Run comprehensive data analysis in sandbox",
2448
+ "parameters": {
2449
+ "imports": ["pandas", "numpy", "bs4", "json"],
2450
+ "dataset_rows": len(dataset_rows),
2451
+ "html_samples": len(html_samples),
2452
+ "custom_code": bool(request.python_code),
2453
+ },
2454
+ },
2455
+ timestamp=_now_iso(),
2456
+ ),
2457
+ )
2458
+ await manager.broadcast(analysis_sandbox_event, session_id)
2459
+ yield _sse_event(analysis_sandbox_event)
2460
+
2461
  try:
2462
  sandbox_result = await asyncio.to_thread(
2463
  execute_python_sandbox,
 
2473
  error=f"Sandbox setup failed: {exc}",
2474
  stderr="",
2475
  )
2476
+
2477
+ # Tool call result: sandbox.execute
2478
+ sandbox_exec_result_event = _record_step(
2479
+ session,
2480
+ ScrapeStep(
2481
+ step_number=len(session["steps"]),
2482
+ action="tool_call",
2483
+ status="completed" if sandbox_result.success else "failed",
2484
+ message=f"sandbox.execute() → {'analysis complete' if sandbox_result.success else 'failed'}",
2485
+ reward=0.1 if sandbox_result.success else 0.0,
2486
+ extracted_data={
2487
+ "tool_name": "sandbox.execute",
2488
+ "result": {
2489
+ "success": sandbox_result.success,
2490
+ "output_keys": list(sandbox_result.output.keys()) if sandbox_result.output else [],
2491
+ "error": sandbox_result.error if not sandbox_result.success else None,
2492
+ },
2493
+ },
2494
+ timestamp=_now_iso(),
2495
+ ),
2496
+ )
2497
+ await manager.broadcast(sandbox_exec_result_event, session_id)
2498
+ yield _sse_event(sandbox_exec_result_event)
2499
 
2500
  if sandbox_result.success and sandbox_result.output is not None:
2501
  if isinstance(session["extracted_data"], dict):
 
2537
  yield _sse_event(sandbox_event)
2538
 
2539
  duration = time.time() - start_time
2540
+
2541
+ # Tool call: json.dumps (output formatting)
2542
+ json_format_event = _record_step(
2543
+ session,
2544
+ ScrapeStep(
2545
+ step_number=len(session["steps"]) + 1,
2546
+ action="tool_call",
2547
+ status="running",
2548
+ message=f"json.dumps(data, format='{request.output_format.value}')",
2549
+ extracted_data={
2550
+ "tool_name": "json.dumps",
2551
+ "tool_description": f"Format extracted data as {request.output_format.value.upper()}",
2552
+ "parameters": {
2553
+ "output_format": request.output_format.value,
2554
+ "data_keys": list(session["extracted_data"].keys()) if isinstance(session["extracted_data"], dict) else ["data"],
2555
+ },
2556
+ },
2557
+ timestamp=_now_iso(),
2558
+ ),
2559
+ )
2560
+ await manager.broadcast(json_format_event, session_id)
2561
+ yield _sse_event(json_format_event)
2562
+
2563
  output = await format_output(
2564
  session["extracted_data"],
2565
  request.output_format,
2566
  request.output_instructions,
2567
  )
2568
+
2569
+ json_format_result_event = _record_step(
2570
+ session,
2571
+ ScrapeStep(
2572
+ step_number=len(session["steps"]),
2573
+ action="tool_call",
2574
+ status="completed",
2575
+ message=f"json.dumps() → {len(output)} bytes",
2576
+ reward=0.05,
2577
+ extracted_data={
2578
+ "tool_name": "json.dumps",
2579
+ "result": {
2580
+ "output_length": len(output),
2581
+ "format": request.output_format.value,
2582
+ },
2583
+ },
2584
+ timestamp=_now_iso(),
2585
+ ),
2586
+ )
2587
+ await manager.broadcast(json_format_result_event, session_id)
2588
+ yield _sse_event(json_format_result_event)
2589
+
2590
  output_ext = request.output_format.value
2591
  _write_session_artifact(session, f"final_output.{output_ext}", output)
2592
  _write_session_json_artifact(session, "final_extracted_data.json", session["extracted_data"])
2593
 
2594
  if request.enable_memory:
2595
+ # Tool call: memory.store
2596
+ memory_store_event = _record_step(
2597
+ session,
2598
+ ScrapeStep(
2599
+ step_number=len(session["steps"]) + 1,
2600
+ action="tool_call",
2601
+ status="running",
2602
+ message="memory.store(key='summary', type='LONG_TERM')",
2603
+ extracted_data={
2604
+ "tool_name": "memory.store",
2605
+ "tool_description": "Store scrape summary in long-term memory",
2606
+ "parameters": {
2607
+ "key": f"scrape:{session_id}:summary",
2608
+ "memory_type": "LONG_TERM",
2609
+ "output_length": len(output),
2610
+ },
2611
+ },
2612
+ timestamp=_now_iso(),
2613
+ ),
2614
+ )
2615
+ await manager.broadcast(memory_store_event, session_id)
2616
+ yield _sse_event(memory_store_event)
2617
+
2618
  try:
2619
  await memory_manager.store(
2620
  key=f"scrape:{session_id}:summary",
 
2628
  },
2629
  )
2630
  _write_session_artifact(session, "memory_summary.txt", output)
2631
+
2632
+ # Tool call result: memory.store
2633
+ memory_store_result_event = _record_step(
2634
+ session,
2635
+ ScrapeStep(
2636
+ step_number=len(session["steps"]),
2637
+ action="tool_call",
2638
+ status="completed",
2639
+ message="memory.store() → stored",
2640
+ reward=0.05,
2641
+ extracted_data={
2642
+ "tool_name": "memory.store",
2643
+ "result": {"stored": True, "key": f"scrape:{session_id}:summary"},
2644
+ },
2645
+ timestamp=_now_iso(),
2646
+ ),
2647
+ )
2648
+ await manager.broadcast(memory_store_result_event, session_id)
2649
+ yield _sse_event(memory_store_result_event)
2650
  except Exception as exc:
2651
  session["errors"].append(f"Failed to store summary memory: {exc}")
2652
+ memory_store_fail_event = _record_step(
2653
+ session,
2654
+ ScrapeStep(
2655
+ step_number=len(session["steps"]),
2656
+ action="tool_call",
2657
+ status="failed",
2658
+ message=f"memory.store() → {str(exc)[:50]}",
2659
+ extracted_data={
2660
+ "tool_name": "memory.store",
2661
+ "result": {"stored": False, "error": str(exc)[:100]},
2662
+ },
2663
+ timestamp=_now_iso(),
2664
+ ),
2665
+ )
2666
+ await manager.broadcast(memory_store_fail_event, session_id)
2667
+ yield _sse_event(memory_store_fail_event)
2668
 
2669
  response = ScrapeResponse(
2670
  session_id=session_id,