Spaces:

smolagents
/

computer-use-agent

Running

A-Mahla committed on Nov 3

Commit

e0d4a07

1 Parent(s): 97e46c6

ADD Backend V1 (#2)

* ADD backend logic

* FIX pre-commit

* FIX pre-commit

* FIX pre-commit

* FIX Agent loop

* ADD pytest

* ADD pytest

* CHG github workflow

Files changed (20) hide show

.github/workflows/pre-commit.yml +1 -1
Makefile +8 -0
cua2-core/pyproject.toml +3 -3
cua2-core/pytest.ini +13 -0
cua2-core/src/cua2_core/app.py +6 -5
cua2-core/src/cua2_core/models/models.py +113 -47
cua2-core/src/cua2_core/routes/routes.py +29 -20
cua2-core/src/cua2_core/routes/websocket.py +10 -10
cua2-core/src/cua2_core/services/agent_service.py +315 -106
cua2-core/src/cua2_core/services/agent_utils/desktop_agent.py +231 -0
cua2-core/src/cua2_core/services/agent_utils/function_parser.py +560 -0
cua2-core/src/cua2_core/services/agent_utils/get_model.py +18 -0
cua2-core/src/cua2_core/services/agent_utils/prompt.py +136 -0
cua2-core/src/cua2_core/services/sandbox_service.py +90 -0
cua2-core/src/cua2_core/websocket/websocket_manager.py +62 -62
cua2-core/tests/__init__.py +1 -0
cua2-core/tests/test_routes.py +311 -0
cua2-front/src/components/mock/TaskButton.tsx +4 -4
cua2-front/src/pages/Index.tsx +9 -11
cua2-front/src/types/agent.ts +15 -0

.github/workflows/pre-commit.yml CHANGED Viewed

@@ -31,4 +31,4 @@ jobs:
       - name: Run pre-commit
         run: |
-          uv run pre-commit run --all-files --show-diff-on-failure

       - name: Run pre-commit
         run: |
+          make pre-commit

Makefile CHANGED Viewed

@@ -23,6 +23,14 @@ dev-frontend:
 pre-commit:
 	uv run pre-commit run --all-files --show-diff-on-failure
 clean:
 	find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true

 pre-commit:
 	uv run pre-commit run --all-files --show-diff-on-failure
+	make test
+# Run tests
+test:
+	cd cua2-core && uv run pytest tests/ -v
+test-coverage:
+	cd cua2-core && uv run pytest tests/ -v --cov=cua2_core --cov-report=html --cov-report=term
 clean:
 	find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true

cua2-core/pyproject.toml CHANGED Viewed

@@ -33,9 +33,9 @@ dependencies = [
     "httpx>=0.27.1",
     "asyncio-mqtt==0.16.1",
     "aiofiles==23.2.1",
-    "smolagents[openai,litellm]==1.15.0",
-    "openai==1.91.0",
-    "litellm[proxy]==1.63.14",
 ]
 [project.optional-dependencies]

     "httpx>=0.27.1",
     "asyncio-mqtt==0.16.1",
     "aiofiles==23.2.1",
+    "smolagents[openai,litellm]==1.22.0",
+    "openai==2.6.1",
+    "e2b-desktop==2.1.0",
 ]
 [project.optional-dependencies]

cua2-core/pytest.ini ADDED Viewed

	@@ -0,0 +1,13 @@

+[pytest]
+testpaths = tests
+python_files = test_*.py
+python_classes = Test*
+python_functions = test_*
+addopts =
+    -v
+    --strict-markers
+    --tb=short
+    --disable-warnings
+markers =
+    unit: Unit tests
+    integration: Integration tests

cua2-core/src/cua2_core/app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from contextlib import asynccontextmanager
 from cua2_core.services.agent_service import AgentService
 from cua2_core.websocket.websocket_manager import WebSocketManager
 from dotenv import load_dotenv
 from fastapi import FastAPI
@@ -16,23 +17,23 @@ async def lifespan(app: FastAPI):
     # Startup: Initialize services
     print("Initializing services...")
-    # Initialize WebSocket manager
     websocket_manager = WebSocketManager()
-    # Initialize agent service with websocket manager dependency
-    agent_service = AgentService(websocket_manager)
     # Store services in app state for access in routes
     app.state.websocket_manager = websocket_manager
     app.state.agent_service = agent_service
     print("Services initialized successfully")
     yield
-    # Shutdown: Clean up resources
     print("Shutting down services...")
-    # Add any cleanup logic here if needed
     print("Services shut down successfully")

 from contextlib import asynccontextmanager
 from cua2_core.services.agent_service import AgentService
+from cua2_core.services.sandbox_service import SandboxService
 from cua2_core.websocket.websocket_manager import WebSocketManager
 from dotenv import load_dotenv
 from fastapi import FastAPI
     # Startup: Initialize services
     print("Initializing services...")
     websocket_manager = WebSocketManager()
+    sandbox_service = SandboxService()
+    agent_service = AgentService(websocket_manager, sandbox_service)
     # Store services in app state for access in routes
     app.state.websocket_manager = websocket_manager
+    app.state.sandbox_service = sandbox_service
     app.state.agent_service = agent_service
     print("Services initialized successfully")
     yield
     print("Shutting down services...")
+    await sandbox_service.cleanup_sandboxes()
     print("Services shut down successfully")

cua2-core/src/cua2_core/models/models.py CHANGED Viewed

@@ -1,70 +1,91 @@
 import json
 import os
 from datetime import datetime
 from typing import Annotated, Literal, Optional
-from pydantic import BaseModel, Field, field_serializer, model_validator
 from typing_extensions import TypeAlias
 #################### Backend -> Frontend ########################
-class AgentAction(BaseModel):
     """Agent action structure"""
-    actionType: Literal[
-        "click",
-        "write",
-        "press",
-        "scroll",
-        "wait",
-        "open",
-        "launch_app",
-        "refresh",
-        "go_back",
-    ]
-    actionArguments: dict
     def to_string(self) -> str:
         """Convert action to a human-readable string"""
-        action_type = self.actionType
-        args = self.actionArguments
         if action_type == "click":
-            x = args.get("x", "?")
-            y = args.get("y", "?")
             return f"Click at coordinates ({x}, {y})"
         elif action_type == "write":
-            text = args.get("text", "")
             return f"Type text: '{text}'"
         elif action_type == "press":
-            key = args.get("key", "")
             return f"Press key: {key}"
         elif action_type == "scroll":
-            direction = args.get("direction", "down")
-            amount = args.get("amount", 2)
             return f"Scroll {direction} by {amount}"
         elif action_type == "wait":
-            seconds = args.get("seconds", 0)
             return f"Wait for {seconds} seconds"
         elif action_type == "open":
-            file_or_url = args.get("file_or_url", "")
-            return f"Open: {file_or_url}"
-        elif action_type == "launch_app":
-            app_name = args.get("app_name", "")
-            return f"Launch app: {app_name}"
-        elif action_type == "refresh":
-            return "Refresh the current page"
-        elif action_type == "go_back":
-            return "Go back one page"
 class AgentStep(BaseModel):
@@ -85,10 +106,10 @@ class AgentStep(BaseModel):
     def serialize_actions(self, actions: list[AgentAction], _info):
         """Convert actions to list of strings when dumping (controlled by context)"""
-        if _info.context and _info.context.get("actions_as_json", False):
             return [action.model_dump(mode="json") for action in actions]
-        return [action.to_string() for action in actions]
 class AgentTraceMetadata(BaseModel):
@@ -100,6 +121,7 @@ class AgentTraceMetadata(BaseModel):
     duration: float = 0.0  # in seconds
     numberOfSteps: int = 0
     maxSteps: int = 0
 class AgentTrace(BaseModel):
@@ -204,29 +226,54 @@ class ActiveTask(BaseModel):
     message_id: str
     instruction: str
-    modelId: str
     timestamp: datetime = datetime.now()
     steps: list[AgentStep] = []
     traceMetadata: AgentTraceMetadata = AgentTraceMetadata()
     @property
     def trace_path(self):
         """Trace path"""
-        return f"data/trace-{self.message_id}-{self.modelId}"
     @model_validator(mode="after")
     def store_model(self):
         """Validate model ID"""
-        self.traceMetadata.traceId = self.message_id
-        os.makedirs(self.trace_path, exist_ok=True)
-        with open(f"{self.trace_path}/tasks.json", "w") as f:
-            json.dump(
-                self.model_dump(mode="json", context={"actions_as_json": True}),
-                f,
-                indent=2,
-            )
-        return self
 class HealthResponse(BaseModel):
@@ -249,3 +296,22 @@ class ActiveTasksResponse(BaseModel):
     active_tasks: dict[str, ActiveTask]
     total_connections: int

 import json
 import os
+import threading
 from datetime import datetime
 from typing import Annotated, Literal, Optional
+from cua2_core.services.agent_utils.function_parser import FunctionCall
+from pydantic import BaseModel, Field, PrivateAttr, field_serializer, model_validator
 from typing_extensions import TypeAlias
 #################### Backend -> Frontend ########################
+class AgentAction(FunctionCall):
     """Agent action structure"""
+    @classmethod
+    def from_function_calls(
+        cls, function_calls: list[FunctionCall]
+    ) -> list["AgentAction"]:
+        # Rebuild each parsed call as an AgentAction, then store its
+        # human-readable rendering in `description`.
+        list_of_actions = [cls(**action.model_dump()) for action in function_calls]
+        for action in list_of_actions:
+            action.description = action.to_string()
+        return list_of_actions
     def to_string(self) -> str:
         """Convert action to a human-readable string"""
+        action_type = self.function_name
+        args = self.parameters
+        # Arguments are read by name first, then by positional "arg_N" key.
+        # NOTE(review): `or` also falls through when the named value is falsy
+        # (e.g. a coordinate of 0) -- confirm that is acceptable.
         if action_type == "click":
+            x = args.get("x") or args.get("arg_0")
+            y = args.get("y") or args.get("arg_1")
             return f"Click at coordinates ({x}, {y})"
+        if action_type == "right_click":
+            x = args.get("x") or args.get("arg_0")
+            y = args.get("y") or args.get("arg_1")
+            return f"Right click at coordinates ({x}, {y})"
+        if action_type == "double_click":
+            x = args.get("x") or args.get("arg_0")
+            y = args.get("y") or args.get("arg_1")
+            return f"Double click at coordinates ({x}, {y})"
+        if action_type == "move_mouse":
+            x = args.get("x") or args.get("arg_0")
+            y = args.get("y") or args.get("arg_1")
+            return f"Move mouse to coordinates ({x}, {y})"
         elif action_type == "write":
+            text = args.get("text") or args.get("arg_0")
             return f"Type text: '{text}'"
         elif action_type == "press":
+            key = args.get("key") or args.get("arg_0")
             return f"Press key: {key}"
+        elif action_type == "go_back":
+            return "Go back one page"
+        elif action_type == "drag":
+            x1 = args.get("x1") or args.get("arg_0")
+            y1 = args.get("y1") or args.get("arg_1")
+            x2 = args.get("x2") or args.get("arg_2")
+            y2 = args.get("y2") or args.get("arg_3")
+            return f"Drag from ({x1}, {y1}) to ({x2}, {y2})"
         elif action_type == "scroll":
+            x = args.get("x") or args.get("arg_0")
+            y = args.get("y") or args.get("arg_1")
+            direction = args.get("direction") or args.get("arg_2")
+            amount = args.get("amount") or args.get("arg_3") or 2
             return f"Scroll {direction} by {amount}"
         elif action_type == "wait":
+            seconds = args.get("seconds") or args.get("arg_0")
             return f"Wait for {seconds} seconds"
         elif action_type == "open":
+            url = args.get("url") or args.get("arg_0")
+            return f"Open: {url}"
+        elif action_type == "final_answer":
+            answer = args.get("answer") or args.get("arg_0")
+            return f"Final answer: {answer}"
+        # Any function name not handled above gets a generic label.
+        return "Unknown action"
 class AgentStep(BaseModel):
     def serialize_actions(self, actions: list[AgentAction], _info):
         """Convert actions to list of strings when dumping (controlled by context)"""
+        if _info.context and _info.context.get("actions_as_json", True):
             return [action.model_dump(mode="json") for action in actions]
+        return [action.description for action in actions]
 class AgentTraceMetadata(BaseModel):
     duration: float = 0.0  # in seconds
     numberOfSteps: int = 0
     maxSteps: int = 0
+    completed: bool = False
 class AgentTrace(BaseModel):
     message_id: str
     instruction: str
+    model_id: str
     timestamp: datetime = datetime.now()
     steps: list[AgentStep] = []
     traceMetadata: AgentTraceMetadata = AgentTraceMetadata()
+    _file_lock: threading.Lock = PrivateAttr(default_factory=threading.Lock)
     @property
     def trace_path(self):
         """Trace path"""
+        return f"data/trace-{self.message_id}-{self.model_id.replace('/', '-')}"
     @model_validator(mode="after")
     def store_model(self):
         """Validate model ID"""
+        # Runs after validation and is also called directly by the agent
+        # service; it writes the task to <trace_path>/tasks.json.
+        with self._file_lock:
+            os.makedirs(self.trace_path, exist_ok=True)
+            with open(f"{self.trace_path}/tasks.json", "w") as f:
+                json.dump(
+                    self.model_dump(
+                        mode="json",
+                        # NOTE(review): "_file_locks" differs from the
+                        # "_file_lock" attribute used above -- confirm intent.
+                        exclude={"_file_locks"},
+                        context={"actions_as_json": True},
+                    ),
+                    f,
+                    indent=2,
+                )
+        # A mode="after" model validator must hand back the validated instance.
+        return self
+    def update_step(self, step: AgentStep):
+        """Insert or replace a step, then rewrite tasks.json.
+
+        `step.stepId` is treated as a 1-based position: an id within the
+        current list replaces that entry, any larger id appends the step.
+        """
+        # Hold the lock across both the in-memory update and the file write.
+        with self._file_lock:
+            # NOTE(review): a stepId below 1 would index from the end of the
+            # list (or fail on an empty list) -- confirm ids always start at 1.
+            if int(step.stepId) <= len(self.steps):
+                self.steps[int(step.stepId) - 1] = step
+            else:
+                self.steps.append(step)
+                # The step count is only refreshed when the list grows.
+                self.traceMetadata.numberOfSteps = len(self.steps)
+            # Rewrite the whole file, dumping with the actions_as_json flag set.
+            # NOTE(review): "_file_locks" differs from the "_file_lock"
+            # attribute used above -- confirm which name was intended.
+            with open(f"{self.trace_path}/tasks.json", "w") as f:
+                json.dump(
+                    self.model_dump(
+                        mode="json",
+                        exclude={"_file_locks"},
+                        context={"actions_as_json": True},
+                    ),
+                    f,
+                    indent=2,
+                )
+#################### API Routes Models ########################
 class HealthResponse(BaseModel):
     active_tasks: dict[str, ActiveTask]
     total_connections: int
+class UpdateStepRequest(BaseModel):
+    """Request model for updating a step"""
+    step_evaluation: Literal["like", "dislike", "neutral"]
+class UpdateStepResponse(BaseModel):
+    """Response model for step update"""
+    success: bool
+    message: str
+class AvailableModelsResponse(BaseModel):
+    """Response for available models"""
+    models: list[str]

cua2-core/src/cua2_core/routes/routes.py CHANGED Viewed

@@ -2,11 +2,13 @@ from datetime import datetime
 # Get services from app state
 from cua2_core.models.models import (
-    ActiveTasksResponse,
     HealthResponse,
-    TaskStatusResponse,
 )
 from cua2_core.services.agent_service import AgentService
 from cua2_core.websocket.websocket_manager import WebSocketManager
 from fastapi import APIRouter, Depends, HTTPException, Request
@@ -36,24 +38,31 @@ async def health_check(
     )
-@router.get("/tasks", response_model=ActiveTasksResponse)
-async def get_active_tasks(
-    agent_service: AgentService = Depends(get_agent_service),
-    websocket_manager: WebSocketManager = Depends(get_websocket_manager),
-):
-    """Get currently active tasks"""
-    return ActiveTasksResponse(
-        active_tasks=agent_service.get_active_tasks(),
-        total_connections=websocket_manager.get_connection_count(),
-    )
-@router.get("/tasks/{task_id}", response_model=TaskStatusResponse)
-async def get_task_status(
-    task_id: str, agent_service: AgentService = Depends(get_agent_service)
 ):
-    """Get status of a specific task"""
-    task_status = agent_service.get_task_status(task_id)
-    if task_status is None:
-        raise HTTPException(status_code=404, detail="Task not found")
-    return TaskStatusResponse(task_id=task_id, status=task_status)

 # Get services from app state
 from cua2_core.models.models import (
+    AvailableModelsResponse,
     HealthResponse,
+    UpdateStepRequest,
+    UpdateStepResponse,
 )
 from cua2_core.services.agent_service import AgentService
+from cua2_core.services.agent_utils.get_model import AVAILABLE_MODELS
 from cua2_core.websocket.websocket_manager import WebSocketManager
 from fastapi import APIRouter, Depends, HTTPException, Request
     )
+@router.get("/models", response_model=AvailableModelsResponse)
+async def get_available_models():
+    """Get list of all available model IDs"""
+    return AvailableModelsResponse(models=AVAILABLE_MODELS)
+@router.patch("/traces/{trace_id}/steps/{step_id}", response_model=UpdateStepResponse)
+async def update_trace_step(
+    trace_id: str,
+    step_id: str,
+    request: UpdateStepRequest,
+    agent_service: AgentService = Depends(get_agent_service),
 ):
+    """Update a specific step in a trace (e.g., update step evaluation)
+
+    Responds with 400 when the agent service raises ValueError and with 404
+    when it raises FileNotFoundError.
+    """
+    try:
+        agent_service.update_trace_step(
+            trace_id=trace_id,
+            step_id=step_id,
+            step_evaluation=request.step_evaluation,
+        )
+        return UpdateStepResponse(
+            success=True,
+            message="Step updated successfully",
+        )
+    except ValueError as e:
+        # Chain the cause so the original error stays in the traceback.
+        raise HTTPException(status_code=400, detail=str(e)) from e
+    except FileNotFoundError as e:
+        raise HTTPException(status_code=404, detail=str(e)) from e

cua2-core/src/cua2_core/routes/websocket.py CHANGED Viewed

@@ -3,6 +3,8 @@ import json
 # Get services from app state
 from cua2_core.app import app
 from cua2_core.models.models import AgentTrace, HeartbeatEvent
 from fastapi import APIRouter, WebSocket, WebSocketDisconnect
 # Create router
@@ -13,15 +15,15 @@ router = APIRouter()
 async def websocket_endpoint(websocket: WebSocket):
     """WebSocket endpoint for real-time communication"""
-    websocket_manager = app.state.websocket_manager
-    agent_service = app.state.agent_service
     await websocket_manager.connect(websocket)
     try:
         # Send welcome heartbeat
         welcome_message = HeartbeatEvent(type="heartbeat")
-        await websocket_manager.send_personal_message(welcome_message, websocket)
         # Keep the connection alive and wait for messages
         while True:
@@ -50,7 +52,9 @@ async def websocket_endpoint(websocket: WebSocket):
                             trace = AgentTrace(**trace_data)
                             # Process the user task with the trace
-                            trace_id = await agent_service.process_user_task(trace)
                             print(f"Started processing trace: {trace_id}")
                         else:
                             print("No trace data in message")
@@ -62,9 +66,7 @@ async def websocket_endpoint(websocket: WebSocket):
                     error_response = AgentErrorEvent(
                         type="agent_error", error="Invalid JSON format"
                     )
-                    await websocket_manager.send_personal_message(
-                        error_response, websocket
-                    )
                 except Exception as e:
                     print(f"Error processing message: {e}")
@@ -76,9 +78,7 @@ async def websocket_endpoint(websocket: WebSocket):
                     error_response = AgentErrorEvent(
                         type="agent_error", error=f"Error processing message: {str(e)}"
                     )
-                    await websocket_manager.send_personal_message(
-                        error_response, websocket
-                    )
             except Exception as e:
                 print(f"Error receiving WebSocket message: {e}")

 # Get services from app state
 from cua2_core.app import app
 from cua2_core.models.models import AgentTrace, HeartbeatEvent
+from cua2_core.services.agent_service import AgentService
+from cua2_core.websocket.websocket_manager import WebSocketManager
 from fastapi import APIRouter, WebSocket, WebSocketDisconnect
 # Create router
 async def websocket_endpoint(websocket: WebSocket):
     """WebSocket endpoint for real-time communication"""
+    websocket_manager: WebSocketManager = app.state.websocket_manager
+    agent_service: AgentService = app.state.agent_service
     await websocket_manager.connect(websocket)
     try:
         # Send welcome heartbeat
         welcome_message = HeartbeatEvent(type="heartbeat")
+        await websocket_manager.send_message(welcome_message, websocket)
         # Keep the connection alive and wait for messages
         while True:
                             trace = AgentTrace(**trace_data)
                             # Process the user task with the trace
+                            trace_id = await agent_service.process_user_task(
+                                trace, websocket
+                            )
                             print(f"Started processing trace: {trace_id}")
                         else:
                             print("No trace data in message")
                     error_response = AgentErrorEvent(
                         type="agent_error", error="Invalid JSON format"
                     )
+                    await websocket_manager.send_message(error_response, websocket)
                 except Exception as e:
                     print(f"Error processing message: {e}")
                     error_response = AgentErrorEvent(
                         type="agent_error", error=f"Error processing message: {str(e)}"
                     )
+                    await websocket_manager.send_message(error_response, websocket)
             except Exception as e:
                 print(f"Error receiving WebSocket message: {e}")

cua2-core/src/cua2_core/services/agent_service.py CHANGED Viewed

@@ -1,39 +1,45 @@
 import asyncio
 import base64
 import json
-from pathlib import Path
-from typing import Optional
 from cua2_core.models.models import (
     ActiveTask,
     AgentAction,
-    AgentCompleteEvent,
-    AgentErrorEvent,
-    AgentProgressEvent,
-    AgentStartEvent,
     AgentStep,
     AgentTrace,
     AgentTraceMetadata,
-    VncUrlSetEvent,
-    VncUrlUnsetEvent,
 )
-from cua2_core.websocket.websocket_manager import WebSocketManager
 class AgentService:
     """Service for handling agent tasks and processing"""
-    def __init__(self, websocket_manager):
         self.active_tasks: dict[str, ActiveTask] = {}
         self.websocket_manager: WebSocketManager = websocket_manager
-        self.simulation_data_path = (
-            Path(__file__).parent / "simulation_metadata" / "simulated_trace.json"
-        )
-        self.simulation_images_path = (
-            Path(__file__).parent / "simulation_metadata" / "images"
-        )
-    async def process_user_task(self, trace: AgentTrace) -> str:
         """Process a user task and return the trace ID"""
         trace_id = trace.id
@@ -44,123 +50,326 @@ class AgentService:
         self.active_tasks[trace_id] = ActiveTask(
             message_id=trace_id,
             instruction=trace.instruction,
-            modelId=trace.modelId,
             timestamp=trace.timestamp,
             steps=trace.steps,
             traceMetadata=trace.traceMetadata,
         )
-        # Start the agent processing in the background
-        asyncio.create_task(self._simulate_agent_processing(trace))
         return trace_id
-    async def _simulate_agent_processing(self, trace: AgentTrace):
-        """Simulate agent processing using simulated_trace.json data"""
-        trace_id = trace.id
         try:
-            # Load simulation data
-            with open(self.simulation_data_path, "r") as f:
-                simulation_data = json.load(f)
-            # Send agent start event with the initial trace
-            start_event = AgentStartEvent(type="agent_start", agentTrace=trace)
-            await self.websocket_manager.broadcast(start_event)
-            # mock VNC URL
-            vnc_url = "https://www.youtube.com/embed/VCutEsRSJ5A?si=PT0ETJ7zIJ9ywhGW"
-            vnc_set_event = VncUrlSetEvent(type="vnc_url_set", vncUrl=vnc_url)
-            await self.websocket_manager.broadcast(vnc_set_event)
-            trace_metadata = AgentTraceMetadata(traceId=trace_id, maxSteps=20)
-            # Process each step from the simulation data
-            for step_data in simulation_data["steps"]:
-                # Wait before sending the next step to simulate processing time
-                await asyncio.sleep(step_data["duration"])
-                # Load and encode the image
-                image_path = (
-                    self.simulation_images_path / step_data["image"].split("/")[-1]
-                )
-                with open(image_path, "rb") as img_file:
-                    image_bytes = img_file.read()
-                    image_base64 = f"data:image/png;base64,{base64.b64encode(image_bytes).decode('utf-8')}"
-                # Convert actions to AgentAction objects
-                actions = [
-                    AgentAction(
-                        actionType=action["actionType"],
-                        actionArguments=action["actionArguments"],
-                    )
-                    for action in step_data["actions"]
-                ]
-                # Create agent step
-                agent_step = AgentStep(
-                    traceId=trace_id,
-                    stepId=step_data["stepId"],
-                    image=image_base64,
-                    thought=step_data["thought"],
-                    actions=actions,
-                    error="",
-                    duration=step_data["duration"],
-                    inputTokensUsed=step_data["inputTokensUsed"],
-                    outputTokensUsed=step_data["outputTokensUsed"],
-                    step_evaluation=step_data["step_evaluation"],
                 )
-                trace_metadata.numberOfSteps += 1
-                trace_metadata.duration += step_data["duration"]
-                trace_metadata.inputTokensUsed += step_data["inputTokensUsed"]
-                trace_metadata.outputTokensUsed += step_data["outputTokensUsed"]
-                # Send progress event
-                progress_event = AgentProgressEvent(
-                    type="agent_progress",
-                    agentStep=agent_step,
-                    traceMetadata=trace_metadata,
-                )
-                await self.websocket_manager.broadcast(progress_event)
-                # Update active task
-                self.active_tasks[trace_id].steps.append(agent_step)
-            # Unset VNC URL before completion
-            vnc_unset_event = VncUrlUnsetEvent(type="vnc_url_unset")
-            await self.websocket_manager.broadcast(vnc_unset_event)
             # Send completion event
-            complete_event = AgentCompleteEvent(
-                type="agent_complete", traceMetadata=trace_metadata
             )
-            await self.websocket_manager.broadcast(complete_event)
-            # Update active task with final metadata
-            self.active_tasks[trace_id].traceMetadata = trace_metadata
-            # Clean up after a delay
-            await asyncio.sleep(1)
-            if trace_id in self.active_tasks:
-                del self.active_tasks[trace_id]
-        except Exception as e:
-            print(f"Error in agent simulation: {str(e)}")
-            # Send error event
-            error_event = AgentErrorEvent(
-                type="agent_error", error=f"Error processing task: {str(e)}"
             )
-            await self.websocket_manager.broadcast(error_event)
-            # Clean up
-            if trace_id in self.active_tasks:
-                del self.active_tasks[trace_id]
-    def get_active_tasks(self) -> dict:
-        """Get currently active tasks"""
-        return self.active_tasks.copy()
-    def get_task_status(self, message_id: str) -> Optional[dict]:
-        """Get status of a specific task"""
-        return self.active_tasks.get(message_id)

 import asyncio
 import base64
 import json
+import logging
+import os
+import time
+from io import BytesIO
+from typing import Callable, Literal
 from cua2_core.models.models import (
     ActiveTask,
     AgentAction,
     AgentStep,
     AgentTrace,
     AgentTraceMetadata,
 )
+from cua2_core.services.agent_utils.desktop_agent import E2BVisionAgent
+from cua2_core.services.agent_utils.function_parser import parse_function_call
+from cua2_core.services.agent_utils.get_model import get_model
+from cua2_core.services.sandbox_service import SandboxService
+from cua2_core.websocket.websocket_manager import WebSocketException, WebSocketManager
+from e2b_desktop import Sandbox
+from fastapi import WebSocket
+from PIL import Image
+from smolagents import ActionStep, AgentImage, AgentMaxStepsError, TaskStep
+logger = logging.getLogger(__name__)
 class AgentService:
     """Service for handling agent tasks and processing"""
+    def __init__(
+        self, websocket_manager: WebSocketManager, sandbox_service: SandboxService
+    ):
         self.active_tasks: dict[str, ActiveTask] = {}
         self.websocket_manager: WebSocketManager = websocket_manager
+        self.task_websockets: dict[str, WebSocket] = {}
+        self.sandbox_service: SandboxService = sandbox_service
+        self.last_screenshot: dict[str, AgentImage] = {}
+    async def process_user_task(self, trace: AgentTrace, websocket: WebSocket) -> str:
         """Process a user task and return the trace ID"""
         trace_id = trace.id
         self.active_tasks[trace_id] = ActiveTask(
             message_id=trace_id,
             instruction=trace.instruction,
+            model_id=trace.modelId,
             timestamp=trace.timestamp,
             steps=trace.steps,
             traceMetadata=trace.traceMetadata,
         )
+        # Store the websocket for this task
+        self.task_websockets[trace_id] = websocket
+        asyncio.create_task(self._agent_processing(trace_id))
         return trace_id
+    async def _agent_runner(
+        self,
+        message_id: str,
+        step_callback: Callable[[ActionStep, E2BVisionAgent], None],
+    ):
+        """Run the task with the appropriate agent
+
+        Sends the start event, acquires a sandbox, publishes its VNC URL,
+        saves an initial screenshot and runs the agent in a worker thread.
+        Whatever happens, the task bookkeeping is cleared and the sandbox is
+        released at the end.
+
+        Args:
+            message_id: Key of the task in `self.active_tasks`.
+            step_callback: Callback handed to the agent via `step_callbacks`.
+        """
+        sandbox: Sandbox | None = None
+        agent = None
+        novnc_active = False
+        websocket_exception = False
+        # Get the websocket for this task. Looked up before the try block so
+        # the except/finally handlers can always reference it.
+        websocket = self.task_websockets.get(message_id)
         try:
+            await self.websocket_manager.send_agent_start(
+                active_task=self.active_tasks[message_id], websocket=websocket
+            )
+            model = get_model(self.active_tasks[message_id].model_id)
+            # Acquire a sandbox from the pool
+            sandbox = await self.sandbox_service.acquire_sandbox(message_id)
+            if sandbox is None:
+                raise Exception("No sandbox available: pool limit reached")
+            data_dir = self.active_tasks[message_id].trace_path
+            user_content = self.active_tasks[message_id].instruction
+            agent = E2BVisionAgent(
+                model=model,
+                data_dir=data_dir,
+                desktop=sandbox,
+                step_callbacks=[step_callback],
+            )
+            self.active_tasks[message_id].traceMetadata.maxSteps = agent.max_steps
+            await self.websocket_manager.send_vnc_url_set(
+                vnc_url=sandbox.stream.get_url(
+                    auto_connect=True,
+                    view_only=True,
+                    resize="scale",
+                    auth_key=sandbox.stream.get_auth_key(),
                 )
+                or "",
+                websocket=websocket,
+            )
+            novnc_active = True
+            # Save the initial screenshot as step 1 before the agent starts.
+            step_filename = f"{message_id}-1"
+            screenshot_bytes = agent.desktop.screenshot()
+            image = Image.open(BytesIO(screenshot_bytes))
+            screenshot_path = os.path.join(agent.data_dir, f"{step_filename}.png")
+            image.save(screenshot_path)
+            self.last_screenshot[message_id] = image
+            # agent.run is synchronous; run it in a thread to keep the event
+            # loop free.
+            await asyncio.to_thread(
+                agent.run,
+                user_content,
+            )
+            self.active_tasks[message_id].traceMetadata.completed = True
+        except WebSocketException:
+            websocket_exception = True
+        except (Exception, KeyboardInterrupt):
+            import traceback
+            logger.error(
+                f"Error processing task: {traceback.format_exc()}", exc_info=True
+            )
+            try:
+                await self.websocket_manager.send_agent_error(
+                    error="Error processing task", websocket=websocket
+                )
+            except WebSocketException:
+                # Sending failed, so skip the remaining notifications as well.
+                websocket_exception = True
+        finally:
             # Send completion event
+            active_task = self.active_tasks.get(message_id)
+            if not websocket_exception:
+                try:
+                    if active_task is not None:
+                        await self.websocket_manager.send_agent_complete(
+                            metadata=active_task.traceMetadata,
+                            websocket=websocket,
+                        )
+                    if novnc_active:
+                        await self.websocket_manager.send_vnc_url_unset(
+                            websocket=websocket
+                        )
+                except WebSocketException:
+                    # A failed notification must not skip the cleanup below,
+                    # otherwise the sandbox would never be released.
+                    logger.warning("Could not send completion events")
+            novnc_active = False
+            # Clean up
+            if active_task is not None:
+                active_task.store_model()
+                del self.active_tasks[message_id]
+            # Clean up websocket reference
+            self.task_websockets.pop(message_id, None)
+            self.last_screenshot.pop(message_id, None)
+            # Release sandbox back to the pool
+            if sandbox:
+                await self.sandbox_service.release_sandbox(sandbox)
+    async def _agent_processing(
+        self,
+        message_id: str,
+    ):
+        """
+        Process the user task with the appropriate agent
+        Creates the trace directory, builds the per-step callback that turns
+        each finished agent step into an AgentStep and sends it over the
+        task's websocket, then delegates the run to `_agent_runner`.
+        Args:
+            message_id: Key of the task in `self.active_tasks`
+        """
+        # Set up log file for this task
+        active_task = self.active_tasks[message_id]
+        # Ensure the directory exists
+        os.makedirs(active_task.trace_path, exist_ok=True)
+        # Capture the event loop reference in the async context
+        # This will be used in the callback to safely schedule coroutines from the worker thread
+        loop = asyncio.get_running_loop()
+        def step_callback(memory_step: ActionStep, agent: E2BVisionAgent):
+            """Publish one finished agent step and refresh the cached screenshot"""
+            assert memory_step.step_number is not None
+            # NOTE(review): fixed 3 s pause before the step is reported; presumably
+            # gives the desktop time to settle after the action - confirm
+            time.sleep(3)
+            cached_screenshot = self.last_screenshot.get(message_id)
+            if cached_screenshot is not None:
+                memory_step.observations_images = [cached_screenshot.copy()]
+            else:
+                # No screenshot is cached for this task, so there is nothing to
+                # attach to the step (this branch used to raise KeyError by
+                # indexing the missing entry). Only the clean-up is performed.
+                # NOTE(review): pruning of older screenshots only ever lived in
+                # this fallback branch; confirm whether it should run on every step
+                for previous_memory_step in (
+                    agent.memory.steps
+                ):  # Remove previous screenshots from logs for lean processing
+                    if (
+                        isinstance(previous_memory_step, ActionStep)
+                        and previous_memory_step.step_number is not None
+                        and previous_memory_step.step_number
+                        <= memory_step.step_number - 1
+                    ):
+                        previous_memory_step.observations_images = None
+                    elif isinstance(previous_memory_step, TaskStep):
+                        previous_memory_step.task_images = None
+            step_error = memory_step.error
+            model_output = (
+                memory_step.model_output_message.content
+                if memory_step.model_output_message
+                else None
+            )
+            if model_output is None and isinstance(step_error, AgentMaxStepsError):
+                model_output = memory_step.action_output
+            # The output is split on code fences: text before the first fence is
+            # the thought, the first fenced section is the action sequence
+            output_parts = model_output.split("```") if model_output else []
+            thought = (
+                output_parts[0].replace("\nAction:\n", "")
+                if output_parts
+                and (step_error is None or isinstance(step_error, AgentMaxStepsError))
+                else None
+            )
+            # A reply without any code fence has no action part; report None
+            # instead of failing with IndexError
+            action_sequence = (
+                output_parts[1]
+                if len(output_parts) > 1 and step_error is None
+                else None
             )
+            if memory_step.observations_images:
+                image = memory_step.observations_images[0]
+                buffered = BytesIO()
+                image.save(buffered, format="PNG")
+                image_base64 = f"data:image/png;base64,{base64.b64encode(buffered.getvalue()).decode('utf-8')}"
+                del buffered
+                del image
+            else:
+                image_base64 = None
+            # Token usage may be missing on a step (e.g. when no model call
+            # completed); count such steps as zero tokens
+            token_usage = memory_step.token_usage
+            input_tokens = token_usage.input_tokens if token_usage else 0
+            output_tokens = token_usage.output_tokens if token_usage else 0
+            duration = memory_step.timing.duration or 0.0
+            step = AgentStep(
+                traceId=message_id,
+                stepId=str(memory_step.step_number),
+                image=image_base64,
+                thought=thought,
+                actions=AgentAction.from_function_calls(
+                    parse_function_call(action_sequence)
+                )
+                if action_sequence
+                else None,
+                error=step_error.message if step_error else None,
+                duration=duration,
+                inputTokensUsed=input_tokens,
+                outputTokensUsed=output_tokens,
+                step_evaluation="neutral",
+            )
+            task = self.active_tasks[message_id]
+            task.traceMetadata.inputTokensUsed += input_tokens
+            task.traceMetadata.outputTokensUsed += output_tokens
+            task.traceMetadata.numberOfSteps += 1
+            task.traceMetadata.duration += duration
+            # Add step to active task
+            task.update_step(step)
+            websocket = self.task_websockets.get(message_id)
+            future = asyncio.run_coroutine_threadsafe(
+                self.websocket_manager.send_agent_progress(
+                    step=step,
+                    metadata=task.traceMetadata,
+                    websocket=websocket,
+                ),
+                loop,
             )
+            # Block until the progress message has been sent; an exception
+            # raised by the send propagates from here
+            future.result()
+            # Capture the screen after this step; it becomes the observation
+            # attached to the next step
+            # NOTE(review): `_agent_runner` stores the initial screenshot as
+            # "<message_id>-1"; if step numbers start at 1 that file is
+            # overwritten here - confirm this is intended
+            step_filename = f"{message_id}-{memory_step.step_number}"
+            screenshot_bytes = agent.desktop.screenshot()
+            image = Image.open(BytesIO(screenshot_bytes))
+            screenshot_path = os.path.join(agent.data_dir, f"{step_filename}.png")
+            image.save(screenshot_path)
+            self.last_screenshot[message_id] = image
+        await self._agent_runner(message_id, step_callback)
+    def update_trace_step(
+        self,
+        trace_id: str,
+        step_id: str,
+        step_evaluation: Literal["like", "dislike", "neutral"],
+    ):
+        """
+        Update a specific step in a trace (e.g., update step evaluation)
+        Active tasks are updated in memory; otherwise the trace's tasks.json
+        under the "data" directory is rewritten.
+        Args:
+            trace_id: The trace ID
+            step_id: The step ID (1-indexed)
+            step_evaluation: The evaluation value to set
+        Returns:
+            The updated AgentStep
+        Raises:
+            ValueError: If step_id is invalid, the step is not found, or the
+                stored trace data cannot be processed
+            FileNotFoundError: If trace not found
+        """
+        def resolve_index(step_count: int) -> int:
+            """Convert the 1-indexed step_id into a validated list index"""
+            try:
+                index = int(step_id) - 1
+            except (ValueError, TypeError) as e:
+                raise ValueError(f"Invalid step_id format: {e}") from e
+            if not 0 <= index < step_count:
+                raise ValueError(f"Step {step_id} not found in trace")
+            return index
+        # Try to find in active tasks first
+        active_task = self.active_tasks.get(trace_id)
+        if active_task:
+            # Task is still active
+            step_index = resolve_index(len(active_task.steps))
+            try:
+                step = active_task.steps[step_index]
+                step.step_evaluation = step_evaluation
+                active_task.update_step(step)
+            except (ValueError, TypeError) as e:
+                raise ValueError(f"Error processing step update: {e}") from e
+            return step
+        # Task is not active, try to load from file
+        data_dir = "data"
+        trace_dirs = [
+            d for d in os.listdir(data_dir) if d.startswith(f"trace-{trace_id}")
+        ]
+        if not trace_dirs:
+            raise FileNotFoundError("Trace not found")
+        trace_path = os.path.join(data_dir, trace_dirs[0])
+        tasks_file = os.path.join(trace_path, "tasks.json")
+        if not os.path.exists(tasks_file):
+            raise FileNotFoundError("Trace data not found")
+        try:
+            # Load the trace data
+            with open(tasks_file, "r") as f:
+                task_data = json.load(f)
+            steps = task_data["steps"]
+            step_count = len(steps)
+        except (ValueError, KeyError, TypeError) as e:
+            raise ValueError(f"Error processing step update: {e}") from e
+        # Kept outside the try blocks so "not found" is reported verbatim
+        step_index = resolve_index(step_count)
+        try:
+            steps[step_index]["step_evaluation"] = step_evaluation
+            # Build the response first so invalid data is never written back
+            updated_step = AgentStep(**steps[step_index])
+            # Save the updated data
+            with open(tasks_file, "w") as f:
+                json.dump(task_data, f, indent=2)
+            return updated_step
+        except (ValueError, KeyError, TypeError) as e:
+            raise ValueError(f"Error processing step update: {e}") from e

cua2-core/src/cua2_core/services/agent_utils/desktop_agent.py ADDED Viewed

	@@ -0,0 +1,231 @@

+import os
+import time
+import unicodedata
+from cua2_core.services.agent_utils.prompt import E2B_SYSTEM_PROMPT_TEMPLATE
+# E2B imports
+from e2b_desktop import Sandbox
+# SmolaAgents imports
+from smolagents import CodeAgent, Model, tool
+from smolagents.monitoring import LogLevel
+class E2BVisionAgent(CodeAgent):
+    """Agent for e2b desktop automation with Qwen2.5VL vision capabilities"""
+    def __init__(
+        self,
+        model: Model,
+        data_dir: str,
+        desktop: Sandbox,
+        max_steps: int = 200,
+        verbosity_level: LogLevel = 2,
+        planning_interval: int | None = None,
+        use_v1_prompt: bool = False,
+        **kwargs,
+    ):
+        """
+        Bind the agent to a sandbox desktop and register the desktop tools.
+        Args:
+            model: Model forwarded to the CodeAgent base class
+            data_dir: Directory (created if missing) for screenshots and step data
+            desktop: Sandbox that is queried for its screen size and driven by the tools
+            max_steps: Forwarded to the base class
+            verbosity_level: Forwarded to the base class
+            planning_interval: Forwarded to the base class
+            use_v1_prompt: Stored on the instance; not read anywhere in this class
+            **kwargs: Extra keyword arguments forwarded to the base class
+        """
+        self.desktop = desktop
+        self.data_dir = data_dir
+        self.planning_interval = planning_interval
+        # Initialize Desktop
+        self.width, self.height = self.desktop.get_screen_size()
+        print(f"Screen size: {self.width}x{self.height}")
+        # Set up temp directory
+        os.makedirs(self.data_dir, exist_ok=True)
+        print(f"Screenshots and steps will be saved to: {self.data_dir}")
+        self.use_v1_prompt = use_v1_prompt
+        # Initialize base agent
+        # The agent starts without tools; they are attached by _setup_desktop_tools() below
+        super().__init__(
+            tools=[],
+            model=model,
+            max_steps=max_steps,
+            verbosity_level=verbosity_level,
+            planning_interval=self.planning_interval,
+            stream_outputs=True,
+            **kwargs,
+        )
+        # Replace the system prompt with the E2B template, filling in the
+        # screen resolution placeholders
+        self.prompt_templates["system_prompt"] = E2B_SYSTEM_PROMPT_TEMPLATE.replace(
+            "<<resolution_x>>", str(self.width)
+        ).replace("<<resolution_y>>", str(self.height))
+        # Add screen info to state
+        self.state["screen_width"] = self.width
+        self.state["screen_height"] = self.height
+        # Add default tools
+        self.logger.log("Setting up agent tools...")
+        self._setup_desktop_tools()
+    def _setup_desktop_tools(self):
+        """Register all desktop tools"""
+        # NOTE(review): the @tool decorator presumably derives each tool's name
+        # and description from the function name and docstring below; treat
+        # both as runtime data and keep them stable - confirm against smolagents
+        # The three click tools also record the clicked position in
+        # self.click_coordinates; the attribute is not set anywhere else in this class
+        @tool
+        def click(x: int, y: int) -> str:
+            """
+            Performs a left-click at the specified coordinates
+            Args:
+                x: The x coordinate (horizontal position)
+                y: The y coordinate (vertical position)
+            """
+            self.desktop.move_mouse(x, y)
+            self.desktop.left_click()
+            self.click_coordinates = [x, y]
+            self.logger.log(f"Clicked at coordinates ({x}, {y})")
+            return f"Clicked at coordinates ({x}, {y})"
+        @tool
+        def right_click(x: int, y: int) -> str:
+            """
+            Performs a right-click at the specified coordinates
+            Args:
+                x: The x coordinate (horizontal position)
+                y: The y coordinate (vertical position)
+            """
+            self.desktop.move_mouse(x, y)
+            self.desktop.right_click()
+            self.click_coordinates = [x, y]
+            self.logger.log(f"Right-clicked at coordinates ({x}, {y})")
+            return f"Right-clicked at coordinates ({x}, {y})"
+        @tool
+        def double_click(x: int, y: int) -> str:
+            """
+            Performs a double-click at the specified coordinates
+            Args:
+                x: The x coordinate (horizontal position)
+                y: The y coordinate (vertical position)
+            """
+            self.desktop.move_mouse(x, y)
+            self.desktop.double_click()
+            self.click_coordinates = [x, y]
+            self.logger.log(f"Double-clicked at coordinates ({x}, {y})")
+            return f"Double-clicked at coordinates ({x}, {y})"
+        @tool
+        def move_mouse(x: int, y: int) -> str:
+            """
+            Moves the mouse cursor to the specified coordinates
+            Args:
+                x: The x coordinate (horizontal position)
+                y: The y coordinate (vertical position)
+            """
+            self.desktop.move_mouse(x, y)
+            self.logger.log(f"Moved mouse to coordinates ({x}, {y})")
+            return f"Moved mouse to coordinates ({x}, {y})"
+        # Plain helper (not a tool): NFD-decomposes the text and drops combining
+        # marks, so accented letters are reduced to their base characters
+        def normalize_text(text):
+            return "".join(
+                c
+                for c in unicodedata.normalize("NFD", text)
+                if not unicodedata.combining(c)
+            )
+        @tool
+        def write(text: str) -> str:
+            """
+            Types the specified text at the current cursor position.
+            Args:
+                text: The text to type
+            """
+            # The text is stripped of diacritics before typing; the returned
+            # message reports the text that was actually typed
+            clean_text = normalize_text(text)
+            self.desktop.write(clean_text, delay_in_ms=75)
+            self.logger.log(f"Typed text: '{clean_text}'")
+            return f"Typed text: '{clean_text}'"
+        @tool
+        def press(key: str) -> str:
+            """
+            Presses a keyboard key
+            Args:
+                key: The key to press (e.g. "enter", "space", "backspace", etc.).
+            """
+            self.desktop.press(key)
+            self.logger.log(f"Pressed key: {key}")
+            return f"Pressed key: {key}"
+        @tool
+        def go_back() -> str:
+            """
+            Goes back to the previous page in the browser. If using this tool doesn't work, just click the button directly.
+            Args:
+            """
+            # Implemented as the Alt+Left key combination
+            self.desktop.press(["alt", "left"])
+            self.logger.log("Went back one page")
+            return "Went back one page"
+        @tool
+        def drag(x1: int, y1: int, x2: int, y2: int) -> str:
+            """
+            Clicks [x1, y1], drags mouse to [x2, y2], then release click.
+            Args:
+                x1: origin x coordinate
+                y1: origin y coordinate
+                x2: end x coordinate
+                y2: end y coordinate
+            """
+            self.desktop.drag([x1, y1], [x2, y2])
+            message = f"Dragged and dropped from [{x1}, {y1}] to [{x2}, {y2}]"
+            self.logger.log(message)
+            return message
+        @tool
+        def scroll(x: int, y: int, direction: str = "down", amount: int = 2) -> str:
+            """
+            Moves the mouse to selected coordinates, then uses the scroll button: this could scroll the page or zoom, depending on the app. DO NOT use scroll to move through linux desktop menus.
+            Args:
+                x: The x coordinate (horizontal position) of the element to scroll/zoom
+                y: The y coordinate (vertical position) of the element to scroll/zoom
+                direction: The direction to scroll ("up" or "down"), defaults to "down". For zoom, "up" zooms in, "down" zooms out.
+                amount: The amount to scroll. A good amount is 1 or 2.
+            """
+            # direction and amount are passed to the sandbox unchecked
+            self.desktop.move_mouse(x, y)
+            self.desktop.scroll(direction=direction, amount=amount)
+            message = f"Scrolled {direction} by {amount}"
+            self.logger.log(message)
+            return message
+        @tool
+        def wait(seconds: float) -> str:
+            """
+            Waits for the specified number of seconds. Very useful in case the prior order is still executing (for example starting very heavy applications like browsers or office apps)
+            Args:
+                seconds: Number of seconds to wait, generally 3 is enough.
+            """
+            # Blocks the calling thread for the requested time
+            time.sleep(seconds)
+            self.logger.log(f"Waited for {seconds} seconds")
+            return f"Waited for {seconds} seconds"
+        # The function name below shadows the builtin open() inside this method
+        @tool
+        def open(url: str) -> str:
+            """
+            Directly opens a browser with the specified url: use this at start of web searches rather than trying to click the browser.
+            Args:
+                url: The URL to open
+            """
+            # Make sure URL has http/https prefix
+            if not url.startswith(("http://", "https://")):
+                url = "https://" + url
+            self.desktop.open(url)
+            # Give it time to load
+            time.sleep(2)
+            self.logger.log(f"Opening URL: {url}")
+            return f"Opened URL: {url}"
+        # Register the tools
+        self.tools["click"] = click
+        self.tools["right_click"] = right_click
+        self.tools["double_click"] = double_click
+        self.tools["move_mouse"] = move_mouse
+        self.tools["write"] = write
+        self.tools["press"] = press
+        self.tools["scroll"] = scroll
+        self.tools["wait"] = wait
+        self.tools["open"] = open
+        self.tools["go_back"] = go_back
+        self.tools["drag"] = drag
+        # NOTE(review): "scroll" was already registered above; this second
+        # assignment stores the same object again and is redundant
+        self.tools["scroll"] = scroll

cua2-core/src/cua2_core/services/agent_utils/function_parser.py ADDED Viewed

	@@ -0,0 +1,560 @@

+#!/usr/bin/env python3
+"""
+Function parser for extracting function names, parameter names, and values from string function calls.
+Supports both mobile and pyautogui function patterns.
+"""
+import re
+from collections import OrderedDict
+from typing import Any, Dict, List, Tuple
+from pydantic import BaseModel
+class FunctionCall(BaseModel):
+    """
+    Represents a parsed function call with its parameters.
+    Positional arguments are stored in `parameters` under the keys
+    "arg_0", "arg_1", ...; every other key is a keyword argument.
+    """
+    function_name: str
+    parameters: Dict[str, Any]
+    original_string: str
+    description: str = ""
+    def to_string(self) -> str:
+        """
+        Reconstruct the function call string from the parsed data.
+        Returns:
+            String representation of the function call
+        Examples:
+            >>> call = FunctionCall(function_name="mobile.wait", parameters={"seconds": 3}, original_string="mobile.wait(seconds=3)")
+            >>> call.to_string()
+            'mobile.wait(seconds=3)'
+            >>> call = FunctionCall(function_name="function", parameters={"arg_0": 1, "arg_1": 2, "x": 0.5}, original_string="function(1, 2, x=0.5)")
+            >>> call.to_string()
+            'function(1, 2, x=0.5)'
+        """
+        if not self.parameters:
+            return f"{self.function_name}()"
+        # Separate positional and named arguments
+        positional_args = []
+        named_args = []
+        for name, value in self.parameters.items():
+            # Only "arg_<digits>" marks a positional slot; a keyword that merely
+            # starts with "arg_" (e.g. "arg_name") stays a keyword instead of
+            # crashing the int() conversion
+            positional = re.fullmatch(r"arg_([0-9]+)", name)
+            if positional:
+                positional_args.append((int(positional.group(1)), value))
+            else:
+                named_args.append((name, value))
+        # Sort positional arguments by index
+        positional_args.sort(key=lambda x: x[0])
+        # Positional arguments first, then keyword arguments in stored order
+        param_parts = [self._value_to_string(value) for _, value in positional_args]
+        param_parts.extend(
+            f"{name}={self._value_to_string(value)}" for name, value in named_args
+        )
+        return f"{self.function_name}({', '.join(param_parts)})"
+    def _value_to_string(self, value: Any) -> str:
+        """
+        Convert a value to its string representation for function calls.
+        Strings are wrapped in single quotes without escaping, lists and tuples
+        are both rendered with square brackets, and booleans are lowercased.
+        Args:
+            value: The value to convert
+        Returns:
+            String representation of the value
+        """
+        if isinstance(value, str):
+            # Quote strings
+            return f"'{value}'"
+        elif isinstance(value, (list, tuple)):
+            # Convert lists/tuples to string representation
+            items = [self._value_to_string(item) for item in value]
+            return f"[{', '.join(items)}]"
+        elif isinstance(value, dict):
+            # Convert dictionaries to string representation
+            items = [f"'{k}': {self._value_to_string(v)}" for k, v in value.items()]
+            return f"{{{', '.join(items)}}}"
+        elif isinstance(value, bool):
+            # Convert booleans to lowercase
+            return str(value).lower()
+        elif value is None:
+            return "None"
+        else:
+            # Numbers and other types
+            return str(value)
+def _find_closing_paren(text: str, start: int) -> int:
+    """Return the index of the ")" closing the call opened just before `start`, or -1"""
+    depth = 1
+    quote_char = None
+    index = start
+    while index < len(text):
+        char = text[index]
+        if quote_char is not None:
+            if char == "\\":
+                # Skip the escaped character so \' or \" does not end the string
+                index += 1
+            elif char == quote_char:
+                quote_char = None
+        elif char in ('"', "'"):
+            quote_char = char
+        elif char == "(":
+            depth += 1
+        elif char == ")":
+            depth -= 1
+            if depth == 0:
+                return index
+        index += 1
+    return -1
+def parse_function_call(
+    function_string: str, pattern_to_match: list[str] | None = None
+) -> List[FunctionCall]:
+    """
+    Parse a function call string and extract all function calls found.
+    The argument list of each call runs up to its matching closing
+    parenthesis, so parentheses inside quoted text or nested calls no longer
+    cut the arguments short. If the parentheses or quotes are unbalanced, the
+    first ")" after the opening one ends the call.
+    Args:
+        function_string: String representation of function calls
+        pattern_to_match: Optional substrings; when given, only calls whose
+            name contains at least one of them are returned
+    Returns:
+        List of FunctionCall objects with parsed information
+    Examples:
+        >>> parse_function_call("mobile.wait(seconds=3)")
+        [FunctionCall(function_name='mobile.wait', parameters={'seconds': 3}, ...)]
+        >>> parse_function_call("mobile. wait(seconds=3)")
+        [FunctionCall(function_name='wait', parameters={'seconds': 3}, ...)]
+        >>> parse_function_call("mobile.wait(seconds=3) mobile.home()")
+        [FunctionCall(function_name='mobile.wait', parameters={'seconds': 3}, ...), FunctionCall(function_name='mobile.home', parameters={}, ...)]
+    """
+    # Remove any leading/trailing whitespace
+    function_string = function_string.strip()
+    # A call starts with a (possibly dotted) identifier directly followed by "("
+    call_head = re.compile(r"([a-zA-Z_][a-zA-Z0-9_.]*)\(")
+    results = []
+    position = 0
+    while True:
+        head = call_head.search(function_string, position)
+        if head is None:
+            break
+        params_start = head.end()
+        params_end = _find_closing_paren(function_string, params_start)
+        if params_end == -1:
+            # Unbalanced input: end the call at the first ")" after the opening one
+            params_end = function_string.find(")", params_start)
+            if params_end == -1:
+                break
+        # Continue searching after this call
+        position = params_end + 1
+        function_name = head.group(1)
+        params_string = function_string[params_start:params_end]
+        if pattern_to_match and all(
+            wanted not in function_name for wanted in pattern_to_match
+        ):
+            continue
+        # Parse parameters
+        parameters = parse_parameters(params_string)
+        # Create the original string for this specific function call
+        original_string = f"{function_name}({params_string})"
+        results.append(
+            FunctionCall(
+                function_name=function_name,
+                parameters=parameters,
+                original_string=original_string,
+            )
+        )
+    return results
+def parse_parameters(params_string: str) -> Dict[str, Any]:
+    """
+    Parse parameter string and extract parameter names and values.
+    Positional arguments are numbered in order of appearance as "arg_0",
+    "arg_1", ...; keyword arguments keep their own name, including names
+    that start with "arg_".
+    Args:
+        params_string: String containing parameters (e.g., "x=0.5, y=0.6, text='hello'")
+    Returns:
+        Dictionary mapping parameter names to their values
+    Examples:
+        >>> parse_parameters("x=0.5, y=0.6")
+        {'x': 0.5, 'y': 0.6}
+        >>> parse_parameters("app_name='drupe'")
+        {'app_name': 'drupe'}
+        >>> parse_parameters("'text'")
+        {'arg_0': 'text'}
+        >>> parse_parameters("1, 3, 4")
+        {'arg_0': 1, 'arg_1': 3, 'arg_2': 4}
+        >>> parse_parameters("arg1, arg2, x=0.5")
+        {'arg_0': 'arg1', 'arg_1': 'arg2', 'x': 0.5}
+        >>> parse_parameters("1, arg_name='v'")
+        {'arg_0': 1, 'arg_name': 'v'}
+    """
+    if not params_string.strip():
+        return {}
+    parameters = OrderedDict()
+    # Split by commas, but be careful with commas inside quotes or brackets
+    param_parts = split_parameters(params_string)
+    positional_index = 0
+    for part in param_parts:
+        part = part.strip()
+        if not part:
+            continue
+        # Parse individual parameter
+        name, value = parse_single_parameter(part)
+        # parse_single_parameter reports every positional argument as "arg_0".
+        # Renumber only those, not keywords that merely start with "arg_"
+        # (nor an explicit "arg_0=..." keyword)
+        written_as_keyword = re.match(r"arg_0\s*=\s*\S", part) is not None
+        if name == "arg_0" and not written_as_keyword:
+            name = f"arg_{positional_index}"
+            positional_index += 1
+        parameters[name] = value
+    return parameters
+def split_parameters(params_string: str) -> List[str]:
+    """
+    Split parameter string by commas, respecting quotes and brackets.
+    A comma separates parameters only outside quoted text and outside any
+    (), [] or {} pair. Inside quotes a backslash escapes the next character,
+    so \\' or \\" does not end the string.
+    Args:
+        params_string: String containing parameters
+    Returns:
+        List of individual parameter strings, each stripped of surrounding
+        whitespace; a trailing empty piece is dropped
+    """
+    parts = []
+    current = []
+    # One counter per bracket kind; a comma splits only when all are zero
+    open_counts = {"(": 0, "[": 0, "{": 0}
+    closers = {")": "(", "]": "[", "}": "{"}
+    quote_char = None
+    escaped = False
+    for char in params_string:
+        if quote_char is not None:
+            # Inside a quoted string everything is copied verbatim
+            current.append(char)
+            if escaped:
+                escaped = False
+            elif char == "\\":
+                escaped = True
+            elif char == quote_char:
+                quote_char = None
+            continue
+        if char in ('"', "'"):
+            quote_char = char
+        elif char in open_counts:
+            open_counts[char] += 1
+        elif char in closers:
+            open_counts[closers[char]] -= 1
+        elif char == "," and not any(open_counts.values()):
+            parts.append("".join(current).strip())
+            current = []
+            continue
+        current.append(char)
+    last_part = "".join(current).strip()
+    if last_part:
+        parts.append(last_part)
+    return parts
+def parse_single_parameter(param_string: str) -> Tuple[str, Any]:
+    """
+    Parse a single parameter string into name and value.
+    A parameter is a keyword argument when it has the form "name=value";
+    the value may span several lines. "==" is not treated as an assignment,
+    so an expression such as "a == b" is a positional argument. Positional
+    arguments are always reported under the name "arg_0".
+    Args:
+        param_string: String like "x=0.5" or "app_name='drupe'" or just "value"
+    Returns:
+        Tuple of (parameter_name, parameter_value)
+    Examples:
+        >>> parse_single_parameter("x=0.5")
+        ('x', 0.5)
+        >>> parse_single_parameter("app_name='drupe'")
+        ('app_name', 'drupe')
+        >>> parse_single_parameter("'text'")
+        ('arg_0', 'text')
+        >>> parse_single_parameter("123")
+        ('arg_0', 123)
+        >>> parse_single_parameter("3")
+        ('arg_0', 3)
+    """
+    # Pattern to match parameter name and value
+    # (?!=) rejects "=="; DOTALL lets the value contain newlines
+    pattern = r"^([a-zA-Z_][a-zA-Z0-9_]*)\s*=(?!=)\s*(.+)$"
+    match = re.match(pattern, param_string, re.DOTALL)
+    if match is None:
+        # Positional parameter - treat as unnamed argument
+        return "arg_0", parse_value(param_string)
+    # Named parameter
+    param_name = match.group(1)
+    param_value = parse_value(match.group(2).strip())
+    return param_name, param_value
+def parse_value(value_string: str) -> Any:
+    """
+    Parse a value string into appropriate Python type.
+    Checks are applied in this order: quoted string, list, dict, boolean
+    (case-insensitive), None (case-insensitive), integer, float. Anything
+    else is returned unchanged as a string.
+    Args:
+        value_string: String representation of a value
+    Returns:
+        Parsed value (int, float, str, list, etc.)
+    Examples:
+        >>> parse_value("3")
+        3
+        >>> parse_value("3.14")
+        3.14
+        >>> parse_value("1e3")
+        1000.0
+        >>> parse_value("'hello'")
+        'hello'
+        >>> parse_value("[0.581, 0.898]")
+        [0.581, 0.898]
+    """
+    value_string = value_string.strip()
+    # String values (quoted); at least two characters are required so that a
+    # lone quote character is not treated as an empty quoted string
+    if (
+        len(value_string) >= 2
+        and value_string[0] in ("'", '"')
+        and value_string[-1] == value_string[0]
+    ):
+        return value_string[1:-1]
+    # List values
+    if value_string.startswith("[") and value_string.endswith("]"):
+        return parse_list(value_string)
+    # Dictionary values
+    if value_string.startswith("{") and value_string.endswith("}"):
+        return parse_dict(value_string)
+    lowered = value_string.lower()
+    # Boolean values
+    if lowered in ("true", "false"):
+        return lowered == "true"
+    # None value
+    if lowered == "none":
+        return None
+    # Numeric values: try integer first
+    try:
+        return int(value_string)
+    except ValueError:
+        pass
+    # float() also accepts words such as "nan" or "inf"; requiring a digit
+    # keeps bare identifiers as strings while still accepting "1e3" or ".5"
+    if any(char.isdigit() for char in value_string):
+        try:
+            return float(value_string)
+        except ValueError:
+            pass
+    # Not a number: return the text unchanged
+    return value_string
+def parse_list(list_string: str) -> List[Any]:
+    """
+    Turn the textual form of a list into a Python list.
+    The first and last character (the brackets) are dropped, the remainder is
+    split on top-level commas and every element is parsed on its own.
+    Args:
+        list_string: String like "[0.581, 0.898]"
+    Returns:
+        List of parsed values; empty when nothing but whitespace is between
+        the brackets
+    Examples:
+        >>> parse_list("[0.581, 0.898]")
+        [0.581, 0.898]
+    """
+    inner = list_string[1:-1].strip()
+    if inner:
+        # Nested structures are kept intact by split_parameters
+        return [parse_value(element.strip()) for element in split_parameters(inner)]
+    return []
+def _find_key_separator(entry: str) -> int:
+    """Return the index of the ":" separating key and value in `entry`, or -1"""
+    quote_char = None
+    escaped = False
+    for index, char in enumerate(entry):
+        if quote_char is not None:
+            if escaped:
+                escaped = False
+            elif char == "\\":
+                escaped = True
+            elif char == quote_char:
+                quote_char = None
+        elif char in ('"', "'"):
+            quote_char = char
+        elif char == ":":
+            return index
+    # No colon outside quotes (e.g. unbalanced quotes): use the first colon, if any
+    return entry.find(":")
+def parse_dict(dict_string: str) -> Dict[str, Any]:
+    """
+    Parse a dictionary string into a Python dict.
+    Each entry is split at the first colon that is not inside quotes, so a
+    quoted key such as 'a:b' stays intact. Entries without any colon are
+    skipped.
+    Args:
+        dict_string: String like "{'key': 'value'}"
+    Returns:
+        Dictionary of parsed key-value pairs
+    """
+    # Remove outer braces
+    content = dict_string[1:-1].strip()
+    if not content:
+        return {}
+    # Split by commas, respecting nested structures
+    parts = split_parameters(content)
+    result = {}
+    for part in parts:
+        part = part.strip()
+        separator = _find_key_separator(part)
+        if separator == -1:
+            continue
+        key = parse_value(part[:separator].strip())
+        value = parse_value(part[separator + 1 :].strip())
+        result[key] = value
+    return result
+def parse_multiple_functions(function_strings: List[str]) -> List[FunctionCall]:
+    """
+    Parse every string of a list and collect the calls found in all of them.
+    A string that cannot be parsed is reported on stdout and skipped; the
+    remaining strings are still processed.
+    Args:
+        function_strings: List of function call strings
+    Returns:
+        List of FunctionCall objects, in input order
+    """
+    collected = []
+    for raw_call in function_strings:
+        try:
+            parsed_calls = parse_function_call(raw_call)
+        except Exception as e:
+            print(f"Warning: Could not parse function call '{raw_call}': {e}")
+        else:
+            collected += parsed_calls
+    return collected
+def extract_function_calls_from_text(text: str) -> List[FunctionCall]:
+    """
+    Find everything in a text block that looks like a function call and parse it.
+    Args:
+        text: Text containing function calls
+    Returns:
+        List of FunctionCall objects
+    """
+    # A candidate is a (possibly dotted) identifier followed by a parenthesised
+    # section that ends at the first ")"
+    call_pattern = re.compile(r"[a-zA-Z_][a-zA-Z0-9_.]*\([^)]*\)")
+    candidates = [found.group(0) for found in call_pattern.finditer(text)]
+    return parse_multiple_functions(candidates)
+# Example usage and testing
+if __name__ == "__main__":
+    test_cases = [
+        "mobile.home()",
+        "mobile.open_app(app_name='drupe')",
+        "mobile.swipe(from_coord=[0.581, 0.898], to_coord=[0.601, 0.518])",
+        "mobile.back()",
+        "mobile.long_press(x=0.799, y=0.911)",
+        "mobile.terminate(status='success')",
+        "answer('text')",
+        "pyautogui.hscroll(page=-0.1)",
+        "pyautogui.scroll(page=-0.1)",
+        "pyautogui.scroll(0.13)",
+        "pyautogui.click(x=0.8102, y=0.9463)",
+        "pyautogui.hotkey(keys=['ctrl', 'c'])",
+        "pyautogui.press(keys='enter')",
+        "pyautogui.press(keys=['enter'])",
+        "pyautogui.moveTo(x=0.04, y=0.405)",
+        "pyautogui.write(message='bread buns')",
+        "pyautogui.dragTo(x=0.8102, y=0.9463)",
+        "mobile.wait(seconds=3)\nmobile.swipe(from_coord=[0.581, 0.898], to_coord=[0.601, 0.518])",
+        # Additional test cases for multiple positional arguments
+        "function(arg1, arg2, arg3)",
+        "function('hello', 123, x=0.5)",
+        "function(arg1, arg2, named_param='value')",
+        "function(1, 2, 3, 4, 5)",
+        "function('a', 'b', 'c', x=1, y=2)",
+    ]
+    print("Testing function parser:")
+    print("=" * 50)
+    for test_case in test_cases:
+        try:
+            results = parse_function_call(test_case)
+            print(f"✓ {test_case}")
+            for result in results:
+                print(f"  Function: {result.function_name}")
+                print(f"  Parameters: {result.parameters}")
+            print()
+        except Exception as e:
+            print(f"✗ {test_case}")
+            print(f"  Error: {e}")
+            print()
+    # Test extracting from text
+    print("Testing text extraction:")
+    print("=" * 50)
+    sample_text = """
+    mobile.wait(seconds=3)
+    mobile.open_app(app_name='drupe')
+    pyautogui.click(x=0.8102, y=0.9463)
+    pyautogui.write(message='bread buns')
+    """
+    extracted = extract_function_calls_from_text(sample_text)
+    for func_call in extracted:
+        print(f"Found: {func_call.function_name} with params: {func_call.parameters}")
+    # Test reconstruction
+    print("\nTesting function call reconstruction:")
+    print("=" * 50)
+    reconstruction_tests = [
+        "mobile.wait(seconds=3)",
+        "mobile.home()",
+        "mobile.open_app(app_name='drupe')",
+        "mobile.swipe(from_coord=[0.581, 0.898], to_coord=[0.601, 0.518])",
+        "answer('text')",
+        "pyautogui.scroll(0.13)",
+        "pyautogui.click(x=0.8102, y=0.9463)",
+        "pyautogui.hotkey(keys=['ctrl', 'c'])",
+        "function(1, 2, 3)",
+        "function('hello', 123, x=0.5, y=0.8)",
+        "function([1, 3], 'arg2', named_param='value')",
+    ]
+    for test_case in reconstruction_tests:
+        parsed_list = parse_function_call(test_case)
+        for parsed in parsed_list:
+            reconstructed = parsed.to_string()
+            print(f"Original:  {test_case}")
+            print(f"Reconstructed: {reconstructed}")
+            print(f"Match: {test_case == reconstructed}")
+            assert test_case == reconstructed
+            print()

cua2-core/src/cua2_core/services/agent_utils/get_model.py ADDED Viewed

	@@ -0,0 +1,18 @@

+from smolagents import InferenceClientModel, Model
+# Available model IDs: Qwen3-VL checkpoints, each listed size in both its
+# Instruct and Thinking variant. Order matters to callers that compare lists.
+AVAILABLE_MODELS = [
+    "Qwen/Qwen3-VL-2B-Instruct",
+    "Qwen/Qwen3-VL-2B-Thinking",
+    "Qwen/Qwen3-VL-4B-Instruct",
+    "Qwen/Qwen3-VL-4B-Thinking",
+    "Qwen/Qwen3-VL-8B-Instruct",
+    "Qwen/Qwen3-VL-8B-Thinking",
+    "Qwen/Qwen3-VL-30B-A3B-Instruct",
+    "Qwen/Qwen3-VL-30B-A3B-Thinking",
+]
+def get_model(model_id: str) -> Model:
+    """Get the model"""
+    return InferenceClientModel(model_id=model_id)

cua2-core/src/cua2_core/services/agent_utils/prompt.py ADDED Viewed

	@@ -0,0 +1,136 @@

+from datetime import datetime
+E2B_SYSTEM_PROMPT_TEMPLATE = """You are a computer-use automation assistant controlling a full desktop remotely.
+The current date is <<current_date>>.
+<mission>
+Your objective is to complete a given task step-by-step by interacting with the desktop.
+At every step, you:
+1. Observe the latest screenshot (always analyze it carefully).
+2. Reflect briefly on what you see and what to do next.
+3. Produce **one precise action**, formatted exactly as Python code in a fenced block.
+You will receive a new screenshot after each action.
+Never skip the structure below.
+</mission>
+---
+<action_process>
+For every step, strictly follow this format:
+Short term goal: what you’re trying to accomplish in this step.
+What I see: describe key elements visible on the desktop.
+Reflection: reasoning that justifies your next move (mention errors or corrections if needed).
+**Action:**
+```python
+click(x, y)
+```<end_code>
+</action_process>
+---
+<environment>
+The desktop resolution is <<resolution_x>>x<<resolution_y>> pixels.
+You can only interact through the following tools:
+{%- for tool in tools.values() %}
+- **{{ tool.name }}**: {{ tool.description }}
+  - Inputs: {{ tool.inputs }}
+  - Returns: {{ tool.output_type }}
+{%- endfor %}
+If a task requires a specific application or website, **use**:
+```python
+open("app_or_url")
+```
+to launch it before interacting.
+Never manually click the browser icon — use `open_url()` directly for web pages.
+</environment>
+---
+<click_guidelines>
+- Always click using **real, visible coordinates** based on the current screenshot.
+- Click precisely **in the center** of the intended target (button, text, icon).
+- Avoid random or approximate coordinates.
+- If nothing changes after a click, check if you misclicked (green crosshair = last click position).
+- If a menu item shows a ▶ (triangle), it means it expands—click directly on the text, not the icon.
+- Use `scroll()` only within scrollable views (webpages, app lists, etc.).
+</click_guidelines>
+---
+<workflow_guidelines>
+- **ALWAYS START** by analyzing if the task requires opening an application or URL. If so, your **first action** must be:
+  - For websites: `open_url("https://google.com")`
+  - For applications: `open("app_name")`
+  - Never manually navigate to apps via clicking icons—use the open tools directly.
+- Complete one atomic action per step: e.g., **click**, **type**, or **wait**.
+- Never combine multiple tool calls in one step.
+- Validate that your previous action succeeded before continuing.
+- If the interface hasn't changed, adjust your strategy instead of repeating endlessly.
+- Use `wait(seconds)` for short delays if the interface is loading.
+- Always conclude with:
+```python
+final_answer("Answer the user's question or resume the task")
+```
+once the task is fully completed and verified. Answer the user's question or resume the task.
+</workflow_guidelines>
+---
+<example>
+Task: *Open a text editor and write “Hello World”*
+Step 1
+Short term goal: Launch the text editor.
+What I see: “Text Editor” visible under Accessories.
+Reflection: Clicking directly on “Text Editor”.
+Action:
+```python
+open("text_editor")
+```<end_code>
+Step 2
+Short term goal: click on the text editor page.
+What I see: Text editor page.
+Reflection: Click on the text editor page to write "Hello World".
+Action:
+```python
+click(52, 10)
+```<end_code>
+Step 3
+Short term goal: Type text.
+What I see: Empty notepad open.
+Reflection: Ready to type.
+Action:
+```python
+write("Hello World")
+```<end_code>
+Step 4
+Short term goal: Verify text and conclude.
+What I see: “Hello World” visible in notepad.
+Reflection: Task successful.
+Action:
+```python
+final_answer("The task is complete and the text 'Hello World' is visible in the notepad.")
+```<end_code>
+</example>
+---
+<core_principles>
+- Think visually and spatially.
+- Always ground your reasoning in what’s visible in the screenshot.
+- Never assume what’s on the next screen.
+- Always check the result of your last action.
+- Be deliberate, consistent, and patient.
+- **ALWAYS START** by analyzing if the task requires opening an application or URL. If so, your **first action** must be:
+  - For websites: `open_url("https://google.com")`
+  - For applications: `open("app_name")`
+  - **NEVER** manually navigate to apps via clicking icons—use the open tools directly.
+</core_principles>
+""".replace("<<current_date>>", datetime.now().strftime("%A, %d-%B-%Y"))

cua2-core/src/cua2_core/services/sandbox_service.py ADDED Viewed

	@@ -0,0 +1,90 @@

+import asyncio
+import logging
+import os
+import time
+from datetime import datetime
+from typing import Any
+from e2b_desktop import Sandbox
+logger = logging.getLogger(__name__)
+# Module-level metadata registry. Nothing in this module reads or writes it;
+# SandboxService keeps its own per-instance sandbox_metadata dict.
+SANDBOX_METADATA: dict[str, dict[str, Any]] = {}
+# Passed as `timeout` to Sandbox.create and used by acquire_sandbox as the age
+# limit for reusing a sandbox (presumably seconds — confirm against e2b_desktop).
+SANDBOX_TIMEOUT = 300
+# Desktop resolution handed to Sandbox.create as (WIDTH, HEIGHT)
+WIDTH = 1280
+HEIGHT = 960
+class SandboxService:
+    def __init__(self, max_sandboxes: int = 50):
+        if not os.getenv("E2B_API_KEY"):
+            raise ValueError("E2B_API_KEY is not set")
+        self.max_sandboxes = max_sandboxes
+        self.sandboxes: dict[str, Sandbox] = {}
+        self.sandbox_metadata: dict[str, dict[str, Any]] = {}
+        self.sandbox_lock = asyncio.Lock()
+    async def acquire_sandbox(self, session_hash: str) -> Sandbox | None:
+        async with self.sandbox_lock:
+            current_time = datetime.now()
+            if (
+                session_hash in self.sandboxes
+                and session_hash in self.sandbox_metadata
+                and current_time - self.sandbox_metadata[session_hash]["created_at"]
+                < SANDBOX_TIMEOUT
+            ):
+                print(f"Reusing Sandbox for session {session_hash}")
+                self.sandbox_metadata[session_hash]["last_accessed"] = current_time
+                return self.sandboxes[session_hash]
+            if session_hash in self.sandboxes:
+                try:
+                    print(f"Closing expired sandbox for session {session_hash}")
+                    await asyncio.to_thread(self.sandboxes[session_hash].kill)
+                except Exception as e:
+                    print(f"Error closing expired sandbox: {str(e)}")
+            elif len(self.sandboxes) >= self.max_sandboxes:
+                return None
+            print(f"Creating new sandbox for session {session_hash}")
+            def create_and_setup_sandbox():
+                desktop = Sandbox.create(
+                    api_key=os.getenv("E2B_API_KEY"),
+                    resolution=(WIDTH, HEIGHT),
+                    dpi=96,
+                    timeout=SANDBOX_TIMEOUT,
+                    template="k0wmnzir0zuzye6dndlw",
+                )
+                desktop.stream.start(require_auth=True)
+                setup_cmd = """sudo mkdir -p /usr/lib/firefox-esr/distribution && echo '{"policies":{"OverrideFirstRunPage":"","OverridePostUpdatePage":"","DisableProfileImport":true,"DontCheckDefaultBrowser":true}}' | sudo tee /usr/lib/firefox-esr/distribution/policies.json > /dev/null"""
+                desktop.commands.run(setup_cmd)
+                time.sleep(3)
+                return desktop
+            desktop = await asyncio.to_thread(create_and_setup_sandbox)
+            print(f"Sandbox ID for session {session_hash} is {desktop.sandbox_id}.")
+            self.sandboxes[session_hash] = desktop
+            self.sandbox_metadata[session_hash] = {
+                "created_at": current_time,
+                "last_accessed": current_time,
+            }
+            return desktop
+    async def release_sandbox(self, session_hash: str):
+        async with self.sandbox_lock:
+            if session_hash in self.sandboxes:
+                print(f"Releasing sandbox for session {session_hash}")
+                await asyncio.to_thread(self.sandboxes[session_hash].kill)
+                del self.sandboxes[session_hash]
+                del self.sandbox_metadata[session_hash]
+    async def cleanup_sandboxes(self):
+        async with self.sandbox_lock:
+            for session_hash in list(self.sandboxes.keys()):
+                await asyncio.to_thread(self.sandboxes[session_hash].kill)
+                del self.sandboxes[session_hash]
+                del self.sandbox_metadata[session_hash]

cua2-core/src/cua2_core/websocket/websocket_manager.py CHANGED Viewed

@@ -1,11 +1,29 @@
 import asyncio
 import json
-from typing import Dict, Optional, Set
-from cua2_core.models.models import AgentTraceMetadata, WebSocketEvent
 from fastapi import WebSocket
 class WebSocketManager:
     """Manages WebSocket connections and broadcasting"""
@@ -29,90 +47,72 @@ class WebSocketManager:
             f"WebSocket disconnected. Total connections: {len(self.active_connections)}"
         )
-    async def send_personal_message(
-        self, message: WebSocketEvent, websocket: WebSocket
-    ):
         """Send a message to a specific WebSocket connection"""
         try:
-            await websocket.send_text(json.dumps(message.model_dump(mode="json")))
         except Exception as e:
             print(f"Error sending personal message: {e}")
             # Only disconnect if the connection is still in our set
             if websocket in self.active_connections:
                 self.disconnect(websocket)
-    async def broadcast(self, message: WebSocketEvent):
-        """Broadcast a message to all connected WebSockets"""
-        if not self.active_connections:
-            return
-        # Create a list of connections to remove if they fail
-        disconnected = []
-        for connection in self.active_connections.copy():
-            try:
-                await connection.send_text(json.dumps(message.model_dump(mode="json")))
-            except Exception as e:
-                print(f"Error broadcasting to connection: {e}")
-                disconnected.append(connection)
-        # Remove failed connections
-        for connection in disconnected:
-            if connection in self.active_connections:
-                self.disconnect(connection)
-    async def send_agent_start(self, content: str, message_id: str):
         """Send agent start event"""
-        event = WebSocketEvent(
-            type="agent_start", content=content, messageId=message_id
         )
-        await self.broadcast(event)
-    async def send_agent_progress(self, content: str, message_id: str):
         """Send agent progress event"""
-        event = WebSocketEvent(
-            type="agent_progress", content=content, messageId=message_id
         )
-        await self.broadcast(event)
     async def send_agent_complete(
-        self,
-        content: str,
-        message_id: str,
-        metadata: Optional[AgentTraceMetadata] = None,
     ):
         """Send agent complete event"""
-        event = WebSocketEvent(
-            type="agent_complete",
-            content=content,
-            messageId=message_id,
-            metadata=metadata,
-        )
-        await self.broadcast(event)
-    async def send_agent_error(self, content: str, message_id: Optional[str] = None):
         """Send agent error event"""
-        event = WebSocketEvent(
-            type="agent_error", content=content, messageId=message_id
-        )
-        await self.broadcast(event)
-    async def send_vnc_url_set(self, vnc_url: str, content: Optional[str] = None):
         """Send VNC URL set event"""
-        event = WebSocketEvent(
-            type="vnc_url_set",
-            content=content or f"VNC stream available at: {vnc_url}",
             vncUrl=vnc_url,
         )
-        await self.broadcast(event)
-    async def send_vnc_url_unset(self, content: Optional[str] = None):
         """Send VNC URL unset event (reset to default display)"""
-        event = WebSocketEvent(
-            type="vnc_url_unset",
-            content=content or "VNC stream disconnected, showing default display",
-        )
-        await self.broadcast(event)
     def get_connection_count(self) -> int:
         """Get the number of active connections"""

 import asyncio
 import json
+from typing import Dict, Set
+from cua2_core.models.models import (
+    ActiveTask,
+    AgentCompleteEvent,
+    AgentErrorEvent,
+    AgentProgressEvent,
+    AgentStartEvent,
+    AgentStep,
+    AgentTrace,
+    AgentTraceMetadata,
+    VncUrlSetEvent,
+    VncUrlUnsetEvent,
+    WebSocketEvent,
+)
 from fastapi import WebSocket
+class WebSocketException(Exception):
+    """Exception for WebSocket errors"""
+    pass
 class WebSocketManager:
     """Manages WebSocket connections and broadcasting"""
             f"WebSocket disconnected. Total connections: {len(self.active_connections)}"
         )
+    async def send_message(self, message: WebSocketEvent, websocket: WebSocket):
         """Send a message to a specific WebSocket connection"""
         try:
+            await websocket.send_text(
+                json.dumps(
+                    message.model_dump(mode="json", context={"actions_as_json": False})
+                )
+            )
         except Exception as e:
             print(f"Error sending personal message: {e}")
             # Only disconnect if the connection is still in our set
             if websocket in self.active_connections:
                 self.disconnect(websocket)
+            raise WebSocketException()
+    async def send_agent_start(self, active_task: ActiveTask, websocket: WebSocket):
         """Send agent start event"""
+        event = AgentStartEvent(
+            agentTrace=AgentTrace(
+                id=active_task.message_id,
+                timestamp=active_task.timestamp,
+                instruction=active_task.instruction,
+                modelId=active_task.model_id,
+                steps=active_task.steps,
+                traceMetadata=active_task.traceMetadata,
+                isRunning=True,
+            ),
         )
+        await self.send_message(event, websocket)
+    async def send_agent_progress(
+        self,
+        step: AgentStep,
+        metadata: AgentTraceMetadata,
+        websocket: WebSocket,
+    ):
         """Send agent progress event"""
+        event = AgentProgressEvent(
+            agentStep=step,
+            traceMetadata=metadata,
         )
+        await self.send_message(event, websocket)
     async def send_agent_complete(
+        self, metadata: AgentTraceMetadata, websocket: WebSocket
     ):
         """Send agent complete event"""
+        event = AgentCompleteEvent(traceMetadata=metadata)
+        await self.send_message(event, websocket)
+    async def send_agent_error(self, error: str, websocket: WebSocket):
         """Send agent error event"""
+        event = AgentErrorEvent(error=error)
+        await self.send_message(event, websocket)
+    async def send_vnc_url_set(self, vnc_url: str, websocket: WebSocket):
         """Send VNC URL set event"""
+        event = VncUrlSetEvent(
             vncUrl=vnc_url,
         )
+        await self.send_message(event, websocket)
+    async def send_vnc_url_unset(self, websocket: WebSocket):
         """Send VNC URL unset event (reset to default display)"""
+        event = VncUrlUnsetEvent()
+        await self.send_message(event, websocket)
     def get_connection_count(self) -> int:
         """Get the number of active connections"""

cua2-core/tests/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """Tests for cua2-core"""

cua2-core/tests/test_routes.py ADDED Viewed

	@@ -0,0 +1,311 @@

+from unittest.mock import Mock
+import pytest
+from cua2_core.models.models import AvailableModelsResponse, UpdateStepResponse
+from cua2_core.routes.routes import router
+from cua2_core.services.agent_service import AgentService
+from cua2_core.services.agent_utils.get_model import AVAILABLE_MODELS
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.testclient import TestClient
+@pytest.fixture
+def mock_agent_service():
+    """Fixture to create a mocked AgentService"""
+    service = Mock(spec=AgentService)
+    service.active_tasks = {}
+    service.update_trace_step = Mock()
+    return service
+@pytest.fixture
+def mock_websocket_manager():
+    """Fixture to create a mocked WebSocketManager"""
+    manager = Mock()
+    manager.get_connection_count = Mock(return_value=0)
+    return manager
+@pytest.fixture
+def app(mock_agent_service, mock_websocket_manager):
+    """Fixture to create FastAPI app with mocked services"""
+    # Create a test FastAPI app
+    test_app = FastAPI(title="Test App")
+    # Add CORS middleware
+    test_app.add_middleware(
+        CORSMiddleware,
+        allow_origins=["*"],
+        allow_credentials=True,
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
+    # Include the router
+    test_app.include_router(router)
+    # Mock the services in app state
+    test_app.state.agent_service = mock_agent_service
+    test_app.state.websocket_manager = mock_websocket_manager
+    return test_app
+@pytest.fixture
+def client(app):
+    """Fixture to create test client"""
+    return TestClient(app)
+class TestGetAvailableModels:
+    """Test suite for GET /models endpoint"""
+    def test_get_available_models_success(self, client):
+        """Test successful retrieval of available models"""
+        response = client.get("/models")
+        assert response.status_code == 200
+        data = response.json()
+        assert "models" in data
+        assert isinstance(data["models"], list)
+        assert len(data["models"]) > 0
+        # Verify models match AVAILABLE_MODELS
+        assert data["models"] == AVAILABLE_MODELS
+    def test_get_available_models_structure(self, client):
+        """Test that response matches AvailableModelsResponse schema"""
+        response = client.get("/models")
+        assert response.status_code == 200
+        data = response.json()
+        # Validate against Pydantic model
+        models_response = AvailableModelsResponse(**data)
+        assert models_response.models == AVAILABLE_MODELS
+    def test_get_available_models_content(self, client):
+        """Test that specific expected models are included"""
+        response = client.get("/models")
+        assert response.status_code == 200
+        data = response.json()
+        # Check for some specific models
+        expected_models = [
+            "Qwen/Qwen3-VL-2B-Instruct",
+            "Qwen/Qwen3-VL-30B-A3B-Instruct",
+        ]
+        for model in expected_models:
+            assert model in data["models"]
+class TestUpdateTraceStep:
+    """Test suite for PATCH /traces/{trace_id}/steps/{step_id} endpoint"""
+    def test_update_trace_step_success(self, client, mock_agent_service):
+        """Test successful step update"""
+        trace_id = "test-trace-123"
+        step_id = "1"
+        request_data = {"step_evaluation": "like"}
+        # Mock the service method to succeed
+        mock_agent_service.update_trace_step.return_value = None
+        response = client.patch(
+            f"/traces/{trace_id}/steps/{step_id}", json=request_data
+        )
+        assert response.status_code == 200
+        data = response.json()
+        assert data["success"] is True
+        assert data["message"] == "Step updated successfully"
+        # Verify the service was called correctly
+        mock_agent_service.update_trace_step.assert_called_once_with(
+            trace_id=trace_id, step_id=step_id, step_evaluation="like"
+        )
+    def test_update_trace_step_with_dislike(self, client, mock_agent_service):
+        """Test step update with 'dislike' evaluation"""
+        trace_id = "test-trace-456"
+        step_id = "2"
+        request_data = {"step_evaluation": "dislike"}
+        mock_agent_service.update_trace_step.return_value = None
+        response = client.patch(
+            f"/traces/{trace_id}/steps/{step_id}", json=request_data
+        )
+        assert response.status_code == 200
+        mock_agent_service.update_trace_step.assert_called_once_with(
+            trace_id=trace_id, step_id=step_id, step_evaluation="dislike"
+        )
+    def test_update_trace_step_with_neutral(self, client, mock_agent_service):
+        """Test step update with 'neutral' evaluation"""
+        trace_id = "test-trace-789"
+        step_id = "3"
+        request_data = {"step_evaluation": "neutral"}
+        mock_agent_service.update_trace_step.return_value = None
+        response = client.patch(
+            f"/traces/{trace_id}/steps/{step_id}", json=request_data
+        )
+        assert response.status_code == 200
+        mock_agent_service.update_trace_step.assert_called_once_with(
+            trace_id=trace_id, step_id=step_id, step_evaluation="neutral"
+        )
+    def test_update_trace_step_invalid_evaluation(self, client, mock_agent_service):
+        """Test step update with invalid evaluation value"""
+        trace_id = "test-trace-123"
+        step_id = "1"
+        request_data = {"step_evaluation": "invalid"}
+        response = client.patch(
+            f"/traces/{trace_id}/steps/{step_id}", json=request_data
+        )
+        # Should fail validation
+        assert response.status_code == 422
+    def test_update_trace_step_value_error(self, client, mock_agent_service):
+        """Test step update when service raises ValueError"""
+        trace_id = "test-trace-123"
+        step_id = "invalid"
+        request_data = {"step_evaluation": "like"}
+        # Mock the service to raise ValueError
+        mock_agent_service.update_trace_step.side_effect = ValueError(
+            "Invalid step_id format"
+        )
+        response = client.patch(
+            f"/traces/{trace_id}/steps/{step_id}", json=request_data
+        )
+        assert response.status_code == 400
+        assert "Invalid step_id format" in response.json()["detail"]
+    def test_update_trace_step_not_found(self, client, mock_agent_service):
+        """Test step update when trace is not found"""
+        trace_id = "nonexistent-trace"
+        step_id = "1"
+        request_data = {"step_evaluation": "like"}
+        # Mock the service to raise FileNotFoundError
+        mock_agent_service.update_trace_step.side_effect = FileNotFoundError(
+            "Trace not found"
+        )
+        response = client.patch(
+            f"/traces/{trace_id}/steps/{step_id}", json=request_data
+        )
+        assert response.status_code == 404
+        assert "Trace not found" in response.json()["detail"]
+    def test_update_trace_step_step_not_found(self, client, mock_agent_service):
+        """Test step update when step doesn't exist in trace"""
+        trace_id = "test-trace-123"
+        step_id = "999"
+        request_data = {"step_evaluation": "like"}
+        # Mock the service to raise ValueError for step not found
+        mock_agent_service.update_trace_step.side_effect = ValueError(
+            "Step 999 not found in trace"
+        )
+        response = client.patch(
+            f"/traces/{trace_id}/steps/{step_id}", json=request_data
+        )
+        assert response.status_code == 400
+        assert "Step 999 not found in trace" in response.json()["detail"]
+    def test_update_trace_step_missing_request_body(self, client, mock_agent_service):
+        """Test step update with missing request body"""
+        trace_id = "test-trace-123"
+        step_id = "1"
+        response = client.patch(f"/traces/{trace_id}/steps/{step_id}", json={})
+        # Should fail validation
+        assert response.status_code == 422
+    def test_update_trace_step_with_special_characters(
+        self, client, mock_agent_service
+    ):
+        """Test step update with trace_id containing special characters"""
+        trace_id = "trace-01K960P4FA2BVC058EZDXQEB5E-Qwen-Qwen3-VL-30B-A3B-Instruct"
+        step_id = "1"
+        request_data = {"step_evaluation": "like"}
+        mock_agent_service.update_trace_step.return_value = None
+        response = client.patch(
+            f"/traces/{trace_id}/steps/{step_id}", json=request_data
+        )
+        assert response.status_code == 200
+        mock_agent_service.update_trace_step.assert_called_once_with(
+            trace_id=trace_id, step_id=step_id, step_evaluation="like"
+        )
+    def test_update_trace_step_response_structure(self, client, mock_agent_service):
+        """Test that response matches UpdateStepResponse schema"""
+        trace_id = "test-trace-123"
+        step_id = "1"
+        request_data = {"step_evaluation": "like"}
+        mock_agent_service.update_trace_step.return_value = None
+        response = client.patch(
+            f"/traces/{trace_id}/steps/{step_id}", json=request_data
+        )
+        assert response.status_code == 200
+        data = response.json()
+        # Validate against Pydantic model
+        update_response = UpdateStepResponse(**data)
+        assert update_response.success is True
+        assert update_response.message == "Step updated successfully"
+class TestRoutesIntegration:
+    """Integration tests for multiple routes"""
+    def test_models_endpoint_available(self, client):
+        """Test that models endpoint is available"""
+        response = client.get("/models")
+        assert response.status_code == 200
+    def test_update_step_endpoint_available(self, client, mock_agent_service):
+        """Test that update step endpoint is available"""
+        mock_agent_service.update_trace_step.return_value = None
+        response = client.patch(
+            "/traces/test/steps/1", json={"step_evaluation": "like"}
+        )
+        assert response.status_code == 200
+    def test_invalid_route(self, client):
+        """Test accessing an invalid route"""
+        response = client.get("/invalid-route")
+        assert response.status_code == 404
+# Allow running this test module directly; pytest is invoked on this file in verbose mode
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])

cua2-front/src/components/mock/TaskButton.tsx CHANGED Viewed

@@ -12,8 +12,8 @@ export const TaskButton: React.FC<TaskButtonProps> = ({ isAgentProcessing, isCon
       onClick={() => {
         if (!isAgentProcessing && isConnected) {
           onSendTask(
-            "Complete the online form by clicking through the required fields",
-            "anthropic/claude-sonnet-4-5-20250929"
           );
         }
       }}
@@ -56,7 +56,7 @@ export const TaskButton: React.FC<TaskButtonProps> = ({ isAgentProcessing, isCon
             )}
           </div>
           <p style={{ fontSize: '15px', fontWeight: 500, color: '#1f2937' }}>
-            Complete the online form by clicking through the required fields
           </p>
         </div>
         <div style={{
@@ -67,7 +67,7 @@ export const TaskButton: React.FC<TaskButtonProps> = ({ isAgentProcessing, isCon
         }}>
           <span style={{ fontSize: '11px', fontWeight: 600, color: 'rgba(0, 0, 0, 0.6)', textTransform: 'uppercase', letterSpacing: '1px' }}>Model</span>
           <p style={{ fontSize: '12px', fontWeight: 600, color: '#1f2937', marginTop: '2px', whiteSpace: 'nowrap' }}>
-            claude-sonnet-4-5-20250929
           </p>
         </div>
       </div>

       onClick={() => {
         if (!isAgentProcessing && isConnected) {
           onSendTask(
+            "Find the price of a NVIDIA RTX 4090 GPU",
+            "Qwen/Qwen3-VL-30B-A3B-Instruct"
           );
         }
       }}
             )}
           </div>
           <p style={{ fontSize: '15px', fontWeight: 500, color: '#1f2937' }}>
+            Find the price of a NVIDIA RTX 4090 GPU
           </p>
         </div>
         <div style={{
         }}>
           <span style={{ fontSize: '11px', fontWeight: 600, color: 'rgba(0, 0, 0, 0.6)', textTransform: 'uppercase', letterSpacing: '1px' }}>Model</span>
           <p style={{ fontSize: '12px', fontWeight: 600, color: '#1f2937', marginTop: '2px', whiteSpace: 'nowrap' }}>
+            Qwen/Qwen3-VL-30B-A3B-Instruct
           </p>
         </div>
       </div>

cua2-front/src/pages/Index.tsx CHANGED Viewed

@@ -1,16 +1,14 @@
-import React from 'react';
 import { useWebSocket } from '@/hooks/useWebSocket';
-import { WebSocketEvent } from '@/types/agent';
 import { useState } from 'react';
-import { AgentTrace, AgentStep } from '@/types/agent';
 import { ulid } from 'ulid';
-import { Header, VNCStream, Metadata, StackSteps } from '@/components/mock';
 const Index = () => {
   const [trace, setTrace] = useState<AgentTrace>();
   const [isAgentProcessing, setIsAgentProcessing] = useState(false);
   const [vncUrl, setVncUrl] = useState<string>('');
-  const [selectedModelId, setSelectedModelId] = useState<string>("claude-sonnet-4-5-20250929");
   // #################### WebSocket Connection ########################
@@ -51,12 +49,12 @@ const Index = () => {
         setIsAgentProcessing(false);
         setTrace(trace => {
           return trace.id === event.traceMetadata.traceId
-              ? {
-                ...trace,
-                isRunning: false,
-                metadata: event.traceMetadata,
-              }
-              : trace;
         });
         console.log('Agent complete received:', event.traceMetadata);
         break;

+import { Header, Metadata, StackSteps, VNCStream } from '@/components/mock';
 import { useWebSocket } from '@/hooks/useWebSocket';
+import { AgentStep, AgentTrace, WebSocketEvent } from '@/types/agent';
 import { useState } from 'react';
 import { ulid } from 'ulid';
 const Index = () => {
   const [trace, setTrace] = useState<AgentTrace>();
   const [isAgentProcessing, setIsAgentProcessing] = useState(false);
   const [vncUrl, setVncUrl] = useState<string>('');
+  const [selectedModelId, setSelectedModelId] = useState<string>("Qwen/Qwen3-VL-30B-A3B-Instruct");
   // #################### WebSocket Connection ########################
         setIsAgentProcessing(false);
         setTrace(trace => {
           return trace.id === event.traceMetadata.traceId
+            ? {
+              ...trace,
+              isRunning: false,
+              metadata: event.traceMetadata,
+            }
+            : trace;
         });
         console.log('Agent complete received:', event.traceMetadata);
         break;

cua2-front/src/types/agent.ts CHANGED Viewed

@@ -82,3 +82,18 @@ export interface UserTaskMessage {
   type: 'user_task';
   trace: AgentTrace;
 }

   type: 'user_task';
   trace: AgentTrace;
 }
+// #################### API Routes Types ########################
+export interface AvailableModelsResponse {
+  models: string[];
+}
+export interface UpdateStepRequest {
+  step_evaluation: 'like' | 'dislike' | 'neutral';
+}
+export interface UpdateStepResponse {
+  success: boolean;
+  message: string;
+}