| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| import json |
| from typing import Any, Optional |
| from uuid import uuid4 |
|
|
| from verl.utils.rollout_trace import rollout_trace_op |
|
|
| from .schemas import OpenAIFunctionToolSchema, ToolResponse |
|
|
|
|
class BaseTool:
    """Base class for tools.

    A tool should support the following methods:

    - `get_openai_tool_schema`: return the tool schema in OpenAI format.
    - `create`: create a tool instance for a trajectory.
    - `execute`: execute the tool.
    - `calc_reward`: calculate the reward with respect to the tool state.
    - `release`: release the tool instance.

    The implementations in this class are placeholder defaults: `create`
    only hands back an instance id, `execute` returns a fixed response with
    a zero reward, `calc_reward` returns 0.0 and `release` does nothing.
    """

    def __init__(self, config: dict, tool_schema: Optional[OpenAIFunctionToolSchema]):
        """Store the configuration and resolve the tool schema.

        Args:
            config: Tool configuration; stored unchanged on ``self.config``.
            tool_schema: The schema of the tool in OpenAI function format. When
                it is falsy (e.g. ``None``), ``get_openai_tool_schema()`` is
                consulted instead, so a subclass can supply its own schema.

        Raises:
            AssertionError: If no schema was passed and
                ``get_openai_tool_schema()`` does not provide one either.
        """
        self.config = config
        self.tool_schema = tool_schema or self.get_openai_tool_schema()
        # Raised explicitly instead of using an `assert` statement so the check
        # is not stripped under `python -O`; exception type and message are the
        # same as before.
        if self.tool_schema is None:
            raise AssertionError("Tool schema is not set!")
        self.name = self.tool_schema.function.name
        # Echo the resolved schema to stdout at construction time.
        print(json.dumps(self.tool_schema.model_dump(exclude_unset=True, exclude_none=True), indent=2))

    def get_openai_tool_schema(self) -> Optional[OpenAIFunctionToolSchema]:
        """Return the tool schema in OpenAI format.

        Returns:
            The schema stored on the instance, or ``None`` when it has not been
            set yet. ``__init__`` calls this method *before* assigning
            ``self.tool_schema``, hence the ``getattr`` fallback: without it the
            lookup fails with an ``AttributeError`` instead of letting
            ``__init__`` report that the schema is missing.
        """
        return getattr(self, "tool_schema", None)

    async def create(self, instance_id: Optional[str] = None, **kwargs) -> tuple[str, ToolResponse]:
        """Create a tool instance.

        Args:
            instance_id: The instance id of the tool. When ``None``, a fresh
                random UUID4 string is generated.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            instance_id: The instance id of the tool (the given one, or the
                newly generated one).
            tool_creation_response: The response of the tool when creating the
                instance; an empty ``ToolResponse()`` in this base class.
        """
        if instance_id is None:
            instance_id = str(uuid4())
        return instance_id, ToolResponse()

    @rollout_trace_op
    async def execute(self, instance_id: str, parameters: dict[str, Any], **kwargs) -> tuple[ToolResponse, float, dict]:
        """Execute the tool.

        Args:
            instance_id: The instance id of the tool.
            parameters: The parameters of the tool call, as a dict.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns: tool_response, tool_reward_score, tool_metrics
            tool_response: The ToolResponse object containing text, image, and/or video content.
            tool_reward_score: The step reward score of the tool.
            tool_metrics: The metrics of the tool.

        This base implementation ignores its arguments and returns a fixed
        text response, a reward of 0.0 and empty metrics.
        """
        return ToolResponse(text="Updated the tool state."), 0.0, {}

    async def calc_reward(self, instance_id: str, **kwargs) -> float:
        """Calculate the reward of the tool.

        Args:
            instance_id: The instance id of the tool.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            The reward of the tool; always 0.0 in this base class.
        """
        return 0.0

    async def release(self, instance_id: str, **kwargs) -> None:
        """Release the tool instance.

        Args:
            instance_id: The instance id of the tool.
            **kwargs: Accepted for interface compatibility; unused here.

        This base implementation does nothing.
        """
        pass
|
|