{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/export/agentstudio-family/miniconda3/v2lite-train/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } ], "source": [ "import os\n", "import json\n", "from typing import List, Union\n", "from pprint import pprint\n", "import torch\n", "import inspect\n", "from collections import defaultdict\n", "from transformers import AutoTokenizer\n", "from agentstudio.agentstudio_utils import *\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Global Variables" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "def get_current_weather(location: str, format: str, date: int):\n", " \"\"\"\n", " Get the current weather\n", "\n", " Args:\n", " location: The city and state, e.g. San Francisco, CA\n", " format: The temperature unit to use. Infer this from the users location. (choices: [\"celsius\", \"fahrenheit\"])\n", " date: a specific date to get the weather for\n", " \"\"\"\n", " pass\n", "\n", "def get_current_temperature(location: str, unit: str, longitude: float, latitude: float) -> float:\n", " \"\"\"\n", " Get the current temperature at a location.\n", " \n", " Args:\n", " location: The location to get the temperature for, in the format \"City, Country\"\n", " unit: The unit to return the temperature in. (choices: [\"celsius\", \"fahrenheit\"])\n", " longitude: the longitude of the location\n", " latitude: the latitude of the location\n", " Returns:\n", " The current temperature at the specified location in the specified units, as a float.\n", " \"\"\"\n", " return 22. # A real function should probably actually get the temperature!\n", "\n", "def get_current_wind_speed(location: str) -> float:\n", " \"\"\"\n", " Get the current wind speed in km/h at a given location.\n", " \n", " Args:\n", " location: The location to get the temperature for, in the format \"City, Country\"\n", " Returns:\n", " The current wind speed at the given location in km/h, as a float.\n", " \"\"\"\n", " return 6. # A real function should probably actually get the wind speed!\n", "\n", "tools_simple_str = \"\"\"\n", "def get_current_weather(location: str, format: str, date: int):\n", " '''\n", " Get the current weather\n", "\n", " Args:\n", " location: The city and state, e.g. San Francisco, CA\n", " format: The temperature unit to use. Infer this from the users location. (choices: [\"celsius\", \"fahrenheit\"])\n", " date: a specific date to get the weather for\n", " '''\n", " pass\n", "\n", "def get_current_temperature(location: str, unit: str, longitude: float, latitude: float) -> float:\n", " '''\n", " Get the current temperature at a location.\n", " \n", " Args:\n", " location: The location to get the temperature for, in the format \"City, Country\"\n", " unit: The unit to return the temperature in. (choices: [\"celsius\", \"fahrenheit\"])\n", " longitude: the longitude of the location\n", " latitude: the latitude of the location\n", " Returns:\n", " The current temperature at the specified location in the specified units, as a float.\n", " '''\n", " return 22. # A real function should probably actually get the temperature!\n", "\n", "def get_current_wind_speed(location: str) -> float:\n", " '''\n", " Get the current wind speed in km/h at a given location.\n", " \n", " Args:\n", " location: The location to get the temperature for, in the format \"City, Country\"\n", " Returns:\n", " The current wind speed at the given location in km/h, as a float.\n", " '''\n", " return 6. # A real function should probably actually get the wind speed!\n", "\"\"\"\n", "\n", "tools = [get_current_weather, get_current_temperature, get_current_wind_speed]\n", "tools_str = json.dumps([{\"name\": tool.__name__, \"parameters\": str(inspect.signature(tool)), \"description\": tool.__doc__.strip()} for tool in tools], indent=4)\n", "\n", "BASE_XLAM_DIR = \"/export/agentstudio-family/checkpoints/xlam_v1\"\n", "model_list = [\n", " \"xlam_7b_r\",\n", " \"xlam_8x7b_r\",\n", " \"xlam_8x22b_r\"\n", "]\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### DO NOT RUN THIS CELL as JIANGUO has updated for xLAM 1.0 Series (xLAM Tool Call Template)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "xlam_chat_template = \"\"\"\n", "{%- if messages[0][\"role\"] == \"system\" %}\n", " {%- set system_message = messages[0][\"content\"] %}\n", " {%- if messages[1][\"role\"] == \"system\" %}\n", " {%- set format_message = messages[1][\"content\"] %}\n", " {%- set loop_messages = messages[2:] %}\n", " {%- else %}\n", " {%- set loop_messages = messages[1:] %}\n", " {%- endif %}\n", "{%- else %}\n", " {%- set loop_messages = messages %}\n", "{%- endif %}\n", "{%- if not tools is defined %}\n", " {%- set tools = none %}\n", "{%- endif %}\n", "\n", "{#- This block checks for alternating user/assistant messages, skipping tool calling messages #}\n", "{%- set ns = namespace() %}\n", "{%- set ns.index = 0 %}\n", "{%- for message in loop_messages %}\n", " {%- if ((message[\"role\"] == \"user\") and (ns.index % 2 != 0)) or (message[\"role\"] not in [\"user\", \"assistant\", \"tool\", \"tool_results\"]) %}\n", " {{- raise_exception(\"After the optional system message, conversation roles can only be from user/assistant/tool; After each tool message, the next message must be from the assistant\") }}\n", " {%- endif %}\n", " {%- set ns.index = ns.index + 1 %}\n", "{%- endfor %}\n", "\n", "{{- bos_token }}\n", "{{- \"[INST]\" }}\n", "{%- if system_message is not defined %}\n", " {% set system_message %}\n", "You are an expert in composing functions. You are given a question and a set of possible functions. \n", "Based on the question, you will need to make one or more function/tool calls to achieve the purpose. \n", "If none of the functions can be used, point it out and refuse to answer. \n", "If the given question lacks the parameters required by the function, also point it out.{% endset %}\n", "{%- endif %}\n", "{{- \"\\n[BEGIN OF TASK INSTRUCTION]\\n\" + system_message + \"\\n[END OF TASK INSTRUCTION]\\n\\n\" }}\n", "\n", "{%- if tools is not none %}\n", " {{- \"[BEGIN OF AVAILABLE_TOOLS]\\n\" }}\n", " {{- tools|string }}\n", " {{- \"\\n[END OF AVAILABLE_TOOLS]\\n\\n\" }}\n", "{%- endif %}\n", " \n", "{%- if format_message is not defined %}\n", " {% set format_message %}\n", "Your output should be in the JSON format, which specifies a list of function calls. The example format is as follows. Please make sure the parameter type is correct. If no function call is needed, please make tool_calls an empty list '[]'.\n", "```{\"thought\": \"the thought process, or an empty string\", \"tool_calls\": [{\"name\": \"api_name1\", \"arguments\": {\"argument1\": \"value1\", \"argument2\": \"value2\"}}]}```\n", " {% endset %}\n", "{%- endif %}\n", "{{- \"[BEGIN OF FORMAT INSTRUCTION]\\n\" + format_message + \"[END OF FORMAT INSTRUCTION]\\n\\n\" }}\n", "\n", "{%- if loop_messages[0][\"role\"] == \"user\" %}\n", " {%- set query = loop_messages[0][\"content\"] %}\n", " {{- \"[BEGIN OF QUERY]\\n\" + query + \"\\n[END OF QUERY]\\n\" }}\n", " {%- set loop_messages = loop_messages[1:] %}\n", "{%- endif %}\n", "\n", "{% if loop_messages %}\n", " {{- \"[BEGIN OF HISTORY STEPS]\\n[\" }}\n", " {%- set step_id = namespace(value=1) %}\n", " {%- for message in loop_messages %}\n", " {%- if message[\"role\"] == \"assistant\" %}\n", " {%- if message.tool_calls is defined and message.tool_calls is not none %}\n", " {% if message.tool_calls is iterable and (message.tool_calls is not string and message.tool_calls is not mapping) %}\n", " {{- \"{'thought':\" + message.content|tojson + \", 'tool_calls':\" + message.tool_calls|tojson + \", 'step_id':\" + step_id.value|string + \",\" }}\n", " {%- else %}\n", " {{- raise_exception(\"The tool_calls must be a list!\") }}\n", " {%- endif %}\n", " {%- else %}\n", " {{- \"{'thought':\" + message.content|tojson + \", 'tool_calls':[]\" + \", 'step_id':\" + step_id.value|string + \",\" }}\n", " {%- endif %}\n", " {%- if loop.nextitem is not defined %}\n", " {{- \" 'next_observation':''}\" }}\n", " {%- elif loop.nextitem[\"role\"] == \"user\" %}\n", " {{- \" 'next_observation':''\" }}\n", " {%- elif loop.nextitem[\"role\"] != \"tool_results\" and loop.nextitem[\"role\"] != \"tool\" %}\n", " {{- \" 'next_observation':''},\" }}\n", " {%- endif %}\n", " {%- elif message[\"role\"] == \"tool_results\" or message[\"role\"] == \"tool\" %}\n", " {{- \" 'next_observation':\" + message.content|tojson }}\n", " {%- if loop.nextitem is defined and loop.nextitem[\"role\"] != \"user\" %}\n", " {{- \"}, \" }}\n", " {%- elif not loop.nextitem is defined %}\n", " {{- \"} \" }}\n", " {%- endif %}\n", " {%- set step_id.value = step_id.value + 1 %}\n", " {%- elif message[\"role\"] == \"user\" %}\n", " {{- \", 'user_input':\" + message.content|tojson }}\n", " {%- if loop.nextitem is defined %}\n", " {{- \"}, \" }}\n", " {%- else %}\n", " {{- \"} \" }}\n", " {%- endif %}\n", " {%- set step_id.value = step_id.value + 1 %}\n", " {%- endif %}\n", " {%- endfor %}\n", " {{- \"]\\n[END OF HISTORY STEPS]\\n\" }}\n", "{%- endif %}\n", "{{- \"[/INST]\" }}\n", "\"\"\".strip()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### DO NOT RUN THIS CELL as JIANGUO has updated for xLAM 1.0 Series(Update Tokenizer)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Updated tokenizer for /export/agentstudio-family/checkpoints/xlam_v1/xlam_7b_r\n", "Updated tokenizer for /export/agentstudio-family/checkpoints/xlam_v1/xlam_8x7b_r\n", "Updated tokenizer for /export/agentstudio-family/checkpoints/xlam_v1/xlam_8x22b_r\n" ] } ], "source": [ "def check_tokenizer(model_list, BASE_XLAM_DIR):\n", " for checkpoint in model_list:\n", " tokenize_config = open_json(os.path.join(BASE_XLAM_DIR, checkpoint, \"tokenizer_config.json\"))\n", " \n", " del tokenize_config[\"chat_template\"]\n", " original_tokenize_config = open_json(os.path.join(BASE_XLAM_DIR, checkpoint, \"original_tokenizer_config.json\"))\n", " del original_tokenize_config[\"chat_template\"]\n", " if tokenize_config != original_tokenize_config:\n", " raise ValueError(f\"Tokenizer config is not the same for {checkpoint}\")\n", "\n", "def update_tokenizer(model_list, BASE_XLAM_DIR):\n", " \"\"\"\n", " Chat_templates: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md\n", " We update Mixtral-8x22b-inst-v0.1 to better handle the system prompt in https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1/discussions/54\n", " \"\"\"\n", " for checkpoint in model_list:\n", " checkpoint = os.path.join(BASE_XLAM_DIR, checkpoint)\n", " if os.path.exists(checkpoint):\n", " raise ValueError(\"Tokenizer is already updated for xLAM 1.0 series: {}. Contact ❤❤❤Jianguo Zhang❤❤❤ for more details! \".format(checkpoint))\n", " \n", " if \"8x22b\" in checkpoint:\n", " original_tokenize_config = open_json(os.path.join(checkpoint, \"original_tokenizer_config_fixed.json\"))\n", " else:\n", " original_tokenize_config = open_json(os.path.join(checkpoint, \"original_tokenizer_config.json\"))\n", " chat_template = {\n", " \"default\": original_tokenize_config[\"chat_template\"],\n", " \"tool_use\": xlam_chat_template\n", " }\n", " \n", " original_tokenize_config[\"chat_template\"] = chat_template\n", " save_json(os.path.join(checkpoint, \"tokenizer_config.json\"), original_tokenize_config)\n", " print(f\"Updated tokenizer for {checkpoint}\")\n", "\n", "update_tokenizer(model_list, BASE_XLAM_DIR)\n", "check_tokenizer(model_list, BASE_XLAM_DIR)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Verify the tokenizer" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "def verify_tokenizer(messages, model_list, tools, BASE_XLAM_DIR, fc_modes, file_suffix):\n", " for fc_mode in fc_modes:\n", " xlam_models_prompt = defaultdict(str)\n", "\n", " for checkpoint in model_list: \n", " checkpoint = os.path.join(BASE_XLAM_DIR, checkpoint)\n", " tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n", " \n", " if fc_mode:\n", " model_input = tokenizer.apply_chat_template(\n", " messages,\n", " tools=tools,\n", " tokenize=False\n", " )\n", " else:\n", " model_input = tokenizer.apply_chat_template(\n", " messages,\n", " tokenize=False\n", " )\n", " \n", " if \"xlam_8x22b_r\" in checkpoint:\n", " print(\"*\"*100)\n", " print(\"Checkpoint: \", checkpoint)\n", " # print(\"Chat Template: \", tokenizer.chat_template)\n", " \n", " print(\"-\" * 100)\n", " print(\"FC Mode: \", fc_mode)\n", " print(\"-\" * 100)\n", " print(model_input)\n", " xlam_models_prompt[checkpoint] = model_input\n", " \n", " # save the final message for reference\n", " xlam_models_prompt[\"messages\"] = messages\n", " if fc_mode:\n", " xlam_models_prompt[\"avaliable_tools\"] = tools_str\n", " xlam_models_prompt[\"avaliable_tools-easy_to_read\"] = tools_simple_str\n", " \n", " if fc_mode:\n", " save_json(f\"xlam_chat_template-function_call_mode-{file_suffix}.json\", xlam_models_prompt)\n", " else:\n", " save_json(f\"xlam_chat_template-non_function_call_mode-{file_suffix}.json\", xlam_models_prompt)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Example 1 (General Chat without a System Message at the Beginning)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "****************************************************************************************************\n", "Checkpoint: /export/agentstudio-family/checkpoints/xlam_v1/xlam_8x22b_r\n", "----------------------------------------------------------------------------------------------------\n", "FC Mode: True\n", "----------------------------------------------------------------------------------------------------\n", "[INST]\n", "[BEGIN OF TASK INSTRUCTION]\n", "You are an expert in composing functions. You are given a question and a set of possible functions. \n", "Based on the question, you will need to make one or more function/tool calls to achieve the purpose. \n", "If none of the functions can be used, point it out and refuse to answer. \n", "If the given question lacks the parameters required by the function, also point it out.\n", "[END OF TASK INSTRUCTION]\n", "\n", "[BEGIN OF AVAILABLE_TOOLS]\n", "[{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state, e.g. San Francisco, CA'}, 'format': {'type': 'string', 'enum': ['celsius', 'fahrenheit'], 'description': 'The temperature unit to use. Infer this from the users location.'}, 'date': {'type': 'integer', 'description': 'a specific date to get the weather for'}}, 'required': ['location', 'format', 'date']}}}, {'type': 'function', 'function': {'name': 'get_current_temperature', 'description': 'Get the current temperature at a location.', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The location to get the temperature for, in the format \"City, Country\"'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit'], 'description': 'The unit to return the temperature in.'}, 'longitude': {'type': 'number', 'description': 'the longitude of the location'}, 'latitude': {'type': 'number', 'description': 'the latitude of the location'}}, 'required': ['location', 'unit', 'longitude', 'latitude']}, 'return': {'type': 'number', 'description': 'The current temperature at the specified location in the specified units, as a float.'}}}, {'type': 'function', 'function': {'name': 'get_current_wind_speed', 'description': 'Get the current wind speed in km/h at a given location.', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The location to get the temperature for, in the format \"City, Country\"'}}, 'required': ['location']}, 'return': {'type': 'number', 'description': 'The current wind speed at the given location in km/h, as a float.'}}}]\n", "[END OF AVAILABLE_TOOLS]\n", "\n", "[BEGIN OF FORMAT INSTRUCTION]\n", "Your output should be in the JSON format, which specifies a list of function calls. The example format is as follows. Please make sure the parameter type is correct. If no function call is needed, please make tool_calls an empty list '[]'.\n", "```{\"thought\": \"the thought process, or an empty string\", \"tool_calls\": [{\"name\": \"api_name1\", \"arguments\": {\"argument1\": \"value1\", \"argument2\": \"value2\"}}]}```\n", "[END OF FORMAT INSTRUCTION]\n", "\n", "[BEGIN OF QUERY]\n", "My girlfriend Mary and I want to go to Disney in LA. Can you help us plan the trip?\n", "[END OF QUERY]\n", "\n", "[BEGIN OF HISTORY STEPS]\n", "[{'thought':\"Sure! I can help with booking flights, finding weather information, and checking Disney details. First, let's choose a good date. Do you want to check the weather to find a sunny and warm day?\", 'tool_calls':[], 'step_id':1, 'next_observation':'', 'user_input':\"Yes, we'd love that. We prefer sunny days with warm temperatures.\"}, {'thought':\"Got it. Let me check the weather forecast for Los Angeles to help you find the best date.\", 'tool_calls':[], 'step_id':2, 'next_observation':'', 'user_input':\"Sounds great! Thanks for your help.\"} ]\n", "[END OF HISTORY STEPS]\n", "[/INST]\n", "****************************************************************************************************\n", "Checkpoint: /export/agentstudio-family/checkpoints/xlam_v1/xlam_8x22b_r\n", "----------------------------------------------------------------------------------------------------\n", "FC Mode: False\n", "----------------------------------------------------------------------------------------------------\n", "[INST] My girlfriend Mary and I want to go to Disney in LA. Can you help us plan the trip?[/INST] Sure! I can help with booking flights, finding weather information, and checking Disney details. First, let's choose a good date. Do you want to check the weather to find a sunny and warm day?[INST] Yes, we'd love that. We prefer sunny days with warm temperatures.[/INST] Got it. Let me check the weather forecast for Los Angeles to help you find the best date.[INST] Sounds great! Thanks for your help.[/INST]\n" ] } ], "source": [ "messages = [\n", " {\"role\": \"user\", \"content\": \"My girlfriend Mary and I want to go to Disney in LA. Can you help us plan the trip?\"},\n", " \n", " {\"role\": \"assistant\", \"content\": \"Sure! I can help with booking flights, finding weather information, and checking Disney details. First, let's choose a good date. Do you want to check the weather to find a sunny and warm day?\"},\n", " \n", " {\"role\": \"user\", \"content\": \"Yes, we'd love that. We prefer sunny days with warm temperatures.\"},\n", " \n", " {\"role\": \"assistant\", \"content\": \"Got it. Let me check the weather forecast for Los Angeles to help you find the best date.\"}, # example 1\n", "\n", " {\"role\": \"user\", \"content\": \"Sounds great! Thanks for your help.\"},\n", "]\n", "\n", "verify_tokenizer(messages, model_list, tools, BASE_XLAM_DIR, fc_modes=[True, False], file_suffix=\"example_1\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Example 2 (Function Call Where Tool_Calls is a List, i.e., Similar to OpenAI Requirements)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "****************************************************************************************************\n", "Checkpoint: /export/agentstudio-family/checkpoints/xlam_v1/xlam_8x22b_r\n", "----------------------------------------------------------------------------------------------------\n", "FC Mode: True\n", "----------------------------------------------------------------------------------------------------\n", "[INST]\n", "[BEGIN OF TASK INSTRUCTION]\n", "Based on the previous context and API request history, generate an API request or a response as an AI assistant. The current time is 2039-03-09 18:56:09 Wednesday.\n", "[END OF TASK INSTRUCTION]\n", "\n", "[BEGIN OF AVAILABLE_TOOLS]\n", "[{'type': 'function', 'function': {'name': 'get_current_weather', 'description': 'Get the current weather', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The city and state, e.g. San Francisco, CA'}, 'format': {'type': 'string', 'enum': ['celsius', 'fahrenheit'], 'description': 'The temperature unit to use. Infer this from the users location.'}, 'date': {'type': 'integer', 'description': 'a specific date to get the weather for'}}, 'required': ['location', 'format', 'date']}}}, {'type': 'function', 'function': {'name': 'get_current_temperature', 'description': 'Get the current temperature at a location.', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The location to get the temperature for, in the format \"City, Country\"'}, 'unit': {'type': 'string', 'enum': ['celsius', 'fahrenheit'], 'description': 'The unit to return the temperature in.'}, 'longitude': {'type': 'number', 'description': 'the longitude of the location'}, 'latitude': {'type': 'number', 'description': 'the latitude of the location'}}, 'required': ['location', 'unit', 'longitude', 'latitude']}, 'return': {'type': 'number', 'description': 'The current temperature at the specified location in the specified units, as a float.'}}}, {'type': 'function', 'function': {'name': 'get_current_wind_speed', 'description': 'Get the current wind speed in km/h at a given location.', 'parameters': {'type': 'object', 'properties': {'location': {'type': 'string', 'description': 'The location to get the temperature for, in the format \"City, Country\"'}}, 'required': ['location']}, 'return': {'type': 'number', 'description': 'The current wind speed at the given location in km/h, as a float.'}}}]\n", "[END OF AVAILABLE_TOOLS]\n", "\n", "[BEGIN OF FORMAT INSTRUCTION]\n", "Your output should be in the JSON format, which specifies a list of function calls. The example format is as follows. Please make sure the parameter type is correct. If no function call is needed, please make tool_calls an empty list '[]'.\n", "```{\"thought\": \"the thought process, or an empty string\", \"tool_calls\": [{\"name\": \"api_name1\", \"arguments\": {\"argument1\": \"value1\", \"argument2\": \"value2\"}}]}```\n", "[END OF FORMAT INSTRUCTION]\n", "\n", "[BEGIN OF QUERY]\n", "User: I need to cancel a virtual yoga class that I booked. The booking ID is YGA123.\n", "[END OF QUERY]\n", "\n", "[BEGIN OF HISTORY STEPS]\n", "[{'thought':\"\", 'tool_calls':[{\"name\": \"cancel_booking\", \"arguments\": {\"booking_id\": \"YGA123\"}}], 'step_id':1, 'next_observation':{\"data\": {\"class_name\": \"Yoga for Relaxation\", \"class_time\": \"2039-03-10 17:00\", \"instructor_name\": \"Maggie Zhu\", \"cancellation_time\": \"2039-03-09 19:10\"}}}, {'thought':\"AI: Your booking for Yoga for Relaxation with Maggie Zhu on 2039-03-10 at 17:00 has been successfully cancelled and the cancellation time is 2039-03-09 19:10.\", 'tool_calls':[], 'step_id':2, 'next_observation':'', 'user_input':\"User: Can you please show me the virtual yoga and meditation classes available on the platform for tomorrow at 10am?\"}, {'thought':\"\", 'tool_calls':[{\"name\": \"get_classes\", \"arguments\": {\"date\": \"2039-03-10\", \"time\": \"10:00\"}}], 'step_id':3, 'next_observation':{\"data\": [{\"name\": \"Yoga for Relaxation\", \"instructor\": \"Maggie Zhu\", \"time\": \"2039-03-10 10:00\", \"duration\": 60}, {\"name\": \"Meditation for Beginners\", \"instructor\": \"John Lee\", \"time\": \"2039-03-10 10:30\", \"duration\": 30}]}}, {'thought':\"AI: There are two virtual yoga and meditation classes available on the platform for tomorrow at 10am: Yoga for Relaxation with Maggie Zhu that lasts for 60 minutes starting at 10:00 and Meditation for Beginners with John Lee which lasts for 30 minutes, starting at 10:30.\", 'tool_calls':[], 'step_id':4, 'next_observation':'', 'user_input':\"User: I'd like to book a virtual meditation class called \\\"Meditation for Beginners\\\" tomorrow at 8am.\"}, {'thought':\"\", 'tool_calls':[{\"name\": \"get_classes\", \"arguments\": {\"date\": \"2039-03-10\", \"time\": \"08:00\"}}], 'step_id':5, 'next_observation':{\"data\": [{\"name\": \"Meditation for Beginners\", \"instructor\": \"John Lee\", \"time\": \"2039-03-10 08:00\", \"duration\": 30}]}} ]\n", "[END OF HISTORY STEPS]\n", "[/INST]\n" ] } ], "source": [ "messages_2 = [\n", " {\n", " \"role\": \"system\",\n", " \"content\": \"Based on the previous context and API request history, generate an API request or a response as an AI assistant. The current time is 2039-03-09 18:56:09 Wednesday.\"\n", " },\n", " {\n", " \"role\": \"user\",\n", " \"content\": \"User: I need to cancel a virtual yoga class that I booked. The booking ID is YGA123.\"\n", " },\n", " {\n", " \"role\": \"assistant\",\n", " \"content\": \"\",\n", " \"tool_calls\": [\n", " {\n", " \"name\": \"cancel_booking\",\n", " \"arguments\": {\n", " \"booking_id\": \"YGA123\"\n", " }\n", " }\n", " ]\n", " },\n", " {\n", " \"role\": \"tool\",\n", " \"name\": \"cancel_booking\",\n", " \"content\": {\n", " \"data\": {\n", " \"class_name\": \"Yoga for Relaxation\",\n", " \"class_time\": \"2039-03-10 17:00\",\n", " \"instructor_name\": \"Maggie Zhu\",\n", " \"cancellation_time\": \"2039-03-09 19:10\"\n", " }\n", " }\n", " },\n", " {\n", " \"role\": \"assistant\",\n", " \"content\": \"AI: Your booking for Yoga for Relaxation with Maggie Zhu on 2039-03-10 at 17:00 has been successfully cancelled and the cancellation time is 2039-03-09 19:10.\"\n", " },\n", " {\n", " \"role\": \"user\",\n", " \"content\": \"User: Can you please show me the virtual yoga and meditation classes available on the platform for tomorrow at 10am?\"\n", " },\n", " {\n", " \"role\": \"assistant\",\n", " \"content\": \"\",\n", " \"tool_calls\": [\n", " {\n", " \"name\": \"get_classes\",\n", " \"arguments\": {\n", " \"date\": \"2039-03-10\",\n", " \"time\": \"10:00\"\n", " }\n", " }\n", " ]\n", " },\n", " {\n", " \"role\": \"tool\",\n", " \"name\": \"get_classes\",\n", " \"content\": {\n", " \"data\": [\n", " {\n", " \"name\": \"Yoga for Relaxation\",\n", " \"instructor\": \"Maggie Zhu\",\n", " \"time\": \"2039-03-10 10:00\",\n", " \"duration\": 60\n", " },\n", " {\n", " \"name\": \"Meditation for Beginners\",\n", " \"instructor\": \"John Lee\",\n", " \"time\": \"2039-03-10 10:30\",\n", " \"duration\": 30\n", " }\n", " ]\n", " }\n", " },\n", " {\n", " \"role\": \"assistant\",\n", " \"content\": \"AI: There are two virtual yoga and meditation classes available on the platform for tomorrow at 10am: Yoga for Relaxation with Maggie Zhu that lasts for 60 minutes starting at 10:00 and Meditation for Beginners with John Lee which lasts for 30 minutes, starting at 10:30.\"\n", " },\n", " {\n", " \"role\": \"user\",\n", " \"content\": \"User: I'd like to book a virtual meditation class called \\\"Meditation for Beginners\\\" tomorrow at 8am.\"\n", " },\n", " {\n", " \"role\": \"assistant\",\n", " \"content\": \"\",\n", " \"tool_calls\": [\n", " {\n", " \"name\": \"get_classes\",\n", " \"arguments\": {\n", " \"date\": \"2039-03-10\",\n", " \"time\": \"08:00\"\n", " }\n", " }\n", " ]\n", " },\n", " {\n", " \"role\": \"tool\",\n", " \"name\": \"get_classes\",\n", " \"content\": {\n", " \"data\": [\n", " {\n", " \"name\": \"Meditation for Beginners\",\n", " \"instructor\": \"John Lee\",\n", " \"time\": \"2039-03-10 08:00\",\n", " \"duration\": 30\n", " }\n", " ]\n", " }\n", " }\n", "]\n", "verify_tokenizer(messages_2, model_list, tools, BASE_XLAM_DIR, fc_modes = [True], file_suffix = \"example_2\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Example 2 (Function Call Where Tool_Calls is not a List)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "messages_2 = [\n", " {\n", " \"role\": \"system\",\n", " \"content\": \"Based on the previous context and API request history, generate an API request or a response as an AI assistant. The current time is 2039-03-09 18:56:09 Wednesday.\"\n", " },\n", " {\n", " \"role\": \"user\",\n", " \"content\": \"User: I need to cancel a virtual yoga class that I booked. The booking ID is YGA123.\"\n", " },\n", " {\n", " \"role\": \"assistant\",\n", " \"content\": \"\",\n", " \"tool_calls\": {\n", " \"name\": \"cancel_booking\",\n", " \"arguments\": {\n", " \"booking_id\": \"YGA123\"\n", " }\n", " }\n", " },\n", " {\n", " \"role\": \"tool\",\n", " \"name\": \"cancel_booking\",\n", " \"content\": {\n", " \"data\": {\n", " \"class_name\": \"Yoga for Relaxation\",\n", " \"class_time\": \"2039-03-10 17:00\",\n", " \"instructor_name\": \"Maggie Zhu\",\n", " \"cancellation_time\": \"2039-03-09 19:10\"\n", " }\n", " }\n", " },\n", " {\n", " \"role\": \"assistant\",\n", " \"content\": \"AI: Your booking for Yoga for Relaxation with Maggie Zhu on 2039-03-10 at 17:00 has been successfully cancelled and the cancellation time is 2039-03-09 19:10.\"\n", " },\n", " {\n", " \"role\": \"user\",\n", " \"content\": \"User: Can you please show me the virtual yoga and meditation classes available on the platform for tomorrow at 10am?\"\n", " },\n", " {\n", " \"role\": \"assistant\",\n", " \"content\": \"\",\n", " \"tool_calls\": {\n", " \"name\": \"get_classes\",\n", " \"arguments\": {\n", " \"date\": \"2039-03-10\",\n", " \"time\": \"10:00\"\n", " }\n", " }\n", " },\n", " {\n", " \"role\": \"tool\",\n", " \"name\": \"get_classes\",\n", " \"content\": {\n", " \"data\": [\n", " {\n", " \"name\": \"Yoga for Relaxation\",\n", " \"instructor\": \"Maggie Zhu\",\n", " \"time\": \"2039-03-10 10:00\",\n", " \"duration\": 60\n", " },\n", " {\n", " \"name\": \"Meditation for Beginners\",\n", " \"instructor\": \"John Lee\",\n", " \"time\": \"2039-03-10 10:30\",\n", " \"duration\": 30\n", " }\n", " ]\n", " }\n", " },\n", " {\n", " \"role\": \"assistant\",\n", " \"content\": \"AI: There are two virtual yoga and meditation classes available on the platform for tomorrow at 10am: Yoga for Relaxation with Maggie Zhu that lasts for 60 minutes starting at 10:00 and Meditation for Beginners with John Lee which lasts for 30 minutes, starting at 10:30.\"\n", " },\n", " {\n", " \"role\": \"user\",\n", " \"content\": \"User: I'd like to book a virtual meditation class called \\\"Meditation for Beginners\\\" tomorrow at 8am.\"\n", " },\n", " {\n", " \"role\": \"assistant\",\n", " \"content\": \"\",\n", " \"tool_calls\": {\n", " \"name\": \"get_classes\",\n", " \"arguments\": {\n", " \"date\": \"2039-03-10\",\n", " \"time\": \"08:00\"\n", " }\n", " }\n", " },\n", " {\n", " \"role\": \"tool\",\n", " \"name\": \"get_classes\",\n", " \"content\": {\n", " \"data\": [\n", " {\n", " \"name\": \"Meditation for Beginners\",\n", " \"instructor\": \"John Lee\",\n", " \"time\": \"2039-03-10 08:00\",\n", " \"duration\": 30\n", " }\n", " ]\n", " }\n", " }\n", "]" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "ename": "TemplateError", "evalue": "The tool_calls must be a list!", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mTemplateError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[11], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;124;03mtool_calls is similar to OpenAI requirements, and it should be a list of API call(s), not a dictionary \u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m \u001b[43mverify_tokenizer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmessages_2\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel_list\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mBASE_XLAM_DIR\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfc_modes\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfile_suffix\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mexample_2\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", "Cell \u001b[0;32mIn[7], line 10\u001b[0m, in \u001b[0;36mverify_tokenizer\u001b[0;34m(messages, model_list, tools, BASE_XLAM_DIR, fc_modes, file_suffix)\u001b[0m\n\u001b[1;32m 7\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m AutoTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(checkpoint)\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m fc_mode:\n\u001b[0;32m---> 10\u001b[0m model_input \u001b[38;5;241m=\u001b[39m \u001b[43mtokenizer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply_chat_template\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 11\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 12\u001b[0m \u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtools\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 13\u001b[0m \u001b[43m \u001b[49m\u001b[43mtokenize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\n\u001b[1;32m 14\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 16\u001b[0m model_input \u001b[38;5;241m=\u001b[39m tokenizer\u001b[38;5;241m.\u001b[39mapply_chat_template(\n\u001b[1;32m 17\u001b[0m messages,\n\u001b[1;32m 18\u001b[0m tokenize\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 19\u001b[0m )\n", "File \u001b[0;32m/export/agentstudio-family/miniconda3/v2lite-train/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:1867\u001b[0m, in \u001b[0;36mPreTrainedTokenizerBase.apply_chat_template\u001b[0;34m(self, conversation, tools, documents, chat_template, add_generation_prompt, continue_final_message, tokenize, padding, truncation, max_length, return_tensors, return_dict, return_assistant_tokens_mask, tokenizer_kwargs, **kwargs)\u001b[0m\n\u001b[1;32m 1865\u001b[0m all_generation_indices\u001b[38;5;241m.\u001b[39mappend(generation_indices)\n\u001b[1;32m 1866\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1867\u001b[0m rendered_chat \u001b[38;5;241m=\u001b[39m \u001b[43mcompiled_template\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrender\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1868\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1869\u001b[0m \u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtool_schemas\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1870\u001b[0m \u001b[43m \u001b[49m\u001b[43mdocuments\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdocuments\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1871\u001b[0m \u001b[43m \u001b[49m\u001b[43madd_generation_prompt\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43madd_generation_prompt\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1872\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mtemplate_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1873\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1874\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m continue_final_message:\n\u001b[1;32m 1875\u001b[0m final_message \u001b[38;5;241m=\u001b[39m chat[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcontent\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n", "File \u001b[0;32m/export/agentstudio-family/miniconda3/v2lite-train/lib/python3.10/site-packages/jinja2/environment.py:1304\u001b[0m, in \u001b[0;36mTemplate.render\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1302\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39menvironment\u001b[38;5;241m.\u001b[39mconcat(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mroot_render_func(ctx)) \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 1303\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m:\n\u001b[0;32m-> 1304\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43menvironment\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhandle_exception\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m/export/agentstudio-family/miniconda3/v2lite-train/lib/python3.10/site-packages/jinja2/environment.py:939\u001b[0m, in \u001b[0;36mEnvironment.handle_exception\u001b[0;34m(self, source)\u001b[0m\n\u001b[1;32m 934\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Exception handling helper. This is used internally to either raise\u001b[39;00m\n\u001b[1;32m 935\u001b[0m \u001b[38;5;124;03mrewritten exceptions or return a rendered traceback for the template.\u001b[39;00m\n\u001b[1;32m 936\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 937\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdebug\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m rewrite_traceback_stack\n\u001b[0;32m--> 939\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m rewrite_traceback_stack(source\u001b[38;5;241m=\u001b[39msource)\n", "File \u001b[0;32m