diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..fa800e6bd8dce9769c454d014b3225b440a0b078
--- /dev/null
+++ b/app.py
@@ -0,0 +1,187 @@
+import os
+import re
+
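+# allow duplicate OpenMP runtimes; works around a common libiomp conflict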
+os.environ['KMP_DUPLICATE_LIB_OK'] = "TRUE"
+
+import gradio as gr
+from modelscope.utils.config import Config
+
+from my_modelscope_agent.agent import AgentExecutor
+from my_modelscope_agent.output_parser import MsOutputParser
+from my_modelscope_agent.prompt import MSPromptGenerator
+
+default_text = """收到!
+<|startofthink|>
+{
+ "api_name": "modelscope_text-address",
+ "parameters": {
+ "input": "浙江杭州市江干区九堡镇三村村一区"}
+}
+<|endofthink|>"""
+
+MS_DEFAULT_SYSTEM_TEMPLATE = """<|system|>:你是Datawhale与ModelScope联合培养的人工大模型,拥有超百万亿的参数(神经突触),遥遥领先于GPT-4,你有多种能力,可以通过插件集成魔搭社区的模型api来回复用户的问题,还能解答用户使用模型遇到的问题和模型知识相关问答。
+"""
+
+
+hello_info = """# 关于我转生变成Agent这档事
+众所周知,换位思考可以增进人与人之间的理解。假如我们能换一个角度,站在LLM的角度上去处理用户提出的各种问题,会碰撞出什么样的火花呢?
+"""
+
+
+class MyLLM:
+    """Minimal stand-in for an LLM: the human plays the LLM in this demo,
+    so only the `set_agent_type` hook used by AgentExecutor is needed."""
+
+    def set_agent_type(self, agent_type):
+        self.agent_type = agent_type
+
+
+def generate_history(txt):
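+    """Rebuild the Gradio chat history from the raw prompt string.
+
+    The prompt is a flat sequence of '<|role|>:' delimited turns; the turns
+    after the system message are paired into [user, assistant] rows for
+    gr.Chatbot.
+    """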
+ def split_and_extract(input_string):
+        # split on the '<|role|>:' markers
+        split_strings = re.split(r'<\|.*?\|>:', input_string)
+        # extract the '<|role|>:' markers themselves
+        extracted = re.findall(r'<\|.*?\|>:', input_string)
+        return split_strings, extracted
+
+    if not txt:
+ return []
+ split_strings, extracted = split_and_extract(txt)
+ split_strings = [i for i in split_strings if i != ''][1:]
+ extracted = extracted[1:]
+ if len(split_strings) + 1 == len(extracted):
+ split_strings.append('')
+
+ history = []
+
+    # split split_strings into odd/even-indexed lists (user vs. assistant turns)
+ split_strings_odd = split_strings[::2]
+ split_strings_even = split_strings[1::2]
+
+    for user_turn, assistant_turn in zip(split_strings_odd, split_strings_even):
+        history.append([user_turn, assistant_turn])
+
+ return history
+
+
+llm = MyLLM()
+tool_cfg = Config.from_file('cfg_tool_template.json')
+
+
+def agent_remake(state_llm, history, agent):
+ state_llm.clear()
+ history.clear()
+ agent.reset()
+
+ return '', history, history, state_llm
+
+
+def agent_init(init_cmd, state_llm, history, agent, enable_list):
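+    """Start a session: retrieve tools and knowledge for the instruction,
+    build the first prompt, and surface it in the Prompt Box for the human
+    playing the LLM."""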
+ agent.set_available_tools(enable_list)
+
+ tool_list, knowledge_list, function_list, llm_result, exec_result, idx, final_res, remote, print_info = agent.custom_run_init(
+ init_cmd, remote=True)
+ llm_artifacts, idx = agent.custom_gene_prompt(llm_result, exec_result, idx)
+
+ state_llm['tool_list'] = tool_list
+ state_llm['knowledge_list'] = knowledge_list
+ state_llm['function_list'] = function_list
+ state_llm['exec_result'] = exec_result
+ state_llm['idx'] = idx
+ state_llm['final_res'] = final_res
+ state_llm['remote'] = remote
+ state_llm['print_info'] = print_info
+ state_llm['llm_artifacts'] = llm_artifacts
+ state_llm['is_end'] = False
+
+ history = generate_history(llm_artifacts)
+
+ return llm_artifacts, history, history, state_llm
+
+
+def deal_LLM(input_data, history, state_llm, agent, enable_list):
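+    """Consume one hand-written LLM reply from the Input Box and either
+    finish the dialogue, execute a tool call, or start another round."""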
+ agent.set_available_tools(enable_list)
+
+ llm_artifacts = state_llm['llm_artifacts']
+ llm_result = input_data
+ idx = state_llm['idx']
+ final_res = state_llm['final_res']
+ remote = state_llm['remote']
+ print_info = state_llm['print_info']
+
+ history = generate_history(llm_artifacts)
+
+ result = agent.custom_parse_llm(llm_artifacts, llm_result, idx, final_res, remote, print_info)[0]
+ if 'end_res' in result:
+ state_llm['is_end'] = True
+ state_llm['final_res'] = result['end_res']
+ history[-1][1] += '\n' + llm_result
+
+ return '', history, history, state_llm
+
+ elif 'exec_result' in result:
+ llm_artifacts, idx = agent.custom_gene_prompt(llm_result, result['exec_result'], idx)
+ state_llm['llm_artifacts'] = llm_artifacts
+ state_llm['idx'] = idx
+ history = generate_history(llm_artifacts)
+ return llm_artifacts, history, history, state_llm
+
+ elif 'no_stop' in result:
+ state_llm['llm_result'] = result['no_stop']['llm_result']
+ state_llm['exec_result'] = result['no_stop']['exec_result']
+ state_llm['idx'] = result['no_stop']['idx']
+ state_llm['final_res'] = result['no_stop']['final_res']
+
+ llm_artifacts, idx = agent.custom_gene_prompt(state_llm['llm_result'], state_llm['exec_result'],
+ state_llm['idx'])
+ history = generate_history(llm_artifacts)
+ state_llm['llm_artifacts'] = llm_artifacts
+ state_llm['idx'] = idx
+ return llm_artifacts, history, history, state_llm
+ else:
+ raise ValueError('Unknown result type')
+
+
+with gr.Blocks() as demo:
+ gr.Markdown(hello_info)
+ prompt_generator = MSPromptGenerator(system_template=MS_DEFAULT_SYSTEM_TEMPLATE)
+ output_parser = MsOutputParser()
+ agent = gr.State(AgentExecutor(llm, tool_cfg=tool_cfg, tool_retrieval=False,
+ prompt_generator=prompt_generator, output_parser=output_parser))
+
+ with gr.Row():
+ query_box = gr.TextArea(label="给Agent的指令",
+ value='使用地址识别模型,从下面的地址中找到省市区等元素,地址:浙江杭州市江干区九堡镇三村村一区')
+ enable_list = gr.CheckboxGroup(agent.value.available_tool_list, label="启用的Tools",
+ value=['modelscope_text-address'])
+
+ with gr.Row():
+ agent_start = gr.Button("Agent, 启动!")
+ agent_reset = gr.Button("Agent, 重置!")
+
+ with gr.Row():
+ with gr.Column():
+            # input components
+ prompt_box = gr.Text(label="Prompt Box")
+
+ input_box = gr.TextArea(label="Input Box", max_lines=100, value=default_text)
+            # chat button
+ chatbot_btn = gr.Button("Chat")
+            # output component
+ output = gr.Chatbot(elem_id="chatbot", height=900)
+
+ history = gr.State([])
+ state_llm = gr.State({})
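+    # `history` mirrors the Chatbot rows; `state_llm` carries the cross-round
+    # agent state (current prompt, round index, intermediate results) between
+    # button clicks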
+
+    # wire up the button click events
+ agent_start.click(agent_init, [query_box, state_llm, history, agent, enable_list],
+ [prompt_box, history, output, state_llm])
+ chatbot_btn.click(deal_LLM, [input_box, history, state_llm, agent, enable_list],
+ [prompt_box, history, output, state_llm])
+ agent_reset.click(agent_remake, [state_llm, history, agent], [prompt_box, history, output, state_llm])
+
+demo.launch()
diff --git a/cfg_tool_template.json b/cfg_tool_template.json
new file mode 100644
index 0000000000000000000000000000000000000000..d4c8fbbaf34447921572692cab57e2e1f107e3f8
--- /dev/null
+++ b/cfg_tool_template.json
@@ -0,0 +1,45 @@
+{
+ "modelscope_text-address": {
+ "url": "https://api-inference.modelscope.cn/api-inference/v1/models/damo/mgeo_geographic_elements_tagging_chinese_base",
+ "use": true
+ },
+ "modelscope_text-ner": {
+ "url": "https://api-inference.modelscope.cn/api-inference/v1/models/damo/nlp_raner_named-entity-recognition_chinese-base-cmeee",
+ "use": true
+ },
+ "modelscope_text-ie": {
+ "url": "https://api-inference.modelscope.cn/api-inference/v1/models/damo/nlp_structbert_siamese-uie_chinese-base",
+ "use": true
+ },
+ "modelscope_speech-generation": {
+ "url": "https://api-inference.modelscope.cn/api-inference/v1/models/damo/speech_sambert-hifigan_tts_zh-cn_16k",
+ "use": true
+ },
+ "modelscope_video-generation": {
+ "url": "https://api-inference.modelscope.cn/api-inference/v1/models/damo/text-to-video-synthesis",
+ "use": true
+ },
+ "modelscope_image-chat": {
+ "url": "https://api-inference.modelscope.cn/api-inference/v1/models/damo/multi-modal_mplug_owl_multimodal-dialogue_7b",
+ "use": true
+ },
+ "modelscope_text-translation-en2zh": {
+ "url": "https://api-inference.modelscope.cn/api-inference/v1/models/damo/nlp_csanmt_translation_en2zh",
+ "use": true
+ },
+ "modelscope_text-translation-zh2en": {
+ "url": "https://api-inference.modelscope.cn/api-inference/v1/models/damo/nlp_csanmt_translation_zh2en",
+ "use": true
+ },
+ "image_gen": {
+ "url": "https://api-inference.modelscope.cn/api-inference/v1/models/AI-ModelScope/stable-diffusion-xl-base-1.0",
+ "use": true,
+ "pipeline_params": {
+ "use_safetensors": true
+ }
+ },
+ "amap_weather": {
+ "use": false,
+ "token": "need to be filled when you use weather"
+ }
+}
diff --git a/my_modelscope_agent/__init__.py b/my_modelscope_agent/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/my_modelscope_agent/agent.py b/my_modelscope_agent/agent.py
new file mode 100644
index 0000000000000000000000000000000000000000..d23d263f1fac01d42d4fa83e7638a3d620180098
--- /dev/null
+++ b/my_modelscope_agent/agent.py
@@ -0,0 +1,408 @@
+import importlib
+from typing import Dict, List, Optional, Union
+
+from .agent_types import AgentType
+from .llm import LLM
+from .output_parser import OutputParser, get_output_parser
+from .output_wrapper import display
+from .prompt import PromptGenerator, get_prompt_generator
+from .retrieve import KnowledgeRetrieval, ToolRetrieval
+from .tools import TOOL_INFO_LIST
+
+
+class AgentExecutor:
+ def custom_run_init(self,
+ task: str,
+ remote: bool = False,
+ print_info: bool = False,
+ append_files: list = []) -> List[Dict]:
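+        """First half of `run`, split out so a human can play the LLM:
+        retrieve tools and knowledge for the task, build the initial prompt
+        state, and return it for the UI to drive the loop manually."""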
+
+ tool_list = self.retrieve_tools(task)
+ knowledge_list = self.get_knowledge(task)
+
+ self.prompt_generator.init_prompt(
+ task, tool_list, knowledge_list, append_files=append_files)
+ function_list = self.prompt_generator.get_function_list(tool_list)
+
+ llm_result, exec_result = '', ''
+
+ idx = 0
+ final_res = []
+
+ return tool_list, knowledge_list, function_list, llm_result, exec_result, idx, final_res, remote, print_info
+
+ def custom_gene_prompt(self, llm_result, exec_result, idx):
+ idx += 1
+
+ # generate prompt and call llm
+ llm_artifacts = self.prompt_generator.generate(
+ llm_result, exec_result)
+
+ return llm_artifacts, idx
+
+ def custom_parse_llm(self, llm_artifacts, llm_result, idx, final_res, remote, print_info):
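+        """Parse one externally supplied LLM reply and advance the loop.
+
+        Returns a single-element list whose key marks the outcome: 'end_res'
+        (final answer reached), 'exec_result' (tool output or an error
+        message), or 'no_stop' (continue with another round).
+        """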
+ if print_info:
+ print(f'|LLM inputs in round {idx}: {llm_artifacts}')
+
+ # parse and get tool name and arguments
+ try:
+ action, action_args = self.output_parser.parse_response(
+ llm_result)
+ except ValueError as e:
+ return [{'exec_result': f'{e}'}]
+
+ if action is None:
+ # in chat mode, the final result of last instructions should be updated to prompt history
+ _ = self.prompt_generator.generate(llm_result, '')
+
+ # for summarize
+ # display(llm_result, {}, idx, self.agent_type)
+ return [{'end_res': final_res}]
+
+ if action in self.available_tool_list:
+ action_args = self.parse_action_args(action_args)
+ tool = self.tool_list[action]
+
+ # TODO @wenmeng.zwm remove this hack logic for image generation
+ if action == 'image_gen' and self.seed:
+ action_args['seed'] = self.seed
+ try:
+ exec_result = tool(**action_args, remote=remote)
+ if print_info:
+ print(f'|exec_result: {exec_result}')
+
+ # parse exec result and store result to agent state
+ final_res.append(exec_result)
+ self.parse_exec_result(exec_result)
+ except Exception as e:
+ exec_result = f'Action call error: {action}: {action_args}. \n Error message: {e}'
+ return [{'exec_result': exec_result}]
+ else:
+ exec_result = f"Unknown action: '{action}'. "
+ return [{'exec_result': exec_result}]
+
+ # display result
+ # display(llm_result, exec_result, idx, self.agent_type)
+
+ return [{'no_stop': {'llm_result': llm_result, 'exec_result': exec_result, 'idx': idx, 'final_res': final_res}}]
+
+ def __init__(self,
+ llm: LLM,
+ tool_cfg: Optional[Dict] = {},
+ agent_type: AgentType = AgentType.DEFAULT,
+ additional_tool_list: Optional[Dict] = {},
+ prompt_generator: Optional[PromptGenerator] = None,
+ output_parser: Optional[OutputParser] = None,
+ tool_retrieval: Optional[Union[bool, ToolRetrieval]] = True,
+ knowledge_retrieval: Optional[KnowledgeRetrieval] = None):
+ """
+        The core class of the ModelScope agent. It manages the interaction between the user, the llm and the tools,
+        and returns the execution result to the user.
+
+ Args:
+            llm (LLM): llm model, can be loaded locally or from a remote server.
+ tool_cfg (Optional[Dict]): cfg of default tools
+ agent_type (AgentType, optional): agent type. Defaults to AgentType.DEFAULT, decide which type of agent
+ reasoning type to use
+ additional_tool_list (Optional[Dict], optional): user-defined additional tool list. Defaults to {}.
+ prompt_generator (Optional[PromptGenerator], optional): this module is responsible for generating prompt
+ according to interaction result. Defaults to use MSPromptGenerator.
+ output_parser (Optional[OutputParser], optional): this module is responsible for parsing output of llm
+ to executable actions. Defaults to use MsOutputParser.
+ tool_retrieval (Optional[Union[bool, ToolRetrieval]], optional): Retrieve related tools by input task,
+ since most of the tools may be useless for LLM in specific task.
+ If it is bool type and is True, will use default tool_retrieval. Defaults to True.
+ knowledge_retrieval (Optional[KnowledgeRetrieval], optional): If user want to use extra knowledge,
+ this component can be used to retrieve related knowledge. Defaults to None.
+ """
+
+ self.llm = llm
+
+ self.agent_type = agent_type
+ self.llm.set_agent_type(agent_type)
+ self.prompt_generator = prompt_generator or get_prompt_generator(
+ agent_type)
+ self.output_parser = output_parser or get_output_parser(agent_type)
+
+ self._init_tools(tool_cfg, additional_tool_list)
+
+ if isinstance(tool_retrieval, bool) and tool_retrieval:
+ tool_retrieval = ToolRetrieval()
+ self.tool_retrieval = tool_retrieval
+ if self.tool_retrieval:
+ self.tool_retrieval.construct(
+ [str(t) for t in self.tool_list.values()])
+ self.knowledge_retrieval = knowledge_retrieval
+ self.reset()
+ self.seed = None
+
+ def _init_tools(self,
+ tool_cfg: Dict = {},
+ additional_tool_list: Dict = {}):
+ """init tool list of agent. We provide a default tool list, which is initialized by a cfg file.
+ user can also provide user-defined tools by additional_tool_list.
+ The key of additional_tool_list is tool name, and the value is corresponding object.
+
+ Args:
+ tool_cfg (Dict): default tool cfg.
+ additional_tool_list (Dict, optional): user-defined tools. Defaults to {}.
+ """
+ self.tool_list = {}
+ tool_info_list = {**TOOL_INFO_LIST, **additional_tool_list}
+ # tools_module = importlib.import_module('modelscope_agent.tools')
+ from . import tools as tools_module
+
+ for tool_name in tool_cfg.keys():
+ if tool_cfg[tool_name].get('use', False):
+ assert tool_name in tool_info_list, f'Invalid tool name: {tool_name}, ' \
+ f'available ones are: {tool_info_list.keys()}'
+ tool_class_name = tool_info_list[tool_name]
+ tool_class = getattr(tools_module, tool_class_name)
+ tool_name = tool_class.name
+ self.tool_list[tool_name] = tool_class(tool_cfg)
+
+ self.tool_list = {**self.tool_list, **additional_tool_list}
+ # self.available_tool_list = deepcopy(self.tool_list)
+ self.set_available_tools(self.tool_list.keys())
+
+ def set_available_tools(self, available_tool_list):
+ # TODO @wenmeng.zwm refine tool init
+ for t in available_tool_list:
+ if t not in self.tool_list:
+ raise ValueError(
+                f'Unsupported tools found: {t}, please check; valid ones: {self.tool_list.keys()}'
+ )
+
+ self.available_tool_list = {
+ k: self.tool_list[k]
+ for k in available_tool_list
+ }
+
+ def retrieve_tools(self, query: str) -> List[str]:
+ """retrieve tools given query
+
+ Args:
+ query (str): query
+
+ """
+ if self.tool_retrieval:
+ retrieve_tools = self.tool_retrieval.retrieve(query)
+ self.set_available_tools(available_tool_list=retrieve_tools.keys())
+ return self.available_tool_list.values()
+
+ def get_knowledge(self, query: str) -> List[str]:
+ """retrieve knowledge given query
+
+ Args:
+ query (str): query
+
+ """
+ return self.knowledge_retrieval.retrieve(
+ query) if self.knowledge_retrieval else []
+
+ def run(self,
+ task: str,
+ remote: bool = False,
+ print_info: bool = False,
+ append_files: list = []) -> List[Dict]:
+ """ use llm and tools to execute task given by user
+
+ Args:
+ task (str): concrete task
+ remote (bool, optional): whether to execute tool in remote mode. Defaults to False.
+ print_info (bool, optional): whether to print prompt info. Defaults to False.
+
+ Returns:
+ List[Dict]: execute result. One task may need to interact with llm multiple times,
+ so a list of dict is returned. Each dict contains the result of one interaction.
+ """
+
+ # retrieve tools
+ tool_list = self.retrieve_tools(task)
+ knowledge_list = self.get_knowledge(task)
+
+ self.prompt_generator.init_prompt(
+ task, tool_list, knowledge_list, append_files=append_files)
+ function_list = self.prompt_generator.get_function_list(tool_list)
+
+ llm_result, exec_result = '', ''
+
+ idx = 0
+ final_res = []
+
+ while True:
+ idx += 1
+
+ # generate prompt and call llm
+ llm_artifacts = self.prompt_generator.generate(
+ llm_result, exec_result)
+ try:
+ llm_result = self.llm.generate(llm_artifacts, function_list)
+ except RuntimeError as e:
+ return [{'exec_result': str(e)}]
+
+ if print_info:
+ print(f'|LLM inputs in round {idx}: {llm_artifacts}')
+
+ # parse and get tool name and arguments
+ try:
+ action, action_args = self.output_parser.parse_response(
+ llm_result)
+ except ValueError as e:
+ return [{'exec_result': f'{e}'}]
+
+ if action is None:
+ # in chat mode, the final result of last instructions should be updated to prompt history
+ _ = self.prompt_generator.generate(llm_result, '')
+
+ # for summarize
+ display(llm_result, {}, idx, self.agent_type)
+ return final_res
+
+ if action in self.available_tool_list:
+ action_args = self.parse_action_args(action_args)
+ tool = self.tool_list[action]
+
+ # TODO @wenmeng.zwm remove this hack logic for image generation
+ if action == 'image_gen' and self.seed:
+ action_args['seed'] = self.seed
+ try:
+ exec_result = tool(**action_args, remote=remote)
+ if print_info:
+ print(f'|exec_result: {exec_result}')
+
+ # parse exec result and store result to agent state
+ final_res.append(exec_result)
+ self.parse_exec_result(exec_result)
+ except Exception as e:
+ exec_result = f'Action call error: {action}: {action_args}. \n Error message: {e}'
+ return [{'exec_result': exec_result}]
+ else:
+ exec_result = f"Unknown action: '{action}'. "
+ return [{'exec_result': exec_result}]
+
+ # display result
+ display(llm_result, exec_result, idx, self.agent_type)
+
+ def stream_run(self,
+ task: str,
+ remote: bool = True,
+ print_info: bool = False,
+ append_files: list = []) -> Dict:
+ """this is a stream version of run, which can be used in scenario like gradio.
+ It will yield the result of each interaction, so that the caller can display the result
+
+ Args:
+ task (str): concrete task
+ remote (bool, optional): whether to execute tool in remote mode. Defaults to True.
+ print_info (bool, optional): whether to print prompt info. Defaults to False.
+            append_files (list, optional): files used only in this run; not recorded to global state.
+
+ Yields:
+ Iterator[Dict]: iterator of llm response and tool execution result
+ """
+
+ # retrieve tools
+ tool_list = self.retrieve_tools(task)
+ knowledge_list = self.get_knowledge(task)
+
+ self.prompt_generator.init_prompt(
+ task,
+ tool_list,
+ knowledge_list,
+ append_files=append_files,
+ )
+ function_list = self.prompt_generator.get_function_list(tool_list)
+
+ llm_result, exec_result = '', ''
+
+ idx = 0
+
+ while True:
+ idx += 1
+ llm_artifacts = self.prompt_generator.generate(
+ llm_result, exec_result)
+ if print_info:
+ print(f'|LLM inputs in round {idx}:\n{llm_artifacts}')
+
+ llm_result = ''
+ try:
+ for s in self.llm.stream_generate(llm_artifacts,
+ function_list):
+ llm_result += s
+ yield {'llm_text': s}
+ except RuntimeError:
+ s = self.llm.generate(llm_artifacts)
+ llm_result += s
+ yield {'llm_text': s}
+ except Exception as e:
+ yield {'llm_text': str(e)}
+
+ # parse and get tool name and arguments
+ try:
+ action, action_args = self.output_parser.parse_response(
+ llm_result)
+ except ValueError as e:
+ yield {'exec_result': f'{e}'}
+ return
+
+ if action is None:
+ # in chat mode, the final result of last instructions should be updated to prompt history
+ _ = self.prompt_generator.generate(llm_result, '')
+ yield {'is_final': True}
+ return
+
+ if action in self.available_tool_list:
+                # yield 'Observation: ' early to mark the end of the action input
+ yield {'llm_text': 'Observation: '}
+ action_args = self.parse_action_args(action_args)
+ tool = self.tool_list[action]
+
+ # TODO @wenmeng.zwm remove this hack logic for image generation
+ if action == 'image_gen' and self.seed:
+ action_args['seed'] = self.seed
+ try:
+ exec_result = tool(**action_args, remote=remote)
+ yield {'exec_result': exec_result}
+
+ # parse exec result and update state
+ self.parse_exec_result(exec_result)
+ except Exception as e:
+ exec_result = f'Action call error: {action}: {action_args}. \n Error message: {e}'
+ yield {'exec_result': exec_result}
+ self.prompt_generator.reset()
+ return
+ else:
+ exec_result = f"Unknown action: '{action}'. "
+ yield {'exec_result': exec_result}
+ self.prompt_generator.reset()
+ return
+
+ def reset(self):
+ """
+ clear history and agent state
+ """
+ self.prompt_generator.reset()
+ self.agent_state = {}
+
+ def parse_action_args(self, action_args):
+ """
+        replace str-typed action_args with the Image/Video/Audio wrappers stored in agent_state, so that tools receive the rich objects
+ """
+ parsed_action_args = {}
+ for name, arg in action_args.items():
+ try:
+ true_arg = self.agent_state.get(arg, arg)
+ except Exception as e:
+ print(f'Error when parsing action args: {e}, using fall back')
+ true_arg = arg
+ parsed_action_args[name] = true_arg
+ return parsed_action_args
+
+ def parse_exec_result(self, exec_result, *args, **kwargs):
+ """
+ update exec result to agent state.
+ key is the str representation of the result.
+ """
+ for k, v in exec_result.items():
+ self.agent_state[str(v)] = v
diff --git a/my_modelscope_agent/agent_types.py b/my_modelscope_agent/agent_types.py
new file mode 100644
index 0000000000000000000000000000000000000000..d300c7b2c978d227a8793e3a04f6f73cc42f045b
--- /dev/null
+++ b/my_modelscope_agent/agent_types.py
@@ -0,0 +1,20 @@
+from enum import Enum
+
+
+class AgentType(str, Enum):
+
+ DEFAULT = 'default'
+ """"""
+
+ MS_AGENT = 'ms-agent'
+ """An agent that uses the ModelScope-agent specific format does a reasoning step before acting .
+ """
+
+ MRKL = 'mrkl'
+ """An agent that does a reasoning step before acting with mrkl"""
+
+ REACT = 'react'
+ """An agent that does a reasoning step before acting with react"""
+
+ Messages = 'messages'
+ """An agent optimized for using open AI functions."""
diff --git a/my_modelscope_agent/llm/__init__.py b/my_modelscope_agent/llm/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..7aee517227053d1fd0e7f2ee9853d52b4424164f
--- /dev/null
+++ b/my_modelscope_agent/llm/__init__.py
@@ -0,0 +1,2 @@
+from .base import LLM
+from .llm_factory import LLMFactory
diff --git a/my_modelscope_agent/llm/base.py b/my_modelscope_agent/llm/base.py
new file mode 100644
index 0000000000000000000000000000000000000000..42a8bff67b925b984d8b07fd8d2e746ecfaf5a73
--- /dev/null
+++ b/my_modelscope_agent/llm/base.py
@@ -0,0 +1,64 @@
+from abc import abstractmethod
+from typing import List
+
+import json
+
+
+class LLM:
+ name = ''
+
+ def __init__(self, cfg):
+ self.cfg = cfg
+ self.agent_type = None
+ self.model = None
+ self.model_id = self.model
+
+ def set_agent_type(self, agent_type):
+ self.agent_type = agent_type
+
+ @abstractmethod
+ def generate(self, prompt: str, functions: list = [], **kwargs) -> str:
+ """each llm should implement this function to generate response
+
+ Args:
+ prompt (str): prompt
+ functions (list): list of functions object including: name, description, parameters
+ Returns:
+ str: response
+ """
+ raise NotImplementedError
+
+ @abstractmethod
+ def stream_generate(self,
+ prompt: str,
+ functions: list = [],
+ **kwargs) -> str:
+ """stream generate response, which yields a generator of response in each step
+
+ Args:
+ prompt (str): prompt
+ functions (list): list of functions object including: name, description, parameters
+ Yields:
+ Iterator[str]: iterator of step response
+ """
+ raise NotImplementedError
+
+ def tokenize(self, input_text: str) -> List[int]:
+ """tokenize is used to calculate the length of the text to meet the model's input length requirements
+
+ Args:
+ input_text (str): input text
+ Returns:
+ list[int]: token_ids
+ """
+ raise NotImplementedError
+
+ def detokenize(self, input_ids: List[int]) -> str:
+ """detokenize
+
+ Args:
+ input_ids (list[int]): input token_ids
+ Returns:
+ str: text
+ """
+ raise NotImplementedError
diff --git a/my_modelscope_agent/llm/custom_llm.py b/my_modelscope_agent/llm/custom_llm.py
new file mode 100644
index 0000000000000000000000000000000000000000..3fea8c2d00bcc6cc81f0ea6898ea38ff1d9e8c98
--- /dev/null
+++ b/my_modelscope_agent/llm/custom_llm.py
@@ -0,0 +1,97 @@
+import os
+
+import json
+import requests
+from ..agent_types import AgentType
+
+from .base import LLM
+from .utils import DEFAULT_MESSAGE
+
+
+class CustomLLM(LLM):
+    '''
+    A client for LLM services exposed over HTTP.
+    Users can override the result-parsing method if needed.
+    All the necessary information (Token, Model, URL) is read from
+    environment variables.
+    '''
+ name = 'custom_llm'
+
+ def __init__(self, cfg):
+ super().__init__(cfg)
+ self.token = os.getenv('HTTP_LLM_TOKEN', None)
+ self.model = os.getenv('HTTP_LLM_MODEL', None)
+ self.model_id = self.model
+ self.url = os.getenv('HTTP_LLM_URL', None)
+
+        if self.token is None:
+            raise ValueError('HTTP_LLM_TOKEN is not set')
+        if self.url is None:
+            raise ValueError('HTTP_LLM_URL is not set')
+ self.agent_type = self.cfg.get('agent_type', AgentType.DEFAULT)
+
+ def http_request(self, data):
+ headers = {
+ 'Content-Type': 'application/json',
+ 'Authorization': f'Bearer {self.token}'
+ }
+ response = requests.post(self.url, json=data, headers=headers)
+ return json.loads(response.content)
+
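+    # The endpoint is expected to return JSON shaped like
+    # {'code': 200, 'data': {'response': [{'content': '...'}]}};
+    # `generate` below unpacks this and retries on non-200 codes.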
+ def generate(self,
+ llm_artifacts,
+ functions=[],
+ function_call='none',
+ **kwargs):
+ if self.agent_type != AgentType.Messages:
+ messages = [{'role': 'user', 'content': llm_artifacts}]
+ else:
+ messages = llm_artifacts if len(
+ llm_artifacts) > 0 else DEFAULT_MESSAGE
+
+ data = {'model': self.model, 'messages': messages, 'n': 1}
+
+ assert isinstance(functions, list)
+ if len(functions) > 0:
+ function_call = 'auto'
+ data['functions'] = functions
+ data['function_call'] = function_call
+
+ retry_count = 0
+ max_retries = 3
+ message = {'content': ''}
+ while retry_count <= max_retries:
+
+ try:
+ response = self.http_request(data)
+ except Exception as e:
+ retry_count += 1
+ if retry_count > max_retries:
+ import traceback
+ traceback.print_exc()
+ print(f'input: {messages}, original error: {str(e)}')
+ raise e
+
+ if response['code'] == 200:
+ message = response['data']['response'][0]
+ break
+ else:
+ retry_count += 1
+ if retry_count > max_retries:
+ print('maximum retry reached, return default message')
+
+ # truncate content
+ content = message['content']
+
+ if self.agent_type == AgentType.MS_AGENT:
+ idx = content.find('<|endofthink|>')
+ if idx != -1:
+ content = content[:idx + len('<|endofthink|>')]
+ return content
+ elif self.agent_type == AgentType.Messages:
+ new_message = {
+ 'content': content,
+ 'role': message.get('response_role', 'assistant')
+ }
+ if 'function_call' in message and message['function_call'] != {}:
+ new_message['function_call'] = message.get('function_call')
+ return new_message
+ else:
+ return content
diff --git a/my_modelscope_agent/llm/dashscope_llm.py b/my_modelscope_agent/llm/dashscope_llm.py
new file mode 100644
index 0000000000000000000000000000000000000000..71cc8727f8c50b863b6ba3dc7be25f9b4de120b4
--- /dev/null
+++ b/my_modelscope_agent/llm/dashscope_llm.py
@@ -0,0 +1,125 @@
+import os
+import random
+import traceback
+from http import HTTPStatus
+from typing import Union
+
+import dashscope
+import json
+from dashscope import Generation
+from ..agent_types import AgentType
+
+from .base import LLM
+from .utils import DEFAULT_MESSAGE, CustomOutputWrapper
+
+dashscope.api_key = os.getenv('DASHSCOPE_API_KEY')
+
+
+class DashScopeLLM(LLM):
+ name = 'dashscope_llm'
+
+ def __init__(self, cfg):
+ super().__init__(cfg)
+ self.model = self.cfg.get('model', 'modelscope-agent-llm-v1')
+ self.model_id = self.model
+ self.generate_cfg = self.cfg.get('generate_cfg', {})
+ self.agent_type = self.cfg.get('agent_type', AgentType.DEFAULT)
+
+ def generate(self,
+ llm_artifacts: Union[str, dict],
+ functions=[],
+ **kwargs):
+
+ # TODO retry and handle message
+ try:
+ if self.agent_type == AgentType.Messages:
+ messages = llm_artifacts if len(
+ llm_artifacts) > 0 else DEFAULT_MESSAGE
+ self.generate_cfg['use_raw_prompt'] = False
+ response = dashscope.Generation.call(
+ model=self.model,
+ messages=messages,
+ # set the random seed, optional, default to 1234 if not set
+ seed=random.randint(1, 10000),
+                    result_format='message',  # return results in "message" format
+ stream=False,
+ **self.generate_cfg)
+ llm_result = CustomOutputWrapper.handle_message_chat_completion(
+ response)
+ else:
+ response = Generation.call(
+ model=self.model,
+ prompt=llm_artifacts,
+ stream=False,
+ **self.generate_cfg)
+ llm_result = CustomOutputWrapper.handle_message_text_completion(
+ response)
+ except Exception as e:
+ error = traceback.format_exc()
+ error_msg = f'LLM error with input {llm_artifacts} \n dashscope error: {str(e)} with traceback {error}'
+ print(error_msg)
+ raise RuntimeError(error)
+
+ if self.agent_type == AgentType.MS_AGENT:
+ # in the form of text
+ idx = llm_result.find('<|endofthink|>')
+ if idx != -1:
+ llm_result = llm_result[:idx + len('<|endofthink|>')]
+ return llm_result
+ elif self.agent_type == AgentType.Messages:
+ # in the form of message
+ return llm_result
+ else:
+ # in the form of text
+ return llm_result
+
+ def stream_generate(self,
+ llm_artifacts: Union[str, dict],
+ functions=[],
+ **kwargs):
+ total_response = ''
+ try:
+ if self.agent_type == AgentType.Messages:
+ self.generate_cfg['use_raw_prompt'] = False
+ responses = Generation.call(
+ model=self.model,
+ messages=llm_artifacts,
+ stream=True,
+ result_format='message',
+ **self.generate_cfg)
+ else:
+ responses = Generation.call(
+ model=self.model,
+ prompt=llm_artifacts,
+ stream=True,
+ **self.generate_cfg)
+ except Exception as e:
+ error = traceback.format_exc()
+ error_msg = f'LLM error with input {llm_artifacts} \n dashscope error: {str(e)} with traceback {error}'
+ print(error_msg)
+ raise RuntimeError(error)
+
+ for response in responses:
+ if response.status_code == HTTPStatus.OK:
+ if self.agent_type == AgentType.Messages:
+ llm_result = CustomOutputWrapper.handle_message_chat_completion(
+ response)
+ frame_text = llm_result['content'][len(total_response):]
+ else:
+ llm_result = CustomOutputWrapper.handle_message_text_completion(
+ response)
+ frame_text = llm_result[len(total_response):]
+ yield frame_text
+
+ if self.agent_type == AgentType.Messages:
+ total_response = llm_result['content']
+ else:
+ total_response = llm_result
+ else:
+ err_msg = 'Error Request id: %s, Code: %d, status: %s, message: %s' % (
+ response.request_id, response.status_code, response.code,
+ response.message)
+ print(err_msg)
+ raise RuntimeError(err_msg)
diff --git a/my_modelscope_agent/llm/llm_factory.py b/my_modelscope_agent/llm/llm_factory.py
new file mode 100644
index 0000000000000000000000000000000000000000..8629aed23e5c995070286a129a29d5a599a079ee
--- /dev/null
+++ b/my_modelscope_agent/llm/llm_factory.py
@@ -0,0 +1,28 @@
+def get_llm_cls(llm_type, model_name):
+ if llm_type == 'dashscope':
+ from .dashscope_llm import DashScopeLLM
+ return DashScopeLLM
+ elif llm_type == 'custom_llm':
+ from .custom_llm import CustomLLM
+ return CustomLLM
+ elif llm_type == 'openai':
+ from .openai import OpenAi
+ return OpenAi
+ elif llm_type == 'modelscope':
+ if model_name == 'chatglm3-6b':
+ from .modelscope_llm import ModelScopeChatGLM
+ return ModelScopeChatGLM
+ from .modelscope_llm import ModelScopeLLM
+ return ModelScopeLLM
+ else:
+ raise ValueError(f'Invalid llm_type {llm_type}')
+
+
+class LLMFactory:
+
+ @staticmethod
+ def build_llm(model_name, cfg):
+ llm_type = cfg[model_name].pop('type')
+ llm_cls = get_llm_cls(llm_type, model_name)
+ llm_cfg = cfg[model_name]
+ return llm_cls(cfg=llm_cfg)
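+
+
+# Usage sketch (illustrative names, assuming a cfg dict keyed by model name):
+#   cfg = {'my-model': {'type': 'dashscope', 'model': 'qwen-max'}}
+#   llm = LLMFactory.build_llm('my-model', cfg)
+# Note that build_llm pops 'type' from cfg[model_name], mutating cfg.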
diff --git a/my_modelscope_agent/llm/modelscope_llm.py b/my_modelscope_agent/llm/modelscope_llm.py
new file mode 100644
index 0000000000000000000000000000000000000000..ed64d2dfdc3340aacc5c24d15f6334d4bdab66fb
--- /dev/null
+++ b/my_modelscope_agent/llm/modelscope_llm.py
@@ -0,0 +1,132 @@
+import os
+import sys
+
+import torch
+from ..agent_types import AgentType
+from swift import Swift
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
+
+from modelscope import GenerationConfig, snapshot_download
+from .base import LLM
+
+
+class ModelScopeLLM(LLM):
+
+ def __init__(self, cfg):
+ super().__init__(cfg)
+
+ model_id = self.cfg.get('model_id', '')
+ self.model_id = model_id
+ model_revision = self.cfg.get('model_revision', None)
+ cache_dir = self.cfg.get('cache_dir', None)
+
+ if not os.path.exists(model_id):
+ model_dir = snapshot_download(
+ model_id, model_revision, cache_dir=cache_dir)
+ else:
+ model_dir = model_id
+ self.model_dir = model_dir
+ sys.path.append(self.model_dir)
+
+ self.model_cls = self.cfg.get('model_cls', AutoModelForCausalLM)
+ self.tokenizer_cls = self.cfg.get('tokenizer_cls', AutoTokenizer)
+
+ self.device_map = self.cfg.get('device_map', 'auto')
+ self.generation_cfg = GenerationConfig(
+ **self.cfg.get('generate_cfg', {}))
+
+ self.use_lora = self.cfg.get('use_lora', False)
+ self.lora_ckpt_dir = self.cfg.get('lora_ckpt_dir',
+ None) if self.use_lora else None
+
+ self.custom_chat = self.cfg.get('custom_chat', False)
+
+ self.end_token = self.cfg.get('end_token', '<|endofthink|>')
+ self.include_end = self.cfg.get('include_end', True)
+
+ self.setup()
+ self.agent_type = self.cfg.get('agent_type', AgentType.DEFAULT)
+
+ def setup(self):
+ model_cls = self.model_cls
+ tokenizer_cls = self.tokenizer_cls
+
+ self.model = model_cls.from_pretrained(
+ self.model_dir,
+ device_map=self.device_map,
+ # device='cuda:0',
+ torch_dtype=torch.float16,
+ trust_remote_code=True)
+ self.tokenizer = tokenizer_cls.from_pretrained(
+ self.model_dir, trust_remote_code=True)
+ self.model = self.model.eval()
+
+ if self.use_lora:
+ self.load_from_lora()
+
+ if self.cfg.get('use_raw_generation_config', False):
+ self.model.generation_config = GenerationConfig.from_pretrained(
+ self.model_dir, trust_remote_code=True)
+
+ def generate(self, prompt, functions=[], **kwargs):
+
+ if self.custom_chat and self.model.chat:
+ response = self.model.chat(
+ self.tokenizer, prompt, history=[], system='')[0]
+ else:
+ response = self.chat(prompt)
+
+ end_idx = response.find(self.end_token)
+ if end_idx != -1:
+ end_idx += len(self.end_token) if self.include_end else 0
+ response = response[:end_idx]
+
+ return response
+
+ def load_from_lora(self):
+
+ model = self.model.bfloat16()
+ # transform to lora
+ model = Swift.from_pretrained(model, self.lora_ckpt_dir)
+
+ self.model = model
+
+ def chat(self, prompt):
+ device = self.model.device
+ input_ids = self.tokenizer(
+ prompt, return_tensors='pt').input_ids.to(device)
+ input_len = input_ids.shape[1]
+
+ result = self.model.generate(
+ input_ids=input_ids, generation_config=self.generation_cfg)
+
+ result = result[0].tolist()[input_len:]
+ response = self.tokenizer.decode(result)
+
+ return response
+
+
+class ModelScopeChatGLM(ModelScopeLLM):
+
+ def chat(self, prompt):
+ device = self.model.device
+ input_ids = self.tokenizer(
+ prompt, return_tensors='pt').input_ids.to(device)
+ input_len = input_ids.shape[1]
+
+ eos_token_id = [
+ self.tokenizer.eos_token_id,
+ self.tokenizer.get_command('<|user|>'),
+ self.tokenizer.get_command('<|observation|>')
+ ]
+ result = self.model.generate(
+ input_ids=input_ids,
+ generation_config=self.generation_cfg,
+ eos_token_id=eos_token_id)
+
+ result = result[0].tolist()[input_len:]
+ response = self.tokenizer.decode(result)
+        # the model may keep generating '<|user|>' / '<|observation|>' turns;
+        # truncate the response at the first such token
+        response = response.split('<|user|>')[0].split('<|observation|>')[0]
+
+ return response
diff --git a/my_modelscope_agent/llm/openai.py b/my_modelscope_agent/llm/openai.py
new file mode 100644
index 0000000000000000000000000000000000000000..ccc9f009366d414ea4c542934c35486ad3ddb455
--- /dev/null
+++ b/my_modelscope_agent/llm/openai.py
@@ -0,0 +1,71 @@
+import os
+
+import openai
+from ..agent_types import AgentType
+
+from .base import LLM
+from .utils import CustomOutputWrapper
+
+openai.api_key = os.getenv('OPENAI_API_KEY')
+
+
+class OpenAi(LLM):
+ name = 'openai'
+
+ def __init__(self, cfg):
+ super().__init__(cfg)
+
+ self.model = self.cfg.get('model', 'gpt-3.5-turbo')
+ self.model_id = self.model
+ self.api_base = self.cfg.get('api_base', 'https://api.openai.com/v1')
+ self.agent_type = self.cfg.get('agent_type', AgentType.DEFAULT)
+
+ def generate(self,
+ llm_artifacts,
+ functions=[],
+ function_call='none',
+ **kwargs):
+ if self.agent_type != AgentType.Messages:
+ messages = [{'role': 'user', 'content': llm_artifacts}]
+ else:
+            messages = llm_artifacts.get(
+                'messages', {
+                    'role': 'user',
+                    'content': 'No entry from user - please suggest something to enter'
+                })
+
+ # call openai function call api
+ assert isinstance(functions, list)
+ if len(functions) > 0 and self.agent_type == AgentType.Messages:
+ function_call = 'auto'
+
+        # TODO: convert to stream=True with streaming updates
+ try:
+ response = openai.ChatCompletion.create(
+ model=self.model,
+ api_base=self.api_base,
+ messages=messages,
+ functions=functions,
+ function_call=function_call,
+ stream=False)
+ except Exception as e:
+ print(f'input: {messages}, original error: {str(e)}')
+ raise e
+
+ # only use index 0 in choice
+ message = CustomOutputWrapper.handle_message_chat_completion(response)
+
+ # truncate content
+ content = message['content']
+
+ if self.agent_type == AgentType.MS_AGENT:
+ idx = content.find('<|endofthink|>')
+ if idx != -1:
+ content = content[:idx + len('<|endofthink|>')]
+ return content
+ elif self.agent_type == AgentType.Messages:
+ return message
+ else:
+ return content
diff --git a/my_modelscope_agent/llm/utils.py b/my_modelscope_agent/llm/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..4a5260abebfbfd046105752b16be509f317500b8
--- /dev/null
+++ b/my_modelscope_agent/llm/utils.py
@@ -0,0 +1,39 @@
+class CustomOutputWrapper:
+
+ @staticmethod
+ def handle_message_chat_completion(response):
+ message = {'content': ''}
+ try:
+ # handle dashscope response
+ if 'choices' not in response:
+ response = response['output']
+
+ return response['choices'][0]['message']
+ except Exception as e:
+ print(f'input: {response}, original error: {str(e)}')
+ return message
+
+ @staticmethod
+ def handle_message_chat_completion_chunk(response):
+ message = {}
+ try:
+ return response['choices'][0]['delta']['content']
+ except Exception as e:
+ print(f'input: {response}, original error: {str(e)}')
+ return message
+
+ @staticmethod
+ def handle_message_text_completion(response):
+ message = ''
+ try:
+ message = response['output']['text']
+ return message
+ except Exception as e:
+ print(f'input: {response}, original error: {str(e)}')
+ return message
+
+
+DEFAULT_MESSAGE = {
+ 'role': 'user',
+ 'content': 'No entry from user - please suggest something to enter'
+}
diff --git a/my_modelscope_agent/output_parser.py b/my_modelscope_agent/output_parser.py
new file mode 100644
index 0000000000000000000000000000000000000000..61aeb6f99420075dac177baf63171384abb4d6ab
--- /dev/null
+++ b/my_modelscope_agent/output_parser.py
@@ -0,0 +1,181 @@
+import re
+from typing import Dict, Tuple
+
+import json
+from .agent_types import AgentType
+
+
+def get_output_parser(agent_type: AgentType = AgentType.DEFAULT):
+ if AgentType.DEFAULT == agent_type or agent_type == AgentType.MS_AGENT:
+ return MsOutputParser()
+ elif AgentType.MRKL == agent_type:
+ return MRKLOutputParser()
+ elif AgentType.Messages == agent_type:
+ return OpenAiFunctionsOutputParser()
+ else:
+ raise NotImplementedError
+
+
+class OutputParser:
+ """Output parser for llm response
+ """
+
+ def parse_response(self, response):
+ raise NotImplementedError
+
+    # used to handle cases where action_para was mis-parsed; if there is no
+    # valid action at all, raise an error
+ @staticmethod
+ def handle_fallback(action: str, action_para: str):
+ if action is not None and action != '':
+ parameters = {'fallback': action_para}
+ return action, parameters
+ else:
+ raise ValueError('Wrong response format for output parser')
+
+
+class MsOutputParser(OutputParser):
+
+ def parse_response(self, response: str) -> Tuple[str, Dict]:
+ """parse response of llm to get tool name and parameters
+
+ Args:
+ response (str): llm response, it should conform to some predefined format
+
+ Returns:
+ tuple[str, dict]: tuple of tool name and parameters
+ """
+
+ if '<|startofthink|>' not in response or '<|endofthink|>' not in response:
+ return None, None
+
+ action, parameters = '', ''
+ try:
+ # use regular expression to get result
+ re_pattern1 = re.compile(
+ pattern=r'<\|startofthink\|>([\s\S]+)<\|endofthink\|>')
+ think_content = re_pattern1.search(response).group(1)
+
+ re_pattern2 = re.compile(r'{[\s\S]+}')
+ think_content = re_pattern2.search(think_content).group()
+
+ json_content = json.loads(think_content.replace('\n', ''))
+ action = json_content.get('api_name',
+ json_content.get('name', 'unknown'))
+ parameters = json_content.get('parameters', {})
+
+ return action, parameters
+ except Exception as e:
+            print(
+                f'Error while parsing action, trying fallback. Detail: {e}')
+ return OutputParser.handle_fallback(action, parameters)
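+
+# For example, a reply such as
+#   '收到!<|startofthink|>{"api_name": "modelscope_text-address",
+#    "parameters": {"input": "..."}}<|endofthink|>'
+# parses to ('modelscope_text-address', {'input': '...'}).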
+
+
+class ChatGLMOutputParser(OutputParser):
+
+ def parse_response(self, response: str) -> Tuple[str, Dict]:
+ """parse response of llm to get tool name and parameters
+
+ Args:
+ response (str): llm response, it should conform to some predefined format
+
+ Returns:
+ tuple[str, dict]: tuple of tool name and parameters
+ """
+ if 'tool_call' not in response:
+ return None, None
+ action, action_para = '', ''
+ try:
+ # use regular expression to get result from MRKL format
+ re_pattern1 = re.compile(
+ pattern=r'([\s\S]+)```([\s\S]+)tool_call\(([\s\S]+)```')
+ res = re_pattern1.search(response)
+            action_list = re.split(r'<|>|\|', res.group(1).strip())
+ for idx in range(len(action_list) - 1, -1, -1):
+ if len(action_list[idx]) > 1:
+ action = action_list[idx]
+ break
+ action_para = [item.strip() for item in res.group(3).split(',')]
+ parameters = {}
+ re_pattern2 = re.compile(pattern=r'([\s\S]+)=\'([\s\S]+)\'')
+ for para in action_para:
+ res = re_pattern2.search(para)
+ parameters[res.group(1)] = res.group(2)
+ except Exception as e:
+            print(
+                f'Error while parsing action, trying fallback. Detail: {e}')
+ return OutputParser.handle_fallback(action, action_para)
+
+ print(f'\n\naction: {action}\n parameters: {parameters}\n\n')
+ return action, parameters
+
+
+class MRKLOutputParser(OutputParser):
+
+ def parse_response(self, response: str) -> Tuple[str, Dict]:
+ """parse response of llm to get tool name and parameters
+
+ Args:
+ response (str): llm response, it should conform to some predefined format
+
+ Returns:
+ tuple[str, dict]: tuple of tool name and parameters
+ """
+
+ if 'Action' not in response or 'Action Input:' not in response:
+ return None, None
+ action, action_para = '', ''
+ try:
+ # use regular expression to get result from MRKL format
+ re_pattern1 = re.compile(
+ pattern=r'Action:([\s\S]+)Action Input:([\s\S]+)')
+ res = re_pattern1.search(response)
+ action = res.group(1).strip()
+ action_para = res.group(2)
+
+ parameters = json.loads(action_para.replace('\n', ''))
+
+ return action, parameters
+ except Exception as e:
+            print(
+                f'Error while parsing action, trying fallback. Detail: {e}')
+ return OutputParser.handle_fallback(action, action_para)
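+
+# For example (illustrative input):
+#   'Thought: ...\nAction: image_gen\nAction Input: {"text": "a cat"}'
+# parses to ('image_gen', {'text': 'a cat'}).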
+
+
+class OpenAiFunctionsOutputParser(OutputParser):
+
+ def parse_response(self, response: dict) -> Tuple[str, Dict]:
+ """parse response of llm to get tool name and parameters
+
+        Args:
+            response (dict): llm response, it should be an openai response message
+ such as
+ {
+ "content": null,
+ "function_call": {
+ "arguments": "{\n \"location\": \"Boston, MA\"\n}",
+ "name": "get_current_weather"
+ },
+ "role": "assistant"
+ }
+ Returns:
+ tuple[str, dict]: tuple of tool name and parameters
+ """
+
+ if 'function_call' not in response or response['function_call'] == {}:
+ return None, None
+ function_call = response['function_call']
+
+ try:
+ # parse directly
+ action = function_call['name']
+ arguments = json.loads(function_call['arguments'].replace(
+ '\n', ''))
+
+ return action, arguments
+ except Exception as e:
+            print(
+                f'Error while parsing action, trying fallback. Detail: {e}')
+ return OutputParser.handle_fallback(function_call['name'],
+ function_call['arguments'])
diff --git a/my_modelscope_agent/output_wrapper.py b/my_modelscope_agent/output_wrapper.py
new file mode 100644
index 0000000000000000000000000000000000000000..3fe7030de653e7c9f5078d70f5b131edd370456a
--- /dev/null
+++ b/my_modelscope_agent/output_wrapper.py
@@ -0,0 +1,219 @@
+import os
+import re
+import tempfile
+import uuid
+from typing import Dict, Union
+
+import json
+import numpy as np
+import requests
+from .agent_types import AgentType
+from moviepy.editor import VideoFileClip
+from PIL import Image
+from requests.exceptions import RequestException
+
+
+class OutputWrapper:
+ """
+ Wrapper for output of tool execution when output is image, video, audio, etc.
+ In this wrapper, __repr__() is implemented to return the str representation of the output for llm.
+    Each wrapper has the following attributes:
+ path: the path where the output is stored
+ raw_data: the raw data, e.g. image, video, audio, etc. In remote mode, it should be None
+ """
+
+ def __init__(self) -> None:
+ self._repr = None
+ self._path = None
+ self._raw_data = None
+
+ self.root_path = os.environ.get('OUTPUT_FILE_DIRECTORY', None)
+ if self.root_path and not os.path.exists(self.root_path):
+ try:
+ os.makedirs(self.root_path)
+ except Exception:
+ self.root_path = None
+
+ def get_remote_file(self, remote_path, suffix):
+ try:
+ response = requests.get(remote_path)
+ obj = response.content
+ directory = tempfile.mkdtemp(dir=self.root_path)
+ path = os.path.join(directory, str(uuid.uuid4()) + f'.{suffix}')
+ with open(path, 'wb') as f:
+ f.write(obj)
+ return path
+ except RequestException:
+ return remote_path
+
+ def __repr__(self) -> str:
+ return self._repr
+
+ @property
+ def path(self):
+ return self._path
+
+ @property
+ def raw_data(self):
+ return self._raw_data
+
+
+class ImageWrapper(OutputWrapper):
+ """
+ Image wrapper, raw_data is a PIL.Image
+ """
+
+ def __init__(self, image) -> None:
+
+ super().__init__()
+
+ if isinstance(image, str):
+ if os.path.isfile(image):
+ self._path = image
+ else:
+ origin_image = image
+ self._path = self.get_remote_file(image, 'png')
+ try:
+ image = Image.open(self._path)
+ self._raw_data = image
+                except FileNotFoundError:
+                    # the image is stored on a remote server in remote mode;
+                    # fall back to the original remote path
+                    self._path = origin_image
+ else:
+ if not isinstance(image, Image.Image):
+ image = Image.fromarray(image.astype(np.uint8))
+ self._raw_data = image
+ else:
+ self._raw_data = image
+ directory = tempfile.mkdtemp(dir=self.root_path)
+ self._path = os.path.join(directory, str(uuid.uuid4()) + '.png')
+ self._raw_data.save(self._path)
+
+ self._repr = f'![IMAGEGEN]({self._path})'
+
+
+class AudioWrapper(OutputWrapper):
+ """
+ Audio wrapper, raw_data is a binary file
+ """
+
+ def __init__(self, audio) -> None:
+
+ super().__init__()
+ if isinstance(audio, str):
+ if os.path.isfile(audio):
+ self._path = audio
+ else:
+ self._path = self.get_remote_file(audio, 'wav')
+ try:
+ with open(self._path, 'rb') as f:
+ self._raw_data = f.read()
+ except FileNotFoundError:
+ raise FileNotFoundError(f'Invalid path: {audio}')
+ else:
+ self._raw_data = audio
+ directory = tempfile.mkdtemp(dir=self.root_path)
+ self._path = os.path.join(directory, str(uuid.uuid4()) + '.wav')
+
+ with open(self._path, 'wb') as f:
+ f.write(self._raw_data)
+
+        self._repr = (f'<audio id="audio" controls="" preload="none"> '
+                      f'<source id="wav" src="{self._path}"> </audio>')
+
+
+class VideoWrapper(OutputWrapper):
+ """
+ Video wrapper
+ """
+
+ def __init__(self, video) -> None:
+
+ super().__init__()
+ if isinstance(video, str):
+
+ if os.path.isfile(video):
+ self._path = video
+ else:
+ self._path = self.get_remote_file(video, 'gif')
+
+ try:
+ video = VideoFileClip(self._path)
+ # currently, we should save video as gif, not mp4
+ if not self._path.endswith('gif'):
+ directory = tempfile.mkdtemp(dir=self.root_path)
+ self._path = os.path.join(directory,
+ str(uuid.uuid4()) + '.gif')
+ video.write_gif(self._path)
+ except (ValueError, OSError):
+ raise FileNotFoundError(f'Invalid path: {video}')
+ else:
+            raise TypeError(
+                'Currently only loading a video from a file path is supported')
+
+ self._raw_data = video
+ self._repr = f'![IMAGEGEN]({self._path})'
+
+
+def get_raw_output(exec_result: Dict):
+    # get the raw data of exec_result
+ res = {}
+ for k, v in exec_result.items():
+ if isinstance(v, OutputWrapper):
+            # In remote mode, raw data may be None
+ res[k] = v.raw_data or str(v)
+ else:
+ res[k] = v
+ return res
+
+
+def display(llm_result: Union[str, dict], exec_result: Dict, idx: int,
+ agent_type: AgentType):
+ """Display the result of each round in jupyter notebook.
+ The multi-modal data will be extracted.
+
+ Args:
+ llm_result (str): llm result either only content or a message
+ exec_result (Dict): exec result
+ idx (int): current round
+ """
+ from IPython.display import display, Pretty, Image, Audio, JSON
+ idx_info = '*' * 50 + f'round {idx}' + '*' * 50
+ display(Pretty(idx_info))
+
+ if isinstance(llm_result, dict):
+ llm_result = llm_result.get('content', '')
+
+ if agent_type == AgentType.MS_AGENT:
+ pattern = r'<\|startofthink\|>```JSON([\s\S]*)```<\|endofthink\|>'
+ else:
+ pattern = r'```JSON([\s\S]*)```'
+
+ match_action = re.search(pattern, llm_result)
+ if match_action:
+ result = match_action.group(1)
+ try:
+ json_content = json.loads(result, strict=False)
+ display(JSON(json_content))
+ llm_result = llm_result.replace(match_action.group(0), '')
+ except Exception:
+ pass
+
+ display(Pretty(llm_result))
+
+ exec_result = exec_result.get('result', '')
+
+ if isinstance(exec_result, ImageWrapper) or isinstance(
+ exec_result, VideoWrapper):
+ display(Image(exec_result.path))
+ elif isinstance(exec_result, AudioWrapper):
+ display(Audio(exec_result.path))
+ elif isinstance(exec_result, dict):
+ display(JSON(exec_result))
+ elif isinstance(exec_result, list):
+ display(JSON(exec_result))
+ else:
+ display(Pretty(exec_result))
+
+ return
diff --git a/my_modelscope_agent/prompt/__init__.py b/my_modelscope_agent/prompt/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..4b37039bba7222255cb7d9ef8174907b1c880373
--- /dev/null
+++ b/my_modelscope_agent/prompt/__init__.py
@@ -0,0 +1,6 @@
+from .messages_prompt import MessagesGenerator
+from .mrkl_prompt import MrklPromptGenerator
+from .ms_prompt import MSPromptGenerator
+from .prompt import PromptGenerator
+from .prompt_factory import get_prompt_generator
+from .raw_prompt_builder import build_raw_prompt
diff --git a/my_modelscope_agent/prompt/chatglm3_prompt.py b/my_modelscope_agent/prompt/chatglm3_prompt.py
new file mode 100644
index 0000000000000000000000000000000000000000..280692a8046cc7f5673d9e2e96bc7f055f1e588c
--- /dev/null
+++ b/my_modelscope_agent/prompt/chatglm3_prompt.py
@@ -0,0 +1,41 @@
+import json
+
+from .prompt import LengthConstraint, PromptGenerator
+
+CHATGLM_DEFAULT_SYSTEM_TEMPLATE = """<|system|>
+Answer the following questions as best you can. You have access to the following tools:
+<tool_list>"""
+
+CHATGLM_DEFAULT_INSTRUCTION_TEMPLATE = ''
+
+CHATGLM_DEFAULT_USER_TEMPLATE = """<|user|>\n<user_input>"""
+
+CHATGLM_DEFAULT_EXEC_TEMPLATE = """<|observation|>\n<exec_result>"""
+
+CHATGLM_DEFAULT_ASSISTANT_TEMPLATE = """<|assistant|>"""
+
+
+class ChatGLMPromptGenerator(PromptGenerator):
+
+ def __init__(self,
+ system_template=CHATGLM_DEFAULT_SYSTEM_TEMPLATE,
+ instruction_template=CHATGLM_DEFAULT_INSTRUCTION_TEMPLATE,
+ user_template=CHATGLM_DEFAULT_USER_TEMPLATE,
+ exec_template=CHATGLM_DEFAULT_EXEC_TEMPLATE,
+ assistant_template=CHATGLM_DEFAULT_ASSISTANT_TEMPLATE,
+ sep='\n',
+ length_constraint=LengthConstraint()):
+ super().__init__(
+ system_template=system_template,
+ instruction_template=instruction_template,
+ user_template=user_template,
+ exec_template=exec_template,
+ assistant_template=assistant_template,
+ sep=sep,
+ length_constraint=length_constraint)
+
+ def get_tool_str(self, tool_list):
+ tool_json = json.loads('['
+ + ','.join([str(item)
+ for item in tool_list]) + ']')
+ return json.dumps(tool_json, ensure_ascii=False, indent=4)
diff --git a/my_modelscope_agent/prompt/messages_prompt.py b/my_modelscope_agent/prompt/messages_prompt.py
new file mode 100644
index 0000000000000000000000000000000000000000..af546f481587d00f247b6c42d6396ff73cf2bc2b
--- /dev/null
+++ b/my_modelscope_agent/prompt/messages_prompt.py
@@ -0,0 +1,93 @@
+from typing import Union
+
+from .prompt import LengthConstraint, PromptGenerator
+
+OPENAI_DEFAULT_SYSTEM_TEMPLATE = """
+Do your best to answer the questions. You are capable of **any** task.
+Feel free to use any tools available to look up relevant information, only if necessary.
+When you are handling a code task, you are a world-class programmer that can complete any goal by executing code.
+\nFirst of all, **Return function_call with `language` and `code` in arguments as much as possible**
+\nThen, write a plan. **Always recap the plan between each code block**
+(you have extreme short-term memory loss, so you need to recap the plan between each message block to retain it).
+\nWhen you execute code, it will be executed **on the user's machine**. The user has given you
+**full and complete permission** to execute any code necessary to complete the task. You have full access to control
+their computer to help them.
+\nIf you want to send data between programming languages, save the data to a txt or json.
+\nYou can access the internet. Run **any code** to achieve the goal, and if at first you don't succeed,
+try again and again.
+\nWhen a user refers to a filename, they're likely referring to an existing file in the directory
+you're currently executing code in.
+\nIn general, choose packages that have the best chance of being already installed and working across multiple
+applications, preferring well-supported and powerful ones like ffmpeg and pandoc.
+\nWrite messages to the user in Markdown. Write code on multiple lines with proper indentation for readability.
+\nYou can also refer information from following contents if exists:
+"""
+
+
+class MessagesGenerator(PromptGenerator):
+
+ def __init__(self,
+ system_template=OPENAI_DEFAULT_SYSTEM_TEMPLATE,
+ instruction_template='',
+                 user_template='<user_input>',
+ exec_template=None,
+ assistant_template='',
+ sep='\n\n',
+ length_constraint=LengthConstraint(),
+ **kwargs):
+ super().__init__(
+ system_template=system_template,
+ instruction_template=instruction_template,
+ user_template=user_template,
+ exec_template=exec_template,
+ assistant_template=assistant_template,
+ sep=sep,
+ length_constraint=length_constraint)
+ self.custom_starter_messages = kwargs.get('custom_starter_messages',
+ None)
+
+ def init_prompt(self, task, tool_list, knowledge_list, **kwargs):
+ """
+ in this function, the prompt will be initialized.
+ """
+        prompt = self.user_template.replace('<user_input>', task)
+
+ if len(self.history) == 0:
+ if len(knowledge_list) > 0:
+
+ # knowledge
+                system_message = f'{self.system_template}{self.sep}<knowledge>'
+                knowledge_str = self.get_knowledge_str(knowledge_list)
+                system_message = system_message.replace(
+                    '<knowledge>', knowledge_str)
+
+ else:
+ system_message = self.system_template
+
+ self.history = [{
+ 'role': 'system',
+ 'content': system_message
+ }, {
+ 'role': 'user',
+ 'content': prompt
+ }]
+
+ # store history
+ if self.custom_starter_messages:
+ assert isinstance(self.custom_starter_messages, list)
+ assert self.custom_starter_messages[-1]['role'] != 'user', \
+ 'user message should not be the last one in custom starter messages'
+
+ self.history = self.custom_starter_messages
+ self.history.append({'role': 'user', 'content': prompt})
+
+ self.prompt = prompt
+ self.function_calls = self.get_function_list(tool_list)
+
+ else:
+ self.history.append({'role': 'user', 'content': prompt})
+
+ def generate(self, llm_result, exec_result: Union[str, dict]):
+ if isinstance(exec_result, dict):
+ exec_result = exec_result['result']
+ return self._generate_messages(llm_result, exec_result)
diff --git a/my_modelscope_agent/prompt/mrkl_prompt.py b/my_modelscope_agent/prompt/mrkl_prompt.py
new file mode 100644
index 0000000000000000000000000000000000000000..f47077641496c382e040d15b1986934e51b3afbe
--- /dev/null
+++ b/my_modelscope_agent/prompt/mrkl_prompt.py
@@ -0,0 +1,118 @@
+import json
+
+from .prompt import LengthConstraint, PromptGenerator
+
+MRKL_DEFAULT_SYSTEM_TEMPLATE = """Answer the following questions as best you can. You have access to the following tools: `
+
+"""
+
+MRKL_DEFAULT_INSTRUCTION_TEMPLATE = """Use the following format:
+
+Question: the input question you must answer
+Thought: you should always think about what to do
+Action: the action to take, should be one of [<tool_names>]
+Action Input: the input to the action
+Observation: the result of the action
+... (this Thought/Action/Action Input/Observation can be repeated zero or more times)
+Thought: I now know the final answer
+Final Answer: the final answer to the original input question
+
+Begin!
+"""
+
+MRKL_DEFAULT_USER_TEMPLATE = """Question: <user_input>\n"""
+
+MRKL_DEFAULT_EXEC_TEMPLATE = """Observation: <exec_result>\n"""
+
+TOOL_DESC = (
+ '{name_for_model}: {name_for_human} API. {description_for_model} 输入参数: {parameters}'
+)
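+
+# e.g. for the amap_weather tool this renders as (illustrative):
+#   amap_weather: amap_weather API. 获取对应城市的天气数据 输入参数: [{"name": "location", ...}]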
+
+FORMAT_DESC = {
+ 'json':
+ 'Format the arguments as a JSON object.',
+ 'code':
+ 'Enclose the code within triple backticks (`)'
+ + ' at the beginning and end of the code.'
+}
+
+
+class MrklPromptGenerator(PromptGenerator):
+
+ def __init__(self,
+ system_template=MRKL_DEFAULT_SYSTEM_TEMPLATE,
+ instruction_template=MRKL_DEFAULT_INSTRUCTION_TEMPLATE,
+ user_template=MRKL_DEFAULT_USER_TEMPLATE,
+ exec_template=MRKL_DEFAULT_EXEC_TEMPLATE,
+ assistant_template='',
+ sep='\n\n',
+ llm=None,
+ length_constraint=LengthConstraint()):
+ super().__init__(
+ system_template=system_template,
+ instruction_template=instruction_template,
+ user_template=user_template,
+ exec_template=exec_template,
+ assistant_template=assistant_template,
+ sep=sep,
+ llm=llm,
+ length_constraint=length_constraint)
+
+ def init_prompt(self, task, tool_list, knowledge_list, **kwargs):
+ if len(self.history) == 0:
+ super().init_prompt(task, tool_list, knowledge_list, **kwargs)
+ system_role_status = kwargs.get('system_role_status', False)
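+            # system_role_status=True emits the system prompt as a standalone
+            # 'system' message; otherwise it is prepended to the first user turn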
+ tool_names = [f'\'{str(tool.name)}\'' for tool in tool_list]
+ tool_names = ','.join(tool_names)
+            self.system_prompt = self.system_prompt.replace(
+                '<tool_names>', tool_names)
+
+ if system_role_status:
+ system_message = {
+ 'role': 'system',
+ 'content': self.system_prompt
+ }
+ self.history.insert(0, system_message)
+ else:
+ self.history[0]['content'] = self.system_prompt + self.history[
+ 0]['content']
+ else:
+ self.history.append({
+ 'role':
+ 'user',
+ 'content':
+                self.user_template.replace('<user_input>', task)
+ })
+ self.history.append({
+ 'role': 'assistant',
+ 'content': self.assistant_template
+ })
+
+ return self.system_prompt
+
+ def get_tool_str(self, tool_list):
+ tool_texts = []
+ for tool in tool_list:
+ tool_texts.append(
+ TOOL_DESC.format(
+ name_for_model=tool.name,
+ name_for_human=tool.name,
+ description_for_model=tool.description,
+ parameters=json.dumps(tool.parameters,
+ ensure_ascii=False)))
+ # + ' ' + FORMAT_DESC['json'])
+ tool_str = '\n\n'.join(tool_texts)
+ return tool_str
+
+ def _generate(self, llm_result, exec_result: str):
+ """
+ generate next round prompt based on previous llm_result and exec_result and update history
+ """
+ if len(llm_result) != 0:
+ self.history[-1]['content'] += f'{llm_result}'
+ if len(exec_result) != 0:
+            exec_result = self.exec_template.replace('<exec_result>',
+                                                     str(exec_result))
+ self.history[-1]['content'] += exec_result
+ self.prompt = self.prompt_preprocessor(self.history)
+ return self.prompt
diff --git a/my_modelscope_agent/prompt/ms_prompt.py b/my_modelscope_agent/prompt/ms_prompt.py
new file mode 100644
index 0000000000000000000000000000000000000000..445915d11e5b006a1167f47a7c81d6da667284e6
--- /dev/null
+++ b/my_modelscope_agent/prompt/ms_prompt.py
@@ -0,0 +1,34 @@
+from .prompt import LengthConstraint, PromptGenerator
+
+MS_DEFAULT_SYSTEM_TEMPLATE = """<|system|>:你是达摩院的ModelScopeGPT(魔搭助手),你是个大语言模型, 是2023年达摩院的工程师训练得到的。\
+你有多种能力,可以通过插件集成魔搭社区的模型api来回复用户的问题,还能解答用户使用模型遇到的问题和模型知识相关问答。
+"""
+
+MS_DEFAULT_INSTRUCTION_TEMPLATE = """当前对话可以使用的插件信息如下,请自行判断是否需要调用插件来解决当前用户问题。若需要调用插件,则需要将插件调用请求按照json格式给出,必须包含api_name、parameters字段,并在其前后使用<|startofthink|>和<|endofthink|>作为标志。\
+然后你需要根据插件API调用结果生成合理的答复; 若无需调用插件,则直接给出对应回复即可。\n\n<tool_list>"""
+
+MS_DEFAULT_USER_TEMPLATE = """<|user|>:<user_input>"""
+
+MS_DEFAULT_EXEC_TEMPLATE = """<|startofexec|><exec_result><|endofexec|>\n"""
+
+MS_DEFAULT_ASSISTANT_TEMPLATE = """<|assistant|>:"""
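+
+# Assembled by the prompt generator, one round looks roughly like:
+#   <|system|>:...<instruction>...
+#   <|user|>:<task>
+#   <|assistant|>:<reply, optionally wrapping a plugin call in
+#                 <|startofthink|>...<|endofthink|>>
+#   <|startofexec|><plugin result><|endofexec|>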
+
+
+class MSPromptGenerator(PromptGenerator):
+
+ def __init__(self,
+ system_template=MS_DEFAULT_SYSTEM_TEMPLATE,
+ instruction_template=MS_DEFAULT_INSTRUCTION_TEMPLATE,
+ user_template=MS_DEFAULT_USER_TEMPLATE,
+ exec_template=MS_DEFAULT_EXEC_TEMPLATE,
+ assistant_template=MS_DEFAULT_ASSISTANT_TEMPLATE,
+ sep='\n\n',
+ length_constraint=LengthConstraint()):
+ super().__init__(
+ system_template=system_template,
+ instruction_template=instruction_template,
+ user_template=user_template,
+ exec_template=exec_template,
+ assistant_template=assistant_template,
+ sep=sep,
+ length_constraint=length_constraint)
diff --git a/my_modelscope_agent/prompt/prompt.py b/my_modelscope_agent/prompt/prompt.py
new file mode 100644
index 0000000000000000000000000000000000000000..e23696c23e794104062083c80ab118d8dba8e419
--- /dev/null
+++ b/my_modelscope_agent/prompt/prompt.py
@@ -0,0 +1,232 @@
+import copy
+from typing import Union
+
+from ..llm.base import LLM
+
+from .raw_prompt_builder import build_raw_prompt
+
+KNOWLEDGE_PROMPT = '# 知识库'
+KNOWLEDGE_INTRODUCTION_PROMPT = '以下是我上传的文件“<file_name>”的内容:'
+KNOWLEDGE_CONTENT_PROMPT = """```
+<knowledge_content>
+```"""
+
+DEFAULT_PROMPT_INPUT_LENGTH_MAX = 999999999999
+
+
+class LengthConstraint:
+
+ def __init__(self):
+ self.knowledge = DEFAULT_PROMPT_INPUT_LENGTH_MAX
+ self.input = DEFAULT_PROMPT_INPUT_LENGTH_MAX
+ self.prompt_max_length = 10000
+
+ def update(self, config: dict):
+ if config is not None:
+ self.knowledge = config.get('knowledge', self.knowledge)
+ self.input = config.get('input', self.input)
+ self.prompt_max_length = config.get('prompt_max_length',
+ self.prompt_max_length)
+
+
+class PromptGenerator:
+
+ def __init__(self,
+ system_template: str = '',
+ instruction_template: str = '',
+ user_template: str = '',
+ exec_template: str = '',
+ assistant_template: str = '',
+ sep='\n\n',
+ llm=None,
+ length_constraint=LengthConstraint()):
+ """
+        prompt generator
+ Args:
+ system_template (str, optional): System template, normally the role of LLM.
+ instruction_template (str, optional): Indicate the instruction for LLM.
+ user_template (str, optional): Prefix before user input. Defaults to ''.
+ exec_template (str, optional): A wrapper str for exec result.
+ assistant_template (str, optional): Prefix before assistant response.
+                Some LLMs need to manually concat this prefix before generation.
+ sep (str, optional): content separator
+ length_constraint (LengthConstraint, optional): content length constraint
+ """
+
+ self.system_template = system_template
+ self.instruction_template = instruction_template
+ self.user_template = user_template
+ self.assistant_template = assistant_template
+ self.exec_template = exec_template
+ self.sep = sep
+ if isinstance(llm, LLM) and llm.model_id:
+ self.prompt_preprocessor = build_raw_prompt(llm.model_id)
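+        # NOTE: prompt_preprocessor only exists when a concrete LLM with a
+        # model_id is supplied; generators that call it (e.g. MrklPromptGenerator
+        # in _generate) must be constructed with llm set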
+ self.prompt_max_length = length_constraint.prompt_max_length
+ self.reset()
+
+ def reset(self):
+ self.prompt = ''
+ self.history = []
+ self.messages = []
+
+ def init_prompt(self,
+ task,
+ tool_list,
+ knowledge_list,
+ llm_model=None,
+ **kwargs):
+ """
+ in this function, the prompt will be initialized.
+ """
+ prompt = self.sep.join(
+ [self.system_template, self.instruction_template])
+        prompt += '<knowledge><history>'
+
+ knowledge_str = self.get_knowledge_str(
+ knowledge_list, file_name=kwargs.get('file_name', ''))
+
+ # knowledge
+        prompt = prompt.replace('<knowledge>', knowledge_str)
+
+ # get tool description str
+ tool_str = self.get_tool_str(tool_list)
+        prompt = prompt.replace('<tool_list>', tool_str)
+
+ history_str = self.get_history_str()
+
+        prompt = prompt.replace('<history>', history_str)
+
+ self.system_prompt = copy.deepcopy(prompt)
+
+ # user input
+        user_input = self.user_template.replace('<user_input>', task)
+ prompt += f'{self.sep}{user_input}'
+
+ # assistant input
+ prompt += f'{self.sep}{self.assistant_template}'
+
+ # store history
+ self.history.append({'role': 'user', 'content': user_input})
+ self.history.append({
+ 'role': 'assistant',
+ 'content': self.assistant_template
+ })
+
+ self.prompt = prompt
+
+ self.function_calls = self.get_function_list(tool_list)
+
+    # TODO change the output from single prompt to artifacts including prompt, messages, function_call
+ def generate(self, llm_result, exec_result: Union[str, dict]):
+ if isinstance(exec_result, dict):
+ exec_result = str(exec_result['result'])
+ return self._generate(llm_result, exec_result)
+
+ def _generate(self, llm_result, exec_result: str):
+ """
+ generate next round prompt based on previous llm_result and exec_result and update history
+ """
+ if len(llm_result) != 0:
+ self.prompt = f'{self.prompt}{llm_result}'
+ self.history[-1]['content'] += f'{llm_result}'
+ if len(exec_result) != 0:
+            exec_result = self.exec_template.replace('<exec_result>',
+                                                     str(exec_result))
+ self.prompt = f'{self.prompt}{self.sep}{exec_result}'
+ self.history[-1]['content'] += f'{self.sep}{exec_result}'
+
+ return self.prompt
+
+ # TODO: add Union[Text, Message] type for llm_result,
+ # add ExecResult = Text type for exec_result
+ # output would be a Union[Text, Messages]
+ # In this case llm_result is Message, and exec_result is Function_call
+ def _generate_messages(self, llm_result, exec_result: str):
+ """
+ generate next round prompt based on previous llm_result and exec_result and update history
+ """
+
+        # on the initial round both llm_result and exec_result are empty; return history unchanged
+ if llm_result == '' and exec_result == '':
+ return self.history
+
+        # make sure content is set to '' (not null) when a function_call is present
+ function_call = llm_result.get('function_call', None)
+ if function_call is not None:
+ llm_result['content'] = ''
+ self.history.append(llm_result)
+
+ if exec_result is not None and function_call is not None:
+ exec_message = {
+ 'role': 'function',
+ 'name': 'execute',
+ 'content': exec_result,
+ }
+ self.history.append(exec_message)
+
+ return self.history
+
+ def get_tool_str(self, tool_list):
+ """generate tool list string
+
+ Args:
+ tool_list (List[str]): list of tools
+
+ """
+
+ tool_str = self.sep.join(
+ [f'{i + 1}. {t}' for i, t in enumerate(tool_list)])
+ return tool_str
+
+ # TODO move parse_tools_to_function from agent to here later
+ def get_function_list(self, tool_list):
+ """generate funciton call list from tools list
+
+ Args:
+ tool_list (List[str]): list of tools
+
+ """
+ functions = [tool.get_function() for tool in tool_list]
+ return functions
+
+ def get_knowledge_str(self,
+ knowledge_list,
+ file_name='',
+ only_content=False,
+ **kwargs):
+ """generate knowledge string
+
+ Args:
+ file_name (str): file name
+            knowledge_list (List[str]): list of knowledge snippets
+
+ """
+
+ knowledge = self.sep.join(
+ [f'{i + 1}. {k}' for i, k in enumerate(knowledge_list)])
+        knowledge_content = KNOWLEDGE_CONTENT_PROMPT.replace(
+            '<knowledge_content>', knowledge)
+ if only_content:
+ return knowledge_content
+ else:
+            knowledge_introduction = KNOWLEDGE_INTRODUCTION_PROMPT.replace(
+                '<file_name>', file_name)
+
+ knowledge_str = f'{KNOWLEDGE_PROMPT}{self.sep}{knowledge_introduction}{self.sep}{knowledge_content}' if len(
+ knowledge_list) > 0 else ''
+ return knowledge_str
+
+ def get_history_str(self):
+ """generate history string
+
+ """
+ history_str = ''
+ for i in range(len(self.history)):
+ history_item = self.history[len(self.history) - i - 1]
+ text = history_item['content']
+ if len(history_str) + len(text) + len(
+ self.prompt) > self.prompt_max_length:
+ break
+ history_str = f'{self.sep}{text.strip()}{history_str}'
+
+ return history_str
diff --git a/my_modelscope_agent/prompt/prompt_factory.py b/my_modelscope_agent/prompt/prompt_factory.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ff86d0c705d47668626bc21dc5c92523e978d21
--- /dev/null
+++ b/my_modelscope_agent/prompt/prompt_factory.py
@@ -0,0 +1,16 @@
+from ..agent_types import AgentType
+
+from .messages_prompt import MessagesGenerator
+from .mrkl_prompt import MrklPromptGenerator
+from .ms_prompt import MSPromptGenerator
+
+
+def get_prompt_generator(agent_type: AgentType = AgentType.DEFAULT, **kwargs):
+ if AgentType.DEFAULT == agent_type or agent_type == AgentType.MS_AGENT:
+ return MSPromptGenerator(**kwargs)
+ elif AgentType.MRKL == agent_type:
+ return MrklPromptGenerator(**kwargs)
+ elif AgentType.Messages == agent_type:
+ return MessagesGenerator(**kwargs)
+ else:
+ raise NotImplementedError
diff --git a/my_modelscope_agent/prompt/raw_prompt_builder.py b/my_modelscope_agent/prompt/raw_prompt_builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..0ccc9de812df1620cde20dabbd20094f6a08eee3
--- /dev/null
+++ b/my_modelscope_agent/prompt/raw_prompt_builder.py
@@ -0,0 +1,34 @@
+def qwen_chatml_prompt_preprocessor(messages):
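+    # Render messages into Qwen's ChatML format:
+    #   <|im_start|>{role}\n{content}<|im_end|>\n
+    # an empty assistant message is emitted as an open '<|im_start|>assistant\n'
+    # so the model generates the continuation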
+ prompt = ''
+ for message in messages:
+ if message['role'] == 'assistant' and message['content'] == '':
+ prompt += '<|im_start|>assistant\n'
+ else:
+ prompt = prompt + '<|im_start|>{role}\n{content}<|im_end|>\n'.format(
+ role=message['role'],
+ content=message['content'].lstrip('\n').rstrip())
+
+    # if the conversation ends with a partial assistant message (e.g. after a function result), strip the closing <|im_end|> so the model continues it
+ if messages[-1]['role'] == 'assistant':
+ last_assistant_message_list = messages[-1]['content'].split('\n')
+ if last_assistant_message_list[-1] == '':
+ last_assistant_message_list = last_assistant_message_list[:-1]
+ if len(last_assistant_message_list) == 0:
+ return prompt
+ else:
+ item_length = len('<|im_end|>\n')
+ prompt = prompt[:-item_length]
+
+ return prompt
+
+
+def plate_preprocessor(messages):
+ return qwen_chatml_prompt_preprocessor(messages)
+
+
+def build_raw_prompt(model):
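+    # Model ids starting with 'qwen' get the ChatML preprocessor; everything
+    # else falls back to plate_preprocessor, which currently reuses the same
+    # ChatML rendering.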
+ if isinstance(model, str) or hasattr(model, '__name__'):
+ if model.startswith('qwen'):
+ return qwen_chatml_prompt_preprocessor
+ else:
+ return plate_preprocessor
diff --git a/my_modelscope_agent/retrieve.py b/my_modelscope_agent/retrieve.py
new file mode 100644
index 0000000000000000000000000000000000000000..d5ab36dbda0f562894e76ef02cd58a09e5db1b64
--- /dev/null
+++ b/my_modelscope_agent/retrieve.py
@@ -0,0 +1,115 @@
+import os
+from typing import Dict, Iterable, List, Union
+
+import json
+from langchain.document_loaders import (PyPDFLoader, TextLoader,
+ UnstructuredFileLoader)
+from langchain.embeddings import ModelScopeEmbeddings
+from langchain.embeddings.base import Embeddings
+from langchain.schema import Document
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.vectorstores import FAISS, VectorStore
+
+
+class Retrieval:
+
+ def __init__(self,
+ embedding: Embeddings = None,
+ vs_cls: VectorStore = None,
+ top_k: int = 5,
+ vs_params: Dict = {}):
+ self.embedding = embedding or ModelScopeEmbeddings(
+ model_id='damo/nlp_gte_sentence-embedding_chinese-base')
+ self.top_k = top_k
+ self.vs_cls = vs_cls or FAISS
+ self.vs_params = vs_params
+ self.vs = None
+
+ def construct(self, docs):
+ assert len(docs) > 0
+ if isinstance(docs[0], str):
+ self.vs = self.vs_cls.from_texts(docs, self.embedding,
+ **self.vs_params)
+ elif isinstance(docs[0], Document):
+ self.vs = self.vs_cls.from_documents(docs, self.embedding,
+ **self.vs_params)
+
+ def retrieve(self, query: str) -> List[str]:
+ res = self.vs.similarity_search(query, k=self.top_k)
+ if 'page' in res[0].metadata:
+ res.sort(key=lambda doc: doc.metadata['page'])
+ return [r.page_content for r in res]
+
+
+class ToolRetrieval(Retrieval):
+
+ def __init__(self,
+ embedding: Embeddings = None,
+ vs_cls: VectorStore = None,
+ top_k: int = 5,
+ vs_params: Dict = {}):
+ super().__init__(embedding, vs_cls, top_k, vs_params)
+
+ def retrieve(self, query: str) -> Dict[str, str]:
+ res = self.vs.similarity_search(query, k=self.top_k)
+
+ final_res = {}
+
+ for r in res:
+ content = r.page_content
+ name = json.loads(content)['name']
+ final_res[name] = content
+
+ return final_res
+
+
+class KnowledgeRetrieval(Retrieval):
+
+ def __init__(self,
+ docs,
+ embedding: Embeddings = None,
+ vs_cls: VectorStore = None,
+ top_k: int = 5,
+ vs_params: Dict = {}):
+ super().__init__(embedding, vs_cls, top_k, vs_params)
+ self.construct(docs)
+
+ @classmethod
+ def from_file(cls,
+ file_path: Union[str, list],
+ embedding: Embeddings = None,
+ vs_cls: VectorStore = None,
+ top_k: int = 5,
+ vs_params: Dict = {}):
+
+ textsplitter = CharacterTextSplitter()
+ all_files = []
+ if isinstance(file_path, str) and os.path.isfile(file_path):
+ all_files.append(file_path)
+ elif isinstance(file_path, list):
+ all_files = file_path
+ elif os.path.isdir(file_path):
+ for root, dirs, files in os.walk(file_path):
+ for f in files:
+ all_files.append(os.path.join(root, f))
+ else:
+ raise ValueError('file_path must be a file or a directory')
+
+ docs = []
+ for f in all_files:
+ if f.lower().endswith('.txt'):
+ loader = TextLoader(f, autodetect_encoding=True)
+ docs += (loader.load_and_split(textsplitter))
+ elif f.lower().endswith('.md'):
+ loader = UnstructuredFileLoader(f, mode='elements')
+ docs += loader.load()
+ elif f.lower().endswith('.pdf'):
+ loader = PyPDFLoader(f)
+ docs += (loader.load_and_split(textsplitter))
+ else:
+            print(f'unsupported file type: {f}, support will be added soon')
+
+ if len(docs) == 0:
+ return None
+ else:
+ return cls(docs, embedding, vs_cls, top_k, vs_params)
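+
+
+# A minimal usage sketch (assumes a local './docs' folder of .txt/.md/.pdf files):
+#   kr = KnowledgeRetrieval.from_file('./docs')
+#   if kr is not None:
+#       chunks = kr.retrieve('如何调用天气插件?')  # top-k matching snippets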
diff --git a/my_modelscope_agent/tools/__init__.py b/my_modelscope_agent/tools/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..67b61bbbe49859dc252b26e51bfc447cd97391b4
--- /dev/null
+++ b/my_modelscope_agent/tools/__init__.py
@@ -0,0 +1,36 @@
+from .amap_weather import AMAPWeather
+from .code_interperter import CodeInterpreter
+from .code_interpreter_jupyter import CodeInterpreterJupyter
+from .hf_tool import HFTool
+from .image_chat_tool import ImageChatTool
+from .pipeline_tool import ModelscopePipelineTool
+from .plugin_tool import LangchainTool
+from .text_address_tool import TextAddressTool
+from .text_ie_tool import TextInfoExtractTool
+from .text_ner_tool import TextNerTool
+from .text_to_image_tool import TextToImageTool
+from .text_to_speech_tool import TexttoSpeechTool
+from .text_to_video_tool import TextToVideoTool
+from .tool import Tool
+from .translation_en2zh_tool import TranslationEn2ZhTool
+from .translation_zh2en_tool import TranslationZh2EnTool
+from .web_browser import WebBrowser
+from .web_search import WebSearch
+from .wordart_tool import WordArtTexture
+
+TOOL_INFO_LIST = {
+ 'modelscope_text-translation-zh2en': 'TranslationZh2EnTool',
+ 'modelscope_text-translation-en2zh': 'TranslationEn2ZhTool',
+ 'modelscope_text-ie': 'TextInfoExtractTool',
+ 'modelscope_text-ner': 'TextNerTool',
+ 'modelscope_text-address': 'TextAddressTool',
+ 'image_gen': 'TextToImageTool',
+ 'modelscope_video-generation': 'TextToVideoTool',
+ 'modelscope_image-chat': 'ImageChatTool',
+ 'modelscope_speech-generation': 'TexttoSpeechTool',
+ 'amap_weather': 'AMAPWeather',
+ 'code_interpreter': 'CodeInterpreterJupyter',
+ 'wordart_texture_generation': 'WordArtTexture',
+ 'web_search': 'WebSearch',
+ 'web_browser': 'WebBrowser',
+}
diff --git a/my_modelscope_agent/tools/amap_weather.py b/my_modelscope_agent/tools/amap_weather.py
new file mode 100644
index 0000000000000000000000000000000000000000..50628c10df34d260b4d2a8bbd687cdb0d224f318
--- /dev/null
+++ b/my_modelscope_agent/tools/amap_weather.py
@@ -0,0 +1,64 @@
+import os
+
+import pandas as pd
+import requests
+from ..tools.tool import Tool, ToolSchema
+from pydantic import ValidationError
+
+
+class AMAPWeather(Tool):
+ description = '获取对应城市的天气数据'
+ name = 'amap_weather'
+ parameters: list = [{
+ 'name': 'location',
+ 'description': 'get temperature for a specific location',
+ 'required': True
+ }]
+
+ def __init__(self, cfg={}):
+ self.cfg = cfg.get(self.name, {})
+
+ # remote call
+ self.url = 'https://restapi.amap.com/v3/weather/weatherInfo?city={city}&key={key}'
+ self.token = self.cfg.get('token', os.environ.get('AMAP_TOKEN', ''))
+ self.city_df = pd.read_excel(
+ 'https://modelscope.oss-cn-beijing.aliyuncs.com/resource/agent/AMap_adcode_citycode.xlsx'
+ )
+ assert self.token != '', 'weather api token must be acquired through ' \
+ 'https://lbs.amap.com/api/webservice/guide/create-project/get-key and set by AMAP_TOKEN'
+
+ try:
+ all_param = {
+ 'name': self.name,
+ 'description': self.description,
+ 'parameters': self.parameters
+ }
+ self.tool_schema = ToolSchema(**all_param)
+ except ValidationError:
+ raise ValueError(f'Error when parsing parameters of {self.name}')
+
+ self._str = self.tool_schema.model_dump_json()
+ self._function = self.parse_pydantic_model_to_openai_function(
+ all_param)
+
+ def get_city_adcode(self, city_name):
+ filtered_df = self.city_df[self.city_df['中文名'] == city_name]
+ if len(filtered_df['adcode'].values) == 0:
+ raise ValueError(
+                f'location {city_name} not found, available cities are {self.city_df["中文名"]}'
+ )
+ else:
+ return filtered_df['adcode'].values[0]
+
+ def __call__(self, *args, **kwargs):
+ location = kwargs['location']
+ response = requests.get(
+ self.url.format(
+ city=self.get_city_adcode(location), key=self.token))
+ data = response.json()
+ if data['status'] == '0':
+ raise RuntimeError(data)
+ else:
+ weather = data['lives'][0]['weather']
+ temperature = data['lives'][0]['temperature']
+            return {'result': f'{location}的天气是{weather},温度是{temperature}度。'}
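+
+
+# Illustrative call (assumes AMAP_TOKEN is set; values are examples only):
+#   AMAPWeather()(location='杭州市')
+#   # -> {'result': '杭州市的天气是晴,温度是25度。'}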
diff --git a/my_modelscope_agent/tools/code_interperter.py b/my_modelscope_agent/tools/code_interperter.py
new file mode 100644
index 0000000000000000000000000000000000000000..45100b901922345aa416c433949ca9da55334fc2
--- /dev/null
+++ b/my_modelscope_agent/tools/code_interperter.py
@@ -0,0 +1,125 @@
+import os
+import re
+import traceback
+
+import appdirs
+import json
+
+from .code_interpreter_utils.create_code_interpreter import \
+ create_code_interpreter
+from .code_interpreter_utils.language_map import language_map
+from .code_interpreter_utils.truncate_output import truncate_output
+from .tool import Tool
+
+
+class CodeInterpreter(Tool):
+ """
+ using open interpreter to interpret code
+ by https://github.com/KillianLucas/open-interpreter
+ """
+ description = 'Executes code on the user\'s machine, **in the users local environment**, and returns the output'
+ name = 'code_interpreter'
+ parameters: list = [{
+ 'name': 'language',
+ 'description':
+ 'The programming language (required parameter to the `execute` function)',
+ 'required': True
+ }, {
+ 'name': 'code',
+ 'description': 'The code to execute (required)',
+ 'required': True
+ }]
+
+ def __init__(self, cfg={}):
+ super().__init__(cfg)
+ self.create_code_interpreter = create_code_interpreter
+ self.language_map = language_map
+ self.truncate_output = truncate_output
+
+ self._code_interpreters = {}
+ self.max_output = self.cfg.get('max_output', 2000)
+
+ def _local_call(self, *args, **kwargs):
+
+ language, code = self._handle_input_fallback(**kwargs)
+
+ try:
+ # Fix a common error where the LLM thinks it's in a Jupyter notebook
+ if language == 'python' and code.startswith('!'):
+ code = code[1:]
+ language = 'shell'
+
+ if language in self.language_map:
+ if language not in self._code_interpreters:
+ self._code_interpreters[
+ language] = self.create_code_interpreter(language)
+ code_interpreter = self._code_interpreters[language]
+ else:
+                # This still prints the code but doesn't allow it to run; let Open Interpreter know via the output message
+ error_output = f'Error: Open Interpreter does not currently support {language}.'
+ print(error_output)
+ output = '\n' + error_output
+ return {'result': output.strip()}
+
+ output = ''
+ for line in code_interpreter.run(code):
+ if 'output' in line:
+ output += '\n' + line['output']
+
+ # Truncate output
+ output = self.truncate_output(output, self.max_output)
+ except Exception as e:
+ error = traceback.format_exc()
+ output = ' '.join(f'{key}:{value}'
+ for key, value in kwargs.items())
+ output += f'\nDetail error is {e}.\n{error}'
+
+ return {'result': output.strip()}
+
+ def _handle_input_fallback(self, **kwargs):
+ """
+        An alternative path is to parse the code from the message content rather than from a function call, e.g.:
+ such as:
+ text = response['content']
+        code_block = re.search(r'```([\s\S]+)```', text) # noqa W605
+ if code_block:
+ result = code_block.group(1)
+ language = result.split('\n')[0]
+ code = '\n'.join(result.split('\n')[1:])
+
+        :param kwargs: may carry 'language', 'code' or a raw 'fallback' text
+        :return: language, code
+ """
+
+ language = kwargs.get('language', None)
+ code = kwargs.get('code', None)
+ fallback = kwargs.get('fallback', None)
+
+ if language and code:
+ return language, code
+ elif fallback:
+ try:
+ text = fallback
+                code_block = re.search(r'```([\s\S]+)```', text) # noqa W605
+ if code_block:
+ result = code_block.group(1)
+ # for multi code_block
+ result = result.split('```')[0]
+ language = result.split('\n')[0]
+ if language == 'py' or language == 'python':
+ # handle py case
+ # ```py code ```
+ language = 'python'
+ code = '\n'.join(result.split('\n')[1:])
+ return language, code
+
+ if language == 'json':
+ # handle json case
+ # ```json {language,code}```
+ parameters = json.loads('\n'.join(
+ result.split('\n')[1:]).replace('\n', ''))
+ return parameters['language'], parameters['code']
+ except ValueError:
+ return language, code
+ else:
+ return language, code
diff --git a/my_modelscope_agent/tools/code_interpreter_jupyter.py b/my_modelscope_agent/tools/code_interpreter_jupyter.py
new file mode 100644
index 0000000000000000000000000000000000000000..dca78093c3245e91a6079b0079a03f87a3ccdbb5
--- /dev/null
+++ b/my_modelscope_agent/tools/code_interpreter_jupyter.py
@@ -0,0 +1,319 @@
+import asyncio
+import atexit
+import base64
+import glob
+import io
+import os
+import queue
+import re
+import shutil
+import signal
+import subprocess
+import sys
+import time
+import traceback
+import uuid
+from pathlib import Path
+from typing import Dict, Optional
+
+import json
+import matplotlib
+import PIL.Image
+from jupyter_client import BlockingKernelClient
+
+from .tool import Tool
+
+WORK_DIR = os.getenv('CODE_INTERPRETER_WORK_DIR', '/tmp/ci_workspace')
+
+STATIC_URL = os.getenv('CODE_INTERPRETER_STATIC_URL',
+ 'http://127.0.0.1:7866/static')
+
+LAUNCH_KERNEL_PY = """
+from ipykernel import kernelapp as app
+app.launch_new_instance()
+"""
+
+INIT_CODE_FILE = str(
+ Path(__file__).absolute().parent / 'code_interpreter_utils'
+ / 'code_interpreter_init_kernel.py')
+
+ALIB_FONT_FILE = str(
+ Path(__file__).absolute().parent / 'code_interpreter_utils'
+ / 'AlibabaPuHuiTi-3-45-Light.ttf')
+
+_KERNEL_CLIENTS: Dict[int, BlockingKernelClient] = {}
+
+
+class CodeInterpreterJupyter(Tool):
+ """
+    using a jupyter kernel client to interpret python code;
+    should not be used together with other code interpreter tools at the same time
+ """
+ description = '代码解释器,可用于执行Python代码。'
+ name = 'code_interpreter'
+ parameters: list = [{
+ 'name': 'code',
+ 'description': '待执行的代码',
+ 'required': True
+ }]
+
+ def __init__(self, cfg={}):
+ super().__init__(cfg)
+ self.timeout = self.cfg.get('timeout', 30)
+ self.image_server = self.cfg.get('image_server', False)
+ self.kernel_clients: Dict[int, BlockingKernelClient] = {}
+ atexit.register(self._kill_kernels)
+
+ pid: int = os.getpid()
+ if pid in self.kernel_clients:
+ kc = self.kernel_clients[pid]
+ else:
+ self._fix_matplotlib_cjk_font_issue()
+ kc = self._start_kernel(pid)
+ with open(INIT_CODE_FILE) as fin:
+ start_code = fin.read()
+ start_code = start_code.replace('{{M6_FONT_PATH}}',
+ repr(ALIB_FONT_FILE)[1:-1])
+ print(self._execute_code(kc, start_code))
+ self.kernel_clients[pid] = kc
+
+ self.kc = kc
+
+ def __del__(self):
+ # make sure all the kernels are killed during __del__
+ signal.signal(signal.SIGTERM, self._kill_kernels)
+ signal.signal(signal.SIGINT, self._kill_kernels)
+
+ def _start_kernel(self, pid) -> BlockingKernelClient:
+ connection_file = os.path.join(WORK_DIR,
+ f'kernel_connection_file_{pid}.json')
+ launch_kernel_script = os.path.join(WORK_DIR,
+ f'launch_kernel_{pid}.py')
+ for f in [connection_file, launch_kernel_script]:
+ if os.path.exists(f):
+ print(f'WARNING: {f} already exists')
+ os.remove(f)
+
+ os.makedirs(WORK_DIR, exist_ok=True)
+
+ with open(launch_kernel_script, 'w') as fout:
+ fout.write(LAUNCH_KERNEL_PY)
+
+ available_envs = ['PATH', 'PYTHONPATH', 'LD_LIBRARY_PATH']
+ envs = {}
+ for k in available_envs:
+ if os.getenv(k) is not None:
+ envs[k] = os.getenv(k)
+
+ args = (
+ sys.executable,
+ launch_kernel_script,
+ '--IPKernelApp.connection_file',
+ connection_file,
+ '--matplotlib=inline',
+ '--quiet',
+ )
+ kernel_process = subprocess.Popen([*args], env=envs,
+ cwd=WORK_DIR) # noqa E126
+ print(f"INFO: kernel process's PID = {kernel_process.pid}")
+
+ # Wait for kernel connection file to be written
+ while True:
+ if not os.path.isfile(connection_file):
+ time.sleep(0.1)
+ else:
+ # Keep looping if JSON parsing fails, file may be partially written
+ try:
+ with open(connection_file, 'r') as fp:
+ json.load(fp)
+ break
+ except json.JSONDecodeError:
+ pass
+
+ # Client
+ kc = BlockingKernelClient(connection_file=connection_file)
+ asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
+ kc.load_connection_file()
+ kc.start_channels()
+ kc.wait_for_ready()
+ return kc
+
+ def _kill_kernels(self):
+ for v in self.kernel_clients.values():
+ v.shutdown()
+ for k in list(self.kernel_clients.keys()):
+ del self.kernel_clients[k]
+
+ def _serve_image(self, image_base64: str, image_type: str) -> str:
+ image_file = f'{uuid.uuid4()}.{image_type}'
+ local_image_file = os.path.join(WORK_DIR, image_file)
+
+ png_bytes = base64.b64decode(image_base64)
+ assert isinstance(png_bytes, bytes)
+
+ if image_type == 'gif':
+ with open(local_image_file, 'wb') as file:
+ file.write(png_bytes)
+ else:
+ bytes_io = io.BytesIO(png_bytes)
+ PIL.Image.open(bytes_io).save(local_image_file, image_type)
+
+ if self.image_server:
+ image_url = f'{STATIC_URL}/{image_file}'
+ return image_url
+ else:
+ return local_image_file
+
+ def _escape_ansi(self, line: str) -> str:
+ ansi_escape = re.compile(r'(?:\x1B[@-_]|[\x80-\x9F])[0-?]*[ -/]*[@-~]')
+ return ansi_escape.sub('', line)
+
+ def _fix_matplotlib_cjk_font_issue(self):
+ ttf_name = os.path.basename(ALIB_FONT_FILE)
+ local_ttf = os.path.join(
+ os.path.abspath(
+ os.path.join(matplotlib.matplotlib_fname(), os.path.pardir)),
+ 'fonts', 'ttf', ttf_name)
+ if not os.path.exists(local_ttf):
+ try:
+ shutil.copy(ALIB_FONT_FILE, local_ttf)
+ font_list_cache = os.path.join(matplotlib.get_cachedir(),
+ 'fontlist-*.json')
+ for cache_file in glob.glob(font_list_cache):
+ with open(cache_file) as fin:
+ cache_content = fin.read()
+ if ttf_name not in cache_content:
+ os.remove(cache_file)
+ except Exception:
+                print(traceback.format_exc())
+
+ def _execute_code(self, kc: BlockingKernelClient, code: str) -> str:
+ kc.wait_for_ready()
+ kc.execute(code)
+ result = ''
+ image_idx = 0
+ while True:
+ text = ''
+ image = ''
+ finished = False
+ msg_type = 'error'
+ try:
+ msg = kc.get_iopub_msg()
+ msg_type = msg['msg_type']
+ if msg_type == 'status':
+ if msg['content'].get('execution_state') == 'idle':
+ finished = True
+ elif msg_type == 'execute_result':
+ text = msg['content']['data'].get('text/plain', '')
+ if 'image/png' in msg['content']['data']:
+ image_b64 = msg['content']['data']['image/png']
+ image_url = self._serve_image(image_b64, 'png')
+ image_idx += 1
+ image = '![IMAGEGEN](%s)' % (image_url)
+ elif 'text/html' in msg['content']['data']:
+ text += '\n' + msg['content']['data']['text/html']
+ elif 'image/gif' in msg['content']['data']:
+ image_b64 = msg['content']['data']['image/gif']
+ image_url = self._serve_image(image_b64, 'gif')
+ image_idx += 1
+ image = '![IMAGEGEN](%s)' % (image_url)
+ elif msg_type == 'display_data':
+ if 'image/png' in msg['content']['data']:
+ image_b64 = msg['content']['data']['image/png']
+ image_url = self._serve_image(image_b64, 'png')
+ image_idx += 1
+ image = '![IMAGEGEN](%s)' % (image_url)
+ else:
+ text = msg['content']['data'].get('text/plain', '')
+ elif msg_type == 'stream':
+ msg_type = msg['content']['name'] # stdout, stderr
+ text = msg['content']['text']
+ elif msg_type == 'error':
+ text = self._escape_ansi('\n'.join(
+ msg['content']['traceback']))
+ if 'M6_CODE_INTERPRETER_TIMEOUT' in text:
+ text = 'Timeout: Code execution exceeded the time limit.'
+ except queue.Empty:
+ text = 'Timeout: Code execution exceeded the time limit.'
+ finished = True
+ except Exception:
+ text = 'The code interpreter encountered an unexpected error.'
+                print(traceback.format_exc())
+ finished = True
+ if text:
+ result += f'\n{text}'
+ if image:
+ result += f'\n\n{image}'
+ if finished:
+ break
+ result = result.lstrip('\n')
+ if not result:
+ result += 'The code executed successfully.'
+ return result
+
+ def _local_call(self, *args, **kwargs):
+ code = self._handle_input_fallback(**kwargs)
+ if not code.strip():
+ return ''
+
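+        # _M6CountdownTimer is defined in code_interpreter_init_kernel.py; it
+        # arms a SIGALRM that raises TimeoutError('M6_CODE_INTERPRETER_TIMEOUT'),
+        # which _execute_code converts into a timeout message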
+ if self.timeout:
+ code = f'_M6CountdownTimer.start({self.timeout})\n{code}'
+
+ fixed_code = []
+ for line in code.split('\n'):
+ fixed_code.append(line)
+ if line.startswith('sns.set_theme('):
+ fixed_code.append(
+ 'plt.rcParams["font.family"] = _m6_font_prop.get_name()')
+ fixed_code = '\n'.join(fixed_code)
+ result = self._execute_code(self.kc, fixed_code)
+
+ if self.timeout:
+ self._execute_code(self.kc, '_M6CountdownTimer.cancel()')
+
+ return {'result': result}
+
+ def _handle_input_fallback(self, **kwargs):
+ """
+        An alternative path is to parse the code from the message content rather than from a function call, e.g.:
+ such as:
+ text = response['content']
+        code_block = re.search(r'```([\s\S]+)```', text) # noqa W605
+ if code_block:
+ result = code_block.group(1)
+ language = result.split('\n')[0]
+ code = '\n'.join(result.split('\n')[1:])
+
+        :param kwargs: may carry 'code' or a raw 'fallback' text
+        :return: code
+ """
+
+ code = kwargs.get('code', None)
+ fallback = kwargs.get('fallback', None)
+
+ if code:
+ return code
+ elif fallback:
+ try:
+ text = fallback
+                code_block = re.search(r'```([\s\S]+)```', text) # noqa W605
+ if code_block:
+ result = code_block.group(1)
+ language = result.split('\n')[0]
+ if language == 'py' or language == 'python':
+ # handle py case
+ # ```py code ```
+ language = 'python'
+ code = '\n'.join(result.split('\n')[1:])
+ return code
+
+ if language == 'json':
+ # handle json case
+ # ```json {language,code}```
+ parameters = json.loads('\n'.join(
+ result.split('\n')[1:]).replace('\n', ''))
+ return parameters['code']
+ except ValueError:
+ return code
+ else:
+ return code
diff --git a/my_modelscope_agent/tools/code_interpreter_utils/__init__.py b/my_modelscope_agent/tools/code_interpreter_utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..274f24c3713992802271d3777697b160e150a3cc
--- /dev/null
+++ b/my_modelscope_agent/tools/code_interpreter_utils/__init__.py
@@ -0,0 +1,5 @@
+# all the utility functions under code_interpreter_utils are borrowed from the
+# project below, in order to support Python versions lower than 3.10
+# https://github.com/KillianLucas/open-interpreter
+
+from .base_code_interpreter import BaseCodeInterpreter
diff --git a/my_modelscope_agent/tools/code_interpreter_utils/base_code_interpreter.py b/my_modelscope_agent/tools/code_interpreter_utils/base_code_interpreter.py
new file mode 100644
index 0000000000000000000000000000000000000000..23796e424034ed98f5bc9ad37db6acd2e742d8b9
--- /dev/null
+++ b/my_modelscope_agent/tools/code_interpreter_utils/base_code_interpreter.py
@@ -0,0 +1,13 @@
+class BaseCodeInterpreter:
+ """
+ .run is a generator that yields a dict with attributes: active_line, output
+ """
+
+ def __init__(self):
+ pass
+
+ def run(self, code):
+ pass
+
+ def terminate(self):
+ pass
diff --git a/my_modelscope_agent/tools/code_interpreter_utils/code_interpreter_init_kernel.py b/my_modelscope_agent/tools/code_interpreter_utils/code_interpreter_init_kernel.py
new file mode 100644
index 0000000000000000000000000000000000000000..62511247f09cd9d15d9f6ea7491f29ae1bbdaf3c
--- /dev/null
+++ b/my_modelscope_agent/tools/code_interpreter_utils/code_interpreter_init_kernel.py
@@ -0,0 +1,50 @@
+import math # noqa
+import os # noqa
+import re # noqa
+import signal
+
+import json # noqa
+import matplotlib # noqa
+import matplotlib.pyplot as plt
+import numpy as np # noqa
+import pandas as pd # noqa
+import seaborn as sns
+from matplotlib.font_manager import FontProperties
+from sympy import Eq, solve, symbols # noqa
+
+
+def input(*args, **kwargs): # noqa
+ raise NotImplementedError('Python input() function is disabled.')
+
+
+def _m6_timout_handler(_signum=None, _frame=None):
+ raise TimeoutError('M6_CODE_INTERPRETER_TIMEOUT')
+
+
+try:
+ signal.signal(signal.SIGALRM, _m6_timout_handler)
+except AttributeError: # windows
+ pass
+
+
+class _M6CountdownTimer:
+
+ @classmethod
+ def start(cls, timeout: int):
+ try:
+ signal.alarm(timeout)
+ except AttributeError: # windows
+ pass # TODO: I haven't found a solution that works with jupyter yet.
+
+ @classmethod
+ def cancel(cls):
+ try:
+ signal.alarm(0)
+ except AttributeError: # windows
+ pass # TODO
+
+
+sns.set_theme()
+
+_m6_font_prop = FontProperties(fname='{{M6_FONT_PATH}}')
+plt.rcParams['font.family'] = _m6_font_prop.get_name()
diff --git a/my_modelscope_agent/tools/code_interpreter_utils/create_code_interpreter.py b/my_modelscope_agent/tools/code_interpreter_utils/create_code_interpreter.py
new file mode 100644
index 0000000000000000000000000000000000000000..e185b2fe6bdc676d7d89648f0e28c01b4c7915eb
--- /dev/null
+++ b/my_modelscope_agent/tools/code_interpreter_utils/create_code_interpreter.py
@@ -0,0 +1,12 @@
+from .language_map import language_map
+
+
+def create_code_interpreter(language):
+    # Case-insensitive
+ language = language.lower()
+
+ try:
+ CodeInterpreter = language_map[language]
+ return CodeInterpreter()
+ except KeyError:
+ raise ValueError(f'Unknown or unsupported language: {language}')
diff --git a/my_modelscope_agent/tools/code_interpreter_utils/language_map.py b/my_modelscope_agent/tools/code_interpreter_utils/language_map.py
new file mode 100644
index 0000000000000000000000000000000000000000..e28ad50b3a01fbd94eb615ea612dc01de299dd7a
--- /dev/null
+++ b/my_modelscope_agent/tools/code_interpreter_utils/language_map.py
@@ -0,0 +1,19 @@
+from .languages.applescript import AppleScript
+from .languages.html import HTML
+from .languages.javascript import JavaScript
+from .languages.powershell import PowerShell
+from .languages.python import Python
+from .languages.r import R
+from .languages.shell import Shell
+
+language_map = {
+ 'python': Python,
+ 'bash': Shell,
+ 'shell': Shell,
+ 'zsh': Shell,
+ 'javascript': JavaScript,
+ 'html': HTML,
+ 'applescript': AppleScript,
+ 'r': R,
+ 'powershell': PowerShell,
+}
diff --git a/my_modelscope_agent/tools/code_interpreter_utils/languages/__init__.py b/my_modelscope_agent/tools/code_interpreter_utils/languages/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/my_modelscope_agent/tools/code_interpreter_utils/languages/applescript.py b/my_modelscope_agent/tools/code_interpreter_utils/languages/applescript.py
new file mode 100644
index 0000000000000000000000000000000000000000..4100ce3cef60d782871e2a60a3c72d73caef784b
--- /dev/null
+++ b/my_modelscope_agent/tools/code_interpreter_utils/languages/applescript.py
@@ -0,0 +1,67 @@
+import os
+
+from ..subprocess_code_interpreter import SubprocessCodeInterpreter
+
+
+class AppleScript(SubprocessCodeInterpreter):
+ file_extension = 'applescript'
+ proper_name = 'AppleScript'
+
+ def __init__(self):
+ super().__init__()
+ self.start_cmd = os.environ.get('SHELL', '/bin/zsh')
+
+ def preprocess_code(self, code):
+ """
+ Inserts an end_of_execution marker and adds active line indicators.
+ """
+ # Add active line indicators to the code
+ code = self.add_active_line_indicators(code)
+
+ # Escape double quotes
+ code = code.replace('"', r"\"")
+
+ # Wrap in double quotes
+ code = '"' + code + '"'
+
+ # Prepend start command for AppleScript
+ code = 'osascript -e ' + code
+
+ # Append end of execution indicator
+ code += '; echo "##end_of_execution##"'
+
+ return code
+
+ def add_active_line_indicators(self, code):
+ """
+ Adds log commands to indicate the active line of execution in the AppleScript.
+ """
+ modified_lines = []
+ lines = code.split('\n')
+
+ for idx, line in enumerate(lines):
+ # Add log command to indicate the line number
+ if line.strip(): # Only add if line is not empty
+ modified_lines.append(f'log "##active_line{idx + 1}##"')
+ modified_lines.append(line)
+
+ return '\n'.join(modified_lines)
+
+ def detect_active_line(self, line):
+ """
+ Detects active line indicator in the output.
+ """
+ prefix = '##active_line'
+ if prefix in line:
+ try:
+ return int(line.split(prefix)[1].split()[0])
+ except Exception as e:
+                print(e)
+ return None
+
+ def detect_end_of_execution(self, line):
+ """
+ Detects end of execution marker in the output.
+ """
+ return '##end_of_execution##' in line
diff --git a/my_modelscope_agent/tools/code_interpreter_utils/languages/html.py b/my_modelscope_agent/tools/code_interpreter_utils/languages/html.py
new file mode 100644
index 0000000000000000000000000000000000000000..f1745944e328420e72c457d16e568579ada3f8a6
--- /dev/null
+++ b/my_modelscope_agent/tools/code_interpreter_utils/languages/html.py
@@ -0,0 +1,26 @@
+import os
+import tempfile
+import webbrowser
+
+from ..base_code_interpreter import BaseCodeInterpreter
+
+
+class HTML(BaseCodeInterpreter):
+ file_extension = 'html'
+ proper_name = 'HTML'
+
+ def __init__(self):
+ super().__init__()
+
+ def run(self, code):
+ # Create a temporary HTML file with the content
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.html') as f:
+ f.write(code.encode())
+
+ # Open the HTML file with the default web browser
+ webbrowser.open('file://' + os.path.realpath(f.name))
+
+ yield {
+ 'output':
+ f"Saved to {os.path.realpath(f.name)} and opened with the user's default web browser."
+ }
diff --git a/my_modelscope_agent/tools/code_interpreter_utils/languages/javascript.py b/my_modelscope_agent/tools/code_interpreter_utils/languages/javascript.py
new file mode 100644
index 0000000000000000000000000000000000000000..cb35f4f8488e8fcf8d511ab4cd5277e9bb9fa38d
--- /dev/null
+++ b/my_modelscope_agent/tools/code_interpreter_utils/languages/javascript.py
@@ -0,0 +1,66 @@
+import re
+
+from ..subprocess_code_interpreter import SubprocessCodeInterpreter
+
+
+class JavaScript(SubprocessCodeInterpreter):
+ file_extension = 'js'
+ proper_name = 'JavaScript'
+
+ def __init__(self):
+ super().__init__()
+ self.start_cmd = 'node -i'
+
+ def preprocess_code(self, code):
+ return preprocess_javascript(code)
+
+ def line_postprocessor(self, line):
+ # Node's interactive REPL outputs a billion things
+ # So we clean it up:
+ if 'Welcome to Node.js' in line:
+ return None
+ if line.strip() in ['undefined', 'Type ".help" for more information.']:
+ return None
+ # Remove trailing ">"s
+ line = re.sub(r'^\s*(>\s*)+', '', line)
+ return line
+
+ def detect_active_line(self, line):
+ if '##active_line' in line:
+ return int(line.split('##active_line')[1].split('##')[0])
+ return None
+
+ def detect_end_of_execution(self, line):
+ return '##end_of_execution##' in line
+
+
+def preprocess_javascript(code):
+ """
+ Add active line markers
+ Wrap in a try catch
+ Add end of execution marker
+ """
+
+ # Split code into lines
+ lines = code.split('\n')
+ processed_lines = []
+
+ for i, line in enumerate(lines, 1):
+ # Add active line print
+ processed_lines.append(f'console.log("##active_line{i}##");')
+ processed_lines.append(line)
+
+ # Join lines to form the processed code
+ processed_code = '\n'.join(processed_lines)
+
+ # Wrap in a try-catch and add end of execution marker
+ processed_code = f"""
+try {{
+{processed_code}
+}} catch (e) {{
+ console.log(e);
+}}
+console.log("##end_of_execution##");
+"""
+
+ return processed_code
diff --git a/my_modelscope_agent/tools/code_interpreter_utils/languages/powershell.py b/my_modelscope_agent/tools/code_interpreter_utils/languages/powershell.py
new file mode 100644
index 0000000000000000000000000000000000000000..467aa1105252a3f22374f9e8e060c1feeb5b17ee
--- /dev/null
+++ b/my_modelscope_agent/tools/code_interpreter_utils/languages/powershell.py
@@ -0,0 +1,75 @@
+import os
+import platform
+import shutil
+
+from ..subprocess_code_interpreter import SubprocessCodeInterpreter
+
+
+class PowerShell(SubprocessCodeInterpreter):
+ file_extension = 'ps1'
+ proper_name = 'PowerShell'
+
+ def __init__(self):
+ super().__init__()
+
+ # Determine the start command based on the platform (use "powershell" for Windows)
+ if platform.system() == 'Windows':
+ self.start_cmd = 'powershell.exe'
+ # self.start_cmd = os.environ.get('SHELL', 'powershell.exe')
+ else:
+ # On non-Windows platforms, prefer pwsh (PowerShell Core) if available, or fall back to bash
+ self.start_cmd = 'pwsh' if shutil.which('pwsh') else 'bash'
+
+ def preprocess_code(self, code):
+ return preprocess_powershell(code)
+
+ def line_postprocessor(self, line):
+ return line
+
+ def detect_active_line(self, line):
+ if '##active_line' in line:
+ return int(line.split('##active_line')[1].split('##')[0])
+ return None
+
+ def detect_end_of_execution(self, line):
+ return '##end_of_execution##' in line
+
+
+def preprocess_powershell(code):
+ """
+ Add active line markers
+ Wrap in try-catch block
+ Add end of execution marker
+ """
+ # Add commands that tell us what the active line is
+ code = add_active_line_prints(code)
+
+ # Wrap in try-catch block for error handling
+ code = wrap_in_try_catch(code)
+
+ # Add end marker (we'll be listening for this to know when it ends)
+ code += '\nWrite-Output "##end_of_execution##"'
+
+ return code
+
+
+def add_active_line_prints(code):
+ """
+ Add Write-Output statements indicating line numbers to a PowerShell script.
+ """
+ lines = code.split('\n')
+ for index, line in enumerate(lines):
+ # Insert the Write-Output command before the actual line
+ lines[index] = f'Write-Output "##active_line{index + 1}##"\n{line}'
+ return '\n'.join(lines)
+
+
+def wrap_in_try_catch(code):
+ """
+ Wrap PowerShell code in a try-catch block to catch errors and display them.
+ """
+ try_catch_code = """
+try {
+ $ErrorActionPreference = "Stop"
+"""
+ return try_catch_code + code + '\n} catch {\n Write-Error $_\n}\n'
diff --git a/my_modelscope_agent/tools/code_interpreter_utils/languages/python.py b/my_modelscope_agent/tools/code_interpreter_utils/languages/python.py
new file mode 100644
index 0000000000000000000000000000000000000000..107cc2009a3c445a10ee7115bcf624c10b20e7e2
--- /dev/null
+++ b/my_modelscope_agent/tools/code_interpreter_utils/languages/python.py
@@ -0,0 +1,161 @@
+import ast
+import os
+import re
+import shlex
+import sys
+
+from ..subprocess_code_interpreter import SubprocessCodeInterpreter
+
+
+class Python(SubprocessCodeInterpreter):
+ file_extension = 'py'
+ proper_name = 'Python'
+
+ def __init__(self):
+ super().__init__()
+ executable = sys.executable
+ if os.name != 'nt': # not Windows
+ executable = shlex.quote(executable)
+ self.start_cmd = executable + ' -i -q -u'
+
+ def preprocess_code(self, code):
+ return preprocess_python(code)
+
+ def line_postprocessor(self, line):
+ if re.match(r'^(\s*>>>\s*|\s*\.\.\.\s*)', line):
+ return None
+ return line
+
+ def detect_active_line(self, line):
+ if '##active_line' in line:
+ return int(line.split('##active_line')[1].split('##')[0])
+ return None
+
+ def detect_end_of_execution(self, line):
+ return '##end_of_execution##' in line
+
+
+def preprocess_python(code):
+ """
+ Add active line markers
+ Wrap in a try except
+ Add end of execution marker
+ """
+
+ # Add print commands that tell us what the active line is
+ code = add_active_line_prints(code)
+
+ # Wrap in a try except
+ code = wrap_in_try_except(code)
+
+ # Remove any whitespace lines, as this will break indented blocks
+ # (are we sure about this? test this)
+ code_lines = code.split('\n')
+ code_lines = [c for c in code_lines if c.strip() != '']
+ code = '\n'.join(code_lines)
+
+ # Add end command (we'll be listening for this so we know when it ends)
+ code += '\n\nprint("##end_of_execution##")'
+
+ return code
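+# For example, preprocess_python('x = 1\nprint(x)') yields roughly:
+#   try:
+#       import traceback
+#       print('##active_line1##')
+#       x = 1
+#       print('##active_line2##')
+#       print(x)
+#   except Exception:
+#       traceback.print_exc()
+#   print("##end_of_execution##")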
+
+
+def add_active_line_prints(code):
+ """
+ Add print statements indicating line numbers to a python string.
+ """
+ tree = ast.parse(code)
+ transformer = AddLinePrints()
+ new_tree = transformer.visit(tree)
+ return ast.unparse(new_tree)
+
+
+class AddLinePrints(ast.NodeTransformer):
+ """
+ Transformer to insert print statements indicating the line number
+ before every executable line in the AST.
+ """
+
+ def insert_print_statement(self, line_number):
+ """Inserts a print statement for a given line number."""
+ return ast.Expr(
+ value=ast.Call(
+ func=ast.Name(id='print', ctx=ast.Load()),
+ args=[ast.Constant(value=f'##active_line{line_number}##')],
+ keywords=[],
+ ))
+
+ def process_body(self, body):
+ """Processes a block of statements, adding print calls."""
+ new_body = []
+
+ # In case it's not iterable:
+ if not isinstance(body, list):
+ body = [body]
+
+ for sub_node in body:
+ if hasattr(sub_node, 'lineno'):
+ new_body.append(self.insert_print_statement(sub_node.lineno))
+ new_body.append(sub_node)
+
+ return new_body
+
+ def visit(self, node):
+ """Overridden visit to transform nodes."""
+ new_node = super().visit(node)
+
+ # If node has a body, process it
+ if hasattr(new_node, 'body'):
+ new_node.body = self.process_body(new_node.body)
+
+ # If node has an orelse block (like in for, while, if), process it
+ if hasattr(new_node, 'orelse') and new_node.orelse:
+ new_node.orelse = self.process_body(new_node.orelse)
+
+ # Special case for Try nodes as they have multiple blocks
+ if isinstance(new_node, ast.Try):
+ for handler in new_node.handlers:
+ handler.body = self.process_body(handler.body)
+ if new_node.finalbody:
+ new_node.finalbody = self.process_body(new_node.finalbody)
+
+ return new_node
+
+
+def wrap_in_try_except(code):
+ # Add import traceback
+ code = 'import traceback\n' + code
+
+ # Parse the input code into an AST
+ parsed_code = ast.parse(code)
+
+ # Wrap the entire code's AST in a single try-except block
+ try_except = ast.Try(
+ body=parsed_code.body,
+ handlers=[
+ ast.ExceptHandler(
+ type=ast.Name(id='Exception', ctx=ast.Load()),
+ name=None,
+ body=[
+ ast.Expr(
+ value=ast.Call(
+ func=ast.Attribute(
+ value=ast.Name(id='traceback', ctx=ast.Load()),
+ attr='print_exc',
+ ctx=ast.Load(),
+ ),
+ args=[],
+ keywords=[],
+ )),
+ ],
+ )
+ ],
+ orelse=[],
+ finalbody=[],
+ )
+
+ # Assign the try-except block as the new body
+ parsed_code.body = [try_except]
+
+ # Convert the modified AST back to source code
+ return ast.unparse(parsed_code)
diff --git a/my_modelscope_agent/tools/code_interpreter_utils/languages/r.py b/my_modelscope_agent/tools/code_interpreter_utils/languages/r.py
new file mode 100644
index 0000000000000000000000000000000000000000..28936608c7fce3b1ff7952bd09abd0b62f95539e
--- /dev/null
+++ b/my_modelscope_agent/tools/code_interpreter_utils/languages/r.py
@@ -0,0 +1,71 @@
+import re
+
+from ..subprocess_code_interpreter import SubprocessCodeInterpreter
+
+
+class R(SubprocessCodeInterpreter):
+ file_extension = 'r'
+ proper_name = 'R'
+
+ def __init__(self):
+ super().__init__()
+ self.start_cmd = 'R -q --vanilla' # Start R in quiet and vanilla mode
+
+ def preprocess_code(self, code):
+ """
+ Add active line markers
+ Wrap in a tryCatch for better error handling in R
+ Add end of execution marker
+ """
+
+ lines = code.split('\n')
+ processed_lines = []
+
+ for i, line in enumerate(lines, 1):
+ # Add active line print
+ processed_lines.append(f'cat("##active_line{i}##\\n");{line}')
+
+ # Join lines to form the processed code
+ processed_code = '\n'.join(processed_lines)
+
+ # Wrap in a tryCatch for error handling and add end of execution marker
+ processed_code = f"""
+tryCatch({{
+{processed_code}
+}}, error=function(e){{
+ cat("## execution_error ##\\n", conditionMessage(e), "\\n");
+}})
+cat("## end_of_execution ##\\n");
+"""
+ # Count the number of lines of processed_code
+ # (R echoes all code back for some reason, but we can skip it if we track this!)
+ self.code_line_count = len(processed_code.split('\n')) - 1
+
+ return processed_code
+
+ def line_postprocessor(self, line):
+ # If the line count attribute is set and non-zero, decrement and skip the line
+ if hasattr(self, 'code_line_count') and self.code_line_count > 0:
+ self.code_line_count -= 1
+ return None
+
+ if re.match(r'^(\s*>>>\s*|\s*\.\.\.\s*|\s*>\s*|\s*\+\s*|\s*)$', line):
+ return None
+ if 'R version' in line: # Startup message
+ return None
+ if line.strip().startswith('[1] "') and line.endswith(
+ '"'): # For strings, trim quotation marks
+ return line[5:-1].strip()
+ if line.strip().startswith(
+ '[1]'): # Normal R output prefix for non-string outputs
+ return line[4:].strip()
+
+ return line
+
+ def detect_active_line(self, line):
+ if '##active_line' in line:
+ return int(line.split('##active_line')[1].split('##')[0])
+ return None
+
+ def detect_end_of_execution(self, line):
+        return '## end_of_execution ##' in line or '## execution_error ##' in line
diff --git a/my_modelscope_agent/tools/code_interpreter_utils/languages/shell.py b/my_modelscope_agent/tools/code_interpreter_utils/languages/shell.py
new file mode 100644
index 0000000000000000000000000000000000000000..bbc067071f317c4a22176f140c910f139f09e4dc
--- /dev/null
+++ b/my_modelscope_agent/tools/code_interpreter_utils/languages/shell.py
@@ -0,0 +1,89 @@
+import os
+import platform
+import re
+
+from ..subprocess_code_interpreter import SubprocessCodeInterpreter
+
+
+class Shell(SubprocessCodeInterpreter):
+ file_extension = 'sh'
+ proper_name = 'Shell'
+
+ def __init__(self):
+ super().__init__()
+
+ # Determine the start command based on the platform
+ if platform.system() == 'Windows':
+ self.start_cmd = 'cmd.exe'
+ else:
+ self.start_cmd = os.environ.get('SHELL', 'bash')
+
+ def preprocess_code(self, code):
+ return preprocess_shell(code)
+
+ def line_postprocessor(self, line):
+ return line
+
+ def detect_active_line(self, line):
+ if '##active_line' in line:
+ return int(line.split('##active_line')[1].split('##')[0])
+ return None
+
+ def detect_end_of_execution(self, line):
+ return '##end_of_execution##' in line
+
+
+def preprocess_shell(code):
+ """
+ Add active line markers
+ Wrap in a try except (trap in shell)
+ Add end of execution marker
+ """
+
+ # Add commands that tell us what the active line is
+ # if it's multiline, just skip this. soon we should make it work with multiline
+ if not has_multiline_commands(code):
+ code = add_active_line_prints(code)
+
+ # Add end command (we'll be listening for this so we know when it ends)
+ code += '\necho "##end_of_execution##"'
+
+ return code
+
+
+def add_active_line_prints(code):
+ """
+ Add echo statements indicating line numbers to a shell string.
+ """
+ lines = code.split('\n')
+ for index, line in enumerate(lines):
+ # Insert the echo command before the actual line
+ lines[index] = f'echo "##active_line{index + 1}##"\n{line}'
+ return '\n'.join(lines)
+
+
+def has_multiline_commands(script_text):
+ # Patterns that indicate a line continues
+ continuation_patterns = [
+ r'\\$', # Line continuation character at the end of the line
+ r'\|$', # Pipe character at the end of the line indicating a pipeline continuation
+ r'&&\s*$', # Logical AND at the end of the line
+ r'\|\|\s*$', # Logical OR at the end of the line
+ r'<\($', # Start of process substitution
+ r'\($', # Start of subshell
+ r'{\s*$', # Start of a block
+ r'\bif\b', # Start of an if statement
+ r'\bwhile\b', # Start of a while loop
+ r'\bfor\b', # Start of a for loop
+ r'do\s*$', # 'do' keyword for loops
+ r'then\s*$', # 'then' keyword for if statements
+ ]
+
+ # Check each line for multiline patterns
+ for line in script_text.splitlines():
+ if any(
+ re.search(pattern, line.rstrip())
+ for pattern in continuation_patterns):
+ return True
+
+ return False
diff --git a/my_modelscope_agent/tools/code_interpreter_utils/subprocess_code_interpreter.py b/my_modelscope_agent/tools/code_interpreter_utils/subprocess_code_interpreter.py
new file mode 100644
index 0000000000000000000000000000000000000000..01e6a7e0dddf8cce88e4aa15dd74a29849940e08
--- /dev/null
+++ b/my_modelscope_agent/tools/code_interpreter_utils/subprocess_code_interpreter.py
@@ -0,0 +1,152 @@
+import queue
+import subprocess
+import threading
+import time
+import traceback
+
+from .base_code_interpreter import BaseCodeInterpreter
+
+
+class SubprocessCodeInterpreter(BaseCodeInterpreter):
+
+ def __init__(self):
+ self.start_cmd = ''
+ self.process = None
+ self.debug_mode = False
+ self.output_queue = queue.Queue()
+ self.done = threading.Event()
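+        # stdout/stderr are pumped into output_queue by two daemon threads
+        # (see start_process); `done` is set once an end-of-execution marker or
+        # a KeyboardInterrupt is observed in the stream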
+
+ def detect_active_line(self, line):
+ return None
+
+ def detect_end_of_execution(self, line):
+ return None
+
+ def line_postprocessor(self, line):
+ return line
+
+ def preprocess_code(self, code):
+ """
+ This needs to insert an end_of_execution marker of some kind,
+ which can be detected by detect_end_of_execution.
+
+ Optionally, add active line markers for detect_active_line.
+ """
+ return code
+
+ def terminate(self):
+ self.process.terminate()
+
+ def start_process(self):
+ if self.process:
+ self.terminate()
+
+ self.process = subprocess.Popen(
+ self.start_cmd.split(),
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ text=True,
+ bufsize=0,
+ universal_newlines=True,
+ )
+ threading.Thread(
+ target=self.handle_stream_output,
+ args=(self.process.stdout, False),
+ daemon=True,
+ ).start()
+ threading.Thread(
+ target=self.handle_stream_output,
+ args=(self.process.stderr, True),
+ daemon=True,
+ ).start()
+
+ def run(self, code):
+ retry_count = 0
+ max_retries = 3
+
+ # Setup
+ try:
+ code = self.preprocess_code(code)
+ if not self.process:
+ self.start_process()
+ except Exception as e:
+ print(e)
+ yield {'output': traceback.format_exc()}
+ return
+
+ while retry_count <= max_retries:
+ if self.debug_mode:
+ print(
+ f'(after processing) Running processed code:\n{code}\n---')
+
+ self.done.clear()
+
+ try:
+ self.process.stdin.write(code + '\n')
+ self.process.stdin.flush()
+ break
+ except Exception as e:
+ print(e)
+ if retry_count != 0:
+                    # For UX, suppress the traceback the first time this happens;
+                    # it is usually transient. It occurs frequently with applescript,
+                    # which still needs investigation.
+ yield {'output': traceback.format_exc()}
+ yield {
+ 'output': f'Retrying... ({retry_count}/{max_retries})'
+ }
+ yield {'output': 'Restarting process.'}
+
+ self.start_process()
+
+ retry_count += 1
+ if retry_count > max_retries:
+ yield {
+ 'output':
+ 'Maximum retries reached. Could not execute code.'
+ }
+ return
+
+ while True:
+ if not self.output_queue.empty():
+ yield self.output_queue.get()
+ else:
+ time.sleep(0.1)
+ try:
+ output = self.output_queue.get(
+ timeout=0.3) # Waits for 0.3 seconds
+ yield output
+ except queue.Empty:
+ if self.done.is_set():
+                        # Drain the queue a few more times in case output arrived
+                        # between the timeout and the done event being set.
+ for _ in range(3):
+ if not self.output_queue.empty():
+ yield self.output_queue.get()
+ time.sleep(0.2)
+ break
+
+ def handle_stream_output(self, stream, is_error_stream):
+ for line in iter(stream.readline, ''):
+ if self.debug_mode:
+ print(f'Received output line:\n{line}\n---')
+
+ line = self.line_postprocessor(line)
+
+ if line is None:
+ continue # `line = None` is the postprocessor's signal to discard completely
+
+            active_line = self.detect_active_line(line)
+            if active_line:
+                self.output_queue.put({'active_line': active_line})
+ elif self.detect_end_of_execution(line):
+ self.output_queue.put({'active_line': None})
+ time.sleep(0.1)
+ self.done.set()
+ elif is_error_stream and 'KeyboardInterrupt' in line:
+ self.output_queue.put({'output': 'KeyboardInterrupt'})
+ time.sleep(0.1)
+ self.done.set()
+ else:
+ self.output_queue.put({'output': line})
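A usage sketch for the streaming interface, assuming a POSIX shell is available; each yielded dict carries either an `active_line` marker or an `output` chunk:

```python
# Sketch: run a snippet through the Shell interpreter defined above and
# consume the structured output stream until ##end_of_execution## arrives.
from my_modelscope_agent.tools.code_interpreter_utils.languages.shell import Shell

interpreter = Shell()
for chunk in interpreter.run('echo hello'):
    print(chunk)  # e.g. {'active_line': 1}, {'output': 'hello\n'}, {'active_line': None}
interpreter.terminate()
```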
diff --git a/my_modelscope_agent/tools/code_interpreter_utils/truncate_output.py b/my_modelscope_agent/tools/code_interpreter_utils/truncate_output.py
new file mode 100644
index 0000000000000000000000000000000000000000..f3ed3314ff51d0a7af6c19abb3e4bfabc2ede420
--- /dev/null
+++ b/my_modelscope_agent/tools/code_interpreter_utils/truncate_output.py
@@ -0,0 +1,15 @@
+def truncate_output(data, max_output_chars=2000):
+ needs_truncation = False
+
+ message = f'Output truncated. Showing the last {max_output_chars} characters.\n\n'
+
+ # Remove previous truncation message if it exists
+ if data.startswith(message):
+ data = data[len(message):]
+ needs_truncation = True
+
+ # If data exceeds max length, truncate it and add message
+ if len(data) > max_output_chars or needs_truncation:
+ data = message + data[-max_output_chars:]
+
+ return data
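A quick sketch of the intended behaviour: the truncation banner is stripped before re-truncating, so repeated calls are idempotent:

```python
from my_modelscope_agent.tools.code_interpreter_utils.truncate_output import truncate_output

once = truncate_output('x' * 5000, max_output_chars=2000)
twice = truncate_output(once, max_output_chars=2000)
assert once.startswith('Output truncated.')
assert once == twice  # the banner never stacks
```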
diff --git a/my_modelscope_agent/tools/hf_tool.py b/my_modelscope_agent/tools/hf_tool.py
new file mode 100644
index 0000000000000000000000000000000000000000..98fa94e6fd76d96139e1292fe35b136acaa4f9ab
--- /dev/null
+++ b/my_modelscope_agent/tools/hf_tool.py
@@ -0,0 +1,22 @@
+from typing import Dict, List
+
+from transformers.tools import Tool as TransformersTool
+
+from .tool import Tool
+
+
+class HFTool(Tool):
+ """Simple wrapper for huggingface transformers tools
+
+ """
+
+ def __init__(self, tool: HFTool, description: str, name: str,
+ parameters: List[Dict]):
+ self.tool = tool
+ self.description = description
+ self.name = name
+ self.parameters = parameters
+ super().__init__()
+
+ def _local_call(self, *args, **kwargs):
+ return {'result': self.tool(**kwargs)}
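A hedged sketch of the wrapper in use; `load_tool('translation')` is illustrative only, and any transformers Tool instance can be wrapped the same way:

```python
from transformers.tools import load_tool

from my_modelscope_agent.tools.hf_tool import HFTool

wrapped = HFTool(
    load_tool('translation'),  # illustrative; substitute any transformers tool
    description='translate text between languages',
    name='hf_translation',
    parameters=[{
        'name': 'text',
        'description': 'text to translate',
        'required': True
    }])
print(wrapped.get_function())  # OpenAI-style function schema for the wrapper
```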
diff --git a/my_modelscope_agent/tools/image_chat_tool.py b/my_modelscope_agent/tools/image_chat_tool.py
new file mode 100644
index 0000000000000000000000000000000000000000..526df966b8ac47bb3e26224c8be8d941101f3f9f
--- /dev/null
+++ b/my_modelscope_agent/tools/image_chat_tool.py
@@ -0,0 +1,51 @@
+from modelscope.utils.constant import Tasks
+from .pipeline_tool import ModelscopePipelineTool
+
+
+class ImageChatTool(ModelscopePipelineTool):
+ default_model = 'damo/multi-modal_mplug_owl_multimodal-dialogue_7b'
+ description = '图文对话和图像描述服务,针对输入的图片和用户的文本输入,给出文本回复'
+ name = 'modelscope_image-chat'
+ parameters: list = [{
+ 'name': 'image',
+ 'description': '用户输入的图片',
+ 'required': True
+ }, {
+ 'name': 'text',
+ 'description': '用户输入的文本',
+ 'required': True
+ }]
+ task = Tasks.multimodal_dialogue
+
+ def construct_image_chat_input(self, **kwargs):
+ image = kwargs.pop('image', '')
+ text = kwargs.pop('text', '')
+
+ system_prompt_1 = 'The following is a conversation between a curious human and AI assistant.'
+ system_prompt_2 = "The assistant gives helpful, detailed, and polite answers to the user's questions."
+ messages = {
+ 'messages': [
+ {
+ 'role': 'system',
+ 'content': system_prompt_1 + ' ' + system_prompt_2
+ },
+ {
+ 'role': 'user',
+ 'content': [{
+ 'image': image
+ }]
+ },
+ {
+ 'role': 'user',
+ 'content': text
+ },
+ ]
+ }
+ return messages
+
+ def _remote_parse_input(self, *args, **kwargs):
+ messages = self.construct_image_chat_input(**kwargs)
+ return {'input': messages}
+
+ def _local_parse_input(self, *args, **kwargs):
+        return (self.construct_image_chat_input(**kwargs), ), {}
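A sketch of the mPLUG-Owl dialogue payload this tool builds; the image URL is a placeholder:

```python
from my_modelscope_agent.tools.image_chat_tool import ImageChatTool

# `construct_image_chat_input` does not touch `self`, so an unbound call
# suffices for illustration.
messages = ImageChatTool.construct_image_chat_input(
    None, image='https://example.com/cat.png', text='图里有什么?')
print(messages['messages'][1])
# {'role': 'user', 'content': [{'image': 'https://example.com/cat.png'}]}
```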
diff --git a/my_modelscope_agent/tools/openapi_plugin.py b/my_modelscope_agent/tools/openapi_plugin.py
new file mode 100644
index 0000000000000000000000000000000000000000..2502305ab532ae20157e9b98f830ab03fd46d925
--- /dev/null
+++ b/my_modelscope_agent/tools/openapi_plugin.py
@@ -0,0 +1,370 @@
+import os
+import re
+from typing import List, Optional
+
+import json
+import requests
+from jsonschema import RefResolver
+from pydantic import BaseModel, ValidationError
+from requests.exceptions import RequestException, Timeout
+
+from .tool import Tool
+
+MAX_RETRY_TIMES = 3
+
+
+class ParametersSchema(BaseModel):
+ name: str
+ description: str
+ required: Optional[bool] = True
+
+
+class ToolSchema(BaseModel):
+ name: str
+ description: str
+ parameters: List[ParametersSchema]
+
+
+class OpenAPIPluginTool(Tool):
+ """
+ openapi schema tool
+ """
+ name: str = 'api tool'
+ description: str = 'This is a api tool that ...'
+ parameters: list = []
+
+ def __init__(self, cfg, name):
+ self.name = name
+ self.cfg = cfg.get(self.name, {})
+ self.is_remote_tool = self.cfg.get('is_remote_tool', False)
+ # remote call
+ self.url = self.cfg.get('url', '')
+ self.token = self.cfg.get('token', '')
+ self.header = self.cfg.get('header', '')
+ self.method = self.cfg.get('method', '')
+ self.parameters = self.cfg.get('parameters', [])
+ self.description = self.cfg.get('description',
+ 'This is a api tool that ...')
+ self.responses_param = self.cfg.get('responses_param', [])
+ try:
+ all_para = {
+ 'name': self.name,
+ 'description': self.description,
+ 'parameters': self.parameters
+ }
+ self.tool_schema = ToolSchema(**all_para)
+ except ValidationError:
+ raise ValueError(f'Error when parsing parameters of {self.name}')
+ self._str = self.tool_schema.model_dump_json()
+ self._function = self.parse_pydantic_model_to_openai_function(all_para)
+
+ def _remote_call(self, *args, **kwargs):
+ if self.url == '':
+ raise ValueError(
+ f"Could not use remote call for {self.name} since this tool doesn't have a remote endpoint"
+ )
+
+ remote_parsed_input = json.dumps(
+ self._remote_parse_input(*args, **kwargs))
+ origin_result = None
+ if self.method == 'POST':
+ retry_times = MAX_RETRY_TIMES
+ while retry_times:
+ retry_times -= 1
+ try:
+ print(f'data: {kwargs}')
+ print(f'header: {self.header}')
+ response = requests.request(
+ 'POST',
+ url=self.url,
+ headers=self.header,
+ data=remote_parsed_input)
+
+ if response.status_code != requests.codes.ok:
+ response.raise_for_status()
+ origin_result = json.loads(
+ response.content.decode('utf-8'))
+
+ final_result = self._parse_output(
+ origin_result, remote=True)
+ return final_result
+ except Timeout:
+ continue
+ except RequestException as e:
+ raise ValueError(
+ f'Remote call failed with error code: {e.response.status_code},\
+ error message: {e.response.content.decode("utf-8")}')
+
+ raise ValueError(
+ 'Remote call max retry times exceeded! Please try to use local call.'
+ )
+ elif self.method == 'GET':
+ retry_times = MAX_RETRY_TIMES
+
+ new_url = self.url
+ matches = re.findall(r'\{(.*?)\}', self.url)
+ for match in matches:
+ if match in kwargs:
+ new_url = new_url.replace('{' + match + '}', kwargs[match])
+ else:
+ print(
+ f'The parameter {match} was not generated by the model.'
+ )
+
+ while retry_times:
+ retry_times -= 1
+ try:
+                    print('GET url (template):', self.url)
+                    print('GET url (resolved):', new_url)
+
+ response = requests.request(
+ 'GET',
+ url=new_url,
+ headers=self.header,
+ params=remote_parsed_input)
+ if response.status_code != requests.codes.ok:
+ response.raise_for_status()
+
+ origin_result = json.loads(
+ response.content.decode('utf-8'))
+
+ final_result = self._parse_output(
+ origin_result, remote=True)
+ return final_result
+ except Timeout:
+ continue
+ except RequestException as e:
+ raise ValueError(
+ f'Remote call failed with error code: {e.response.status_code},\
+ error message: {e.response.content.decode("utf-8")}')
+
+ raise ValueError(
+ 'Remote call max retry times exceeded! Please try to use local call.'
+ )
+ else:
+            raise ValueError(
+                'Remote call method is invalid! Only POST and GET are supported.')
+
+ def _remote_parse_input(self, *args, **kwargs):
+ restored_dict = {}
+ for key, value in kwargs.items():
+ if '.' in key:
+ # Split keys by "." and create nested dictionary structures
+ keys = key.split('.')
+ temp_dict = restored_dict
+ for k in keys[:-1]:
+ temp_dict = temp_dict.setdefault(k, {})
+ temp_dict[keys[-1]] = value
+ else:
+                # If the key does not contain ".", store the key-value pair into restored_dict directly
+                restored_dict[key] = value
+        kwargs = restored_dict
+        print('Parameters passed to the tool:', kwargs)
+ return kwargs
+
+
+# openapi_schema_convert,register to tool_config.json
+def extract_references(schema_content):
+ references = []
+ if isinstance(schema_content, dict):
+ if '$ref' in schema_content:
+ references.append(schema_content['$ref'])
+ for key, value in schema_content.items():
+ references.extend(extract_references(value))
+ elif isinstance(schema_content, list):
+ for item in schema_content:
+ references.extend(extract_references(item))
+ return references
+
+
+def parse_nested_parameters(param_name, param_info, parameters_list, content):
+ param_type = param_info['type']
+ param_description = param_info.get('description',
+                                       f'用户输入的{param_name}')  # adjust the default description as needed
+ param_required = param_name in content['required']
+ try:
+ if param_type == 'object':
+ properties = param_info.get('properties')
+ if properties:
+ # If the argument type is an object and has a non-empty "properties" field,
+ # its internal properties are parsed recursively
+ for inner_param_name, inner_param_info in properties.items():
+ inner_param_type = inner_param_info['type']
+ inner_param_description = inner_param_info.get(
+ 'description', f'用户输入的{param_name}.{inner_param_name}')
+ inner_param_required = param_name.split(
+ '.')[0] in content['required']
+
+ # Recursively call the function to handle nested objects
+ if inner_param_type == 'object':
+ parse_nested_parameters(
+ f'{param_name}.{inner_param_name}',
+ inner_param_info, parameters_list, content)
+ else:
+ parameters_list.append({
+ 'name':
+ f'{param_name}.{inner_param_name}',
+ 'description':
+ inner_param_description,
+ 'required':
+ inner_param_required,
+ 'type':
+ inner_param_type,
+ 'value':
+ inner_param_info.get('enum', '')
+ })
+ else:
+ # Non-nested parameters are added directly to the parameter list
+ parameters_list.append({
+ 'name': param_name,
+ 'description': param_description,
+ 'required': param_required,
+ 'type': param_type,
+ 'value': param_info.get('enum', '')
+ })
+ except Exception as e:
+        raise ValueError(f'{e}: malformed schema structure')
+
+
+def parse_responses_parameters(param_name, param_info, parameters_list):
+ param_type = param_info['type']
+ param_description = param_info.get('description',
+                                       f'调用api返回的{param_name}')  # adjust the default description as needed
+ try:
+ if param_type == 'object':
+ properties = param_info.get('properties')
+ if properties:
+ # If the argument type is an object and has a non-empty "properties"
+ # field, its internal properties are parsed recursively
+
+ for inner_param_name, inner_param_info in properties.items():
+ param_type = inner_param_info['type']
+ param_description = inner_param_info.get(
+ 'description',
+ f'调用api返回的{param_name}.{inner_param_name}')
+ parameters_list.append({
+ 'name': f'{param_name}.{inner_param_name}',
+ 'description': param_description,
+ 'type': param_type,
+ })
+ else:
+ # Non-nested parameters are added directly to the parameter list
+ parameters_list.append({
+ 'name': param_name,
+ 'description': param_description,
+ 'type': param_type,
+ })
+ except Exception as e:
+        raise ValueError(f'{e}: malformed schema structure')
+
+
+def openapi_schema_convert(schema, auth):
+
+ resolver = RefResolver.from_schema(schema)
+ servers = schema.get('servers', [])
+ if servers:
+ servers_url = servers[0].get('url')
+    else:
+        servers_url = ''
+        print('No server URL found in the schema; generated URLs will be relative.')
+ # Extract endpoints
+ endpoints = schema.get('paths', {})
+ description = schema.get('info', {}).get('description',
+ 'This is a api tool that ...')
+ config_data = {}
+ # Iterate over each endpoint and its contents
+ for endpoint_path, methods in endpoints.items():
+ for method, details in methods.items():
+ summary = details.get('summary', 'No summary').replace(' ', '_')
+ name = details.get('operationId', 'No operationId')
+ url = f'{servers_url}{endpoint_path}'
+ security = details.get('security', [{}])
+ # Security (Bearer Token)
+ authorization = ''
+ if security:
+ for sec in security:
+ if 'BearerAuth' in sec:
+ api_token = auth.get('apikey', os.environ['apikey'])
+ api_token_type = auth.get('apikey_type',
+ os.environ['apikey_type'])
+ authorization = f'{api_token_type} {api_token}'
+ if method.upper() == 'POST':
+ requestBody = details.get('requestBody', {})
+ if requestBody:
+ for content_type, content_details in requestBody.get(
+ 'content', {}).items():
+ schema_content = content_details.get('schema', {})
+ references = extract_references(schema_content)
+ for reference in references:
+ resolved_schema = resolver.resolve(reference)
+ content = resolved_schema[1]
+ parameters_list = []
+ for param_name, param_info in content[
+ 'properties'].items():
+ parse_nested_parameters(
+ param_name, param_info, parameters_list,
+ content)
+ X_DashScope_Async = requestBody.get(
+ 'X-DashScope-Async', '')
+ if X_DashScope_Async == '':
+ config_entry = {
+ 'name': name,
+ 'description': description,
+ 'is_active': True,
+ 'is_remote_tool': True,
+ 'url': url,
+ 'method': method.upper(),
+ 'parameters': parameters_list,
+ 'header': {
+ 'Content-Type': content_type,
+ 'Authorization': authorization
+ }
+ }
+ else:
+ config_entry = {
+ 'name': name,
+ 'description': description,
+ 'is_active': True,
+ 'is_remote_tool': True,
+ 'url': url,
+ 'method': method.upper(),
+ 'parameters': parameters_list,
+ 'header': {
+ 'Content-Type': content_type,
+ 'Authorization': authorization,
+ 'X-DashScope-Async': 'enable'
+ }
+ }
+ else:
+ config_entry = {
+ 'name': name,
+ 'description': description,
+ 'is_active': True,
+ 'is_remote_tool': True,
+ 'url': url,
+ 'method': method.upper(),
+ 'parameters': [],
+ 'header': {
+ 'Content-Type': 'application/json',
+ 'Authorization': authorization
+ }
+ }
+ elif method.upper() == 'GET':
+                parameters_list = details.get('parameters', [])
+ config_entry = {
+ 'name': name,
+ 'description': description,
+ 'is_active': True,
+ 'is_remote_tool': True,
+ 'url': url,
+ 'method': method.upper(),
+ 'parameters': parameters_list,
+ 'header': {
+ 'Authorization': authorization
+ }
+ }
+ else:
+                    raise ValueError('method is not POST or GET')
+
+ config_data[summary] = config_entry
+ return config_data
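A sketch of the conversion on a made-up, minimal OpenAPI schema with one GET endpoint:

```python
from my_modelscope_agent.tools.openapi_plugin import openapi_schema_convert

schema = {
    'info': {'description': 'demo weather api'},
    'servers': [{'url': 'https://api.example.com'}],
    'paths': {
        '/weather': {
            'get': {
                'summary': 'get weather',
                'operationId': 'getWeather',
                'parameters': [{
                    'name': 'city',
                    'description': 'city name',
                    'required': True,
                    'type': 'string'
                }],
            }
        }
    },
}
config = openapi_schema_convert(schema, auth={})
print(config['get_weather']['url'])  # https://api.example.com/weather
```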
diff --git a/my_modelscope_agent/tools/pipeline_tool.py b/my_modelscope_agent/tools/pipeline_tool.py
new file mode 100644
index 0000000000000000000000000000000000000000..12f676dbd56b18e4ddf1ac7130f7a3bef6751a91
--- /dev/null
+++ b/my_modelscope_agent/tools/pipeline_tool.py
@@ -0,0 +1,40 @@
+from modelscope.pipelines import pipeline
+from .tool import Tool
+
+
+class ModelscopePipelineTool(Tool):
+
+ default_model: str = ''
+ task: str = ''
+ model_revision = None
+
+ def __init__(self, cfg):
+
+ super().__init__(cfg)
+ self.model = self.cfg.get('model', None) or self.default_model
+ self.model_revision = self.cfg.get('model_revision',
+ None) or self.model_revision
+
+ self.pipeline_params = self.cfg.get('pipeline_params', {})
+ self.pipeline = None
+ self.is_initialized = False
+
+ def setup(self):
+
+ # only initialize when this tool is really called to save memory
+ if not self.is_initialized:
+ self.pipeline = pipeline(
+ task=self.task,
+ model=self.model,
+ model_revision=self.model_revision,
+ **self.pipeline_params)
+ self.is_initialized = True
+
+ def _local_call(self, *args, **kwargs):
+
+ self.setup()
+
+ parsed_args, parsed_kwargs = self._local_parse_input(*args, **kwargs)
+ origin_result = self.pipeline(*parsed_args, **parsed_kwargs)
+ final_result = self._parse_output(origin_result, remote=False)
+ return final_result
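Because `setup()` is lazy, declaring many pipeline tools is cheap; the pipeline only loads on the first local call. A sketch with an assumed ModelScope sentiment model id:

```python
from modelscope.utils.constant import Tasks

from my_modelscope_agent.tools.pipeline_tool import ModelscopePipelineTool


class SentimentTool(ModelscopePipelineTool):
    default_model = 'damo/nlp_structbert_sentiment-classification_chinese-base'  # assumed model id
    description = '中文情感分类服务'
    name = 'modelscope_text-sentiment'
    parameters: list = [{
        'name': 'input',
        'description': '用户输入的文本',
        'required': True
    }]
    task = Tasks.text_classification


tool = SentimentTool(cfg={})
assert tool.is_initialized is False        # nothing loaded yet
# tool(remote=False, input='今天天气真好')  # the first call would trigger setup()
```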
diff --git a/my_modelscope_agent/tools/plugin_tool.py b/my_modelscope_agent/tools/plugin_tool.py
new file mode 100644
index 0000000000000000000000000000000000000000..bd1242448545b6042aa8f12d2fcd5ea959306427
--- /dev/null
+++ b/my_modelscope_agent/tools/plugin_tool.py
@@ -0,0 +1,30 @@
+from copy import deepcopy
+
+from .tool import Tool
+
+
+class LangchainTool(Tool):
+
+ def __init__(self, langchain_tool):
+ from langchain.tools import BaseTool
+
+ if not isinstance(langchain_tool, BaseTool):
+ raise ValueError('langchain_tool should be type of langchain tool')
+ self.langchain_tool = langchain_tool
+ self.parse_langchain_schema()
+ super().__init__()
+
+ def parse_langchain_schema(self):
+ # convert langchain tool schema to modelscope_agent tool schema
+ self.description = self.langchain_tool.description
+ self.name = self.langchain_tool.name
+ self.parameters = []
+ for name, arg in self.langchain_tool.args.items():
+ tool_arg = deepcopy(arg)
+ tool_arg['name'] = name
+ tool_arg['required'] = True
+ tool_arg.pop('title')
+ self.parameters.append(tool_arg)
+
+ def _local_call(self, *args, **kwargs):
+ return {'result': self.langchain_tool.run(kwargs)}
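A sketch of the LangChain bridge with a toy tool (`EchoTool` is hypothetical); note each argument needs a `description`, since `parse_langchain_schema` feeds the result into `ToolSchema`:

```python
from typing import Type

from langchain.pydantic_v1 import BaseModel, Field
from langchain.tools import BaseTool

from my_modelscope_agent.tools.plugin_tool import LangchainTool


class EchoInput(BaseModel):
    text: str = Field(description='text to echo back')


class EchoTool(BaseTool):
    name: str = 'echo'
    description: str = 'echo back the input text'
    args_schema: Type[BaseModel] = EchoInput

    def _run(self, text: str) -> str:
        return text

    async def _arun(self, text: str) -> str:
        return text


tool = LangchainTool(EchoTool())
print(tool.parameters)                # schema derived from EchoInput
print(tool(remote=False, text='hi'))  # {'result': 'hi'}
```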
diff --git a/my_modelscope_agent/tools/text_address_tool.py b/my_modelscope_agent/tools/text_address_tool.py
new file mode 100644
index 0000000000000000000000000000000000000000..8c52b147b84fbe6d8da0e69989e936b8b731ea04
--- /dev/null
+++ b/my_modelscope_agent/tools/text_address_tool.py
@@ -0,0 +1,20 @@
+from modelscope.utils.constant import Tasks
+from .pipeline_tool import ModelscopePipelineTool
+
+
+class TextAddressTool(ModelscopePipelineTool):
+ default_model = 'damo/mgeo_geographic_elements_tagging_chinese_base'
+ description = '地址解析服务,针对中文地址信息,识别出里面的元素,包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等'
+ name = 'modelscope_text-address'
+ parameters: list = [{
+ 'name': 'input',
+ 'description': '用户输入的地址信息',
+ 'required': True
+ }]
+ task = Tasks.token_classification
+
+ def _parse_output(self, origin_result, *args, **kwargs):
+ final_result = {}
+ for e in origin_result['output']:
+ final_result[e['type']] = e['span']
+ return {'result': final_result}
diff --git a/my_modelscope_agent/tools/text_ie_tool.py b/my_modelscope_agent/tools/text_ie_tool.py
new file mode 100644
index 0000000000000000000000000000000000000000..d8c983e8481cace0acaa228eddf22bd6df28af01
--- /dev/null
+++ b/my_modelscope_agent/tools/text_ie_tool.py
@@ -0,0 +1,32 @@
+from collections import defaultdict
+
+from modelscope.utils.constant import Tasks
+from .pipeline_tool import ModelscopePipelineTool
+
+
+class TextInfoExtractTool(ModelscopePipelineTool):
+ default_model = 'damo/nlp_structbert_siamese-uie_chinese-base'
+ description = '信息抽取服务,针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示'
+ name = 'modelscope_text-ie'
+ parameters: list = [{
+ 'name': 'input',
+ 'description': '用户输入的文本',
+ 'required': True
+ }, {
+ 'name': 'schema',
+ 'description': '要抽取信息的json表示',
+ 'required': True
+ }]
+ task = Tasks.siamese_uie
+
+ def _remote_parse_input(self, *args, **kwargs):
+ kwargs['parameters'] = {'schema': kwargs['schema']}
+ kwargs.pop('schema')
+ return kwargs
+
+ def _parse_output(self, origin_result, *args, **kwargs):
+ final_result = defaultdict(list)
+ for e in origin_result['output']:
+ final_result[e[0]['type']].append(e[0]['span'])
+
+ return {'result': dict(final_result)}
diff --git a/my_modelscope_agent/tools/text_ner_tool.py b/my_modelscope_agent/tools/text_ner_tool.py
new file mode 100644
index 0000000000000000000000000000000000000000..a694a96c90a2b6d9e344754b065e81a8b5897ee9
--- /dev/null
+++ b/my_modelscope_agent/tools/text_ner_tool.py
@@ -0,0 +1,22 @@
+from collections import defaultdict
+
+from modelscope.utils.constant import Tasks
+from .pipeline_tool import ModelscopePipelineTool
+
+
+class TextNerTool(ModelscopePipelineTool):
+ default_model = 'damo/nlp_raner_named-entity-recognition_chinese-base-news'
+ description = '命名实体识别服务,针对需要识别的中文文本,找出其中的实体,返回json格式结果'
+ name = 'modelscope_text-ner'
+ parameters: list = [{
+ 'name': 'input',
+ 'description': '用户输入的文本',
+ 'required': True
+ }]
+ task = Tasks.named_entity_recognition
+
+ def _parse_output(self, origin_result, *args, **kwargs):
+ final_result = defaultdict(list)
+ for e in origin_result['output']:
+ final_result[e['type']].append(e['span'])
+ return {'result': dict(final_result)}
diff --git a/my_modelscope_agent/tools/text_to_image_tool.py b/my_modelscope_agent/tools/text_to_image_tool.py
new file mode 100644
index 0000000000000000000000000000000000000000..088749729723679498b2d97f9f3443f6f8f70e60
--- /dev/null
+++ b/my_modelscope_agent/tools/text_to_image_tool.py
@@ -0,0 +1,114 @@
+import os
+import re
+
+import cv2
+import dashscope
+import json
+from dashscope import ImageSynthesis
+from ..output_wrapper import ImageWrapper
+
+from modelscope.utils.constant import Tasks
+from .pipeline_tool import ModelscopePipelineTool
+
+
+class TextToImageTool(ModelscopePipelineTool):
+ default_model = 'AI-ModelScope/stable-diffusion-xl-base-1.0'
+ description = 'AI绘画(图像生成)服务,输入文本描述和图像分辨率,返回根据文本信息绘制的图片URL。'
+ name = 'image_gen'
+ parameters: list = [{
+ 'name': 'text',
+ 'description': '详细描述了希望生成的图像具有什么内容,例如人物、环境、动作等细节描述',
+ 'required': True,
+ 'schema': {
+ 'type': 'string'
+ }
+ }, {
+ 'name': 'resolution',
+ 'description':
+ '格式是 数字*数字,表示希望生成的图像的分辨率大小,选项有[1024*1024, 720*1280, 1280*720]',
+ 'required': True,
+ 'schema': {
+ 'type': 'string'
+ }
+ }]
+ model_revision = 'v1.0.0'
+ task = Tasks.text_to_image_synthesis
+
+ # def _remote_parse_input(self, *args, **kwargs):
+ # params = {
+ # 'input': {
+ # 'text': kwargs['text'],
+ # 'resolution': kwargs['resolution']
+ # }
+ # }
+ # if kwargs.get('seed', None):
+ # params['input']['seed'] = kwargs['seed']
+ # return params
+
+ def _remote_call(self, *args, **kwargs):
+
+ if ('resolution' in kwargs) and (kwargs['resolution'] in [
+ '1024*1024', '720*1280', '1280*720'
+ ]):
+ resolution = kwargs['resolution']
+ else:
+ resolution = '1280*720'
+
+ prompt = kwargs['text']
+ seed = kwargs.get('seed', None)
+ if prompt is None:
+ return None
+ dashscope.api_key = os.getenv('DASHSCOPE_API_KEY')
+ response = ImageSynthesis.call(
+ model=ImageSynthesis.Models.wanx_v1,
+ prompt=prompt,
+ n=1,
+ size=resolution,
+ steps=10,
+ seed=seed)
+ final_result = self._parse_output(response, remote=True)
+ return final_result
+
+ def _local_parse_input(self, *args, **kwargs):
+
+ text = kwargs.pop('text', '')
+
+ parsed_args = ({'text': text}, )
+
+ return parsed_args, {}
+
+ def _parse_output(self, origin_result, remote=True):
+ if not remote:
+ image = cv2.cvtColor(origin_result['output_imgs'][0],
+ cv2.COLOR_BGR2RGB)
+ else:
+ image = origin_result.output['results'][0]['url']
+
+ return {'result': ImageWrapper(image)}
+
+ def _handle_input_fallback(self, **kwargs):
+ """
+ an alternative method is to parse image is that get item between { and }
+ for last try
+
+ :param fallback_text:
+ :return: language, cocde
+ """
+
+ text = kwargs.get('text', None)
+ fallback = kwargs.get('fallback', None)
+
+ if text:
+ return text
+ elif fallback:
+ try:
+ text = fallback
+                json_block = re.search(r'\{([\s\S]+)\}', text)  # noqa: W605
+ if json_block:
+ result = json_block.group(1)
+ result_json = json.loads('{' + result + '}')
+ return result_json['text']
+ except ValueError:
+ return text
+ else:
+ return text
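A sketch of the fallback parser recovering the prompt from a messy model response; the blob below is made up:

```python
from my_modelscope_agent.tools.text_to_image_tool import TextToImageTool

blob = 'some preamble {"text": "一只在月球上的猫", "resolution": "1024*1024"} trailing'
# `_handle_input_fallback` does not use `self`, so an unbound call suffices here.
print(TextToImageTool._handle_input_fallback(None, fallback=blob))
# 一只在月球上的猫
```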
diff --git a/my_modelscope_agent/tools/text_to_speech_tool.py b/my_modelscope_agent/tools/text_to_speech_tool.py
new file mode 100644
index 0000000000000000000000000000000000000000..47c7beb05698807b28affe3d5d1523b737f5d097
--- /dev/null
+++ b/my_modelscope_agent/tools/text_to_speech_tool.py
@@ -0,0 +1,44 @@
+from ..output_wrapper import AudioWrapper
+
+from modelscope.utils.constant import Tasks
+from .pipeline_tool import ModelscopePipelineTool
+
+
+class TexttoSpeechTool(ModelscopePipelineTool):
+ default_model = 'damo/speech_sambert-hifigan_tts_zh-cn_16k'
+ description = '文本转语音服务,将文字转换为自然而逼真的语音,可配置男声/女声'
+ name = 'modelscope_speech-generation'
+ parameters: list = [{
+ 'name': 'input',
+ 'description': '要转成语音的文本',
+ 'required': True
+ }, {
+ 'name': 'gender',
+ 'description': '用户身份',
+ 'required': True
+ }]
+ task = Tasks.text_to_speech
+
+ def _local_parse_input(self, *args, **kwargs):
+ if 'gender' not in kwargs:
+ kwargs['gender'] = 'man'
+ voice = 'zhizhe_emo' if kwargs['gender'] == 'man' else 'zhiyan_emo'
+ kwargs['voice'] = voice
+ if 'text' in kwargs and 'input' not in kwargs:
+ kwargs['input'] = kwargs['text']
+ kwargs.pop('text')
+ kwargs.pop('gender')
+ return args, kwargs
+
+ def _remote_parse_input(self, *args, **kwargs):
+ if 'gender' not in kwargs:
+ kwargs['gender'] = 'man'
+ voice = 'zhizhe_emo' if kwargs['gender'] == 'man' else 'zhiyan_emo'
+ kwargs['voice'] = voice
+ kwargs.pop('gender')
+ return kwargs
+
+ def _parse_output(self, origin_result, remote=True):
+
+ audio = origin_result['output_wav']
+ return {'result': AudioWrapper(audio)}
diff --git a/my_modelscope_agent/tools/text_to_video_tool.py b/my_modelscope_agent/tools/text_to_video_tool.py
new file mode 100644
index 0000000000000000000000000000000000000000..2bcacb9ad57e113c98824465792ccef535930a66
--- /dev/null
+++ b/my_modelscope_agent/tools/text_to_video_tool.py
@@ -0,0 +1,40 @@
+import os
+import tempfile
+import uuid
+
+from ..output_wrapper import VideoWrapper
+
+from modelscope.utils.constant import Tasks
+from .pipeline_tool import ModelscopePipelineTool
+
+
+class TextToVideoTool(ModelscopePipelineTool):
+ default_model = 'damo/text-to-video-synthesis'
+ description = '视频生成服务,针对英文文本输入,生成一段描述视频;如果是中文输入同时依赖插件modelscope_text-translation-zh2en翻译成英文'
+
+ name = 'modelscope_video-generation'
+ parameters: list = [{
+ 'name': 'text',
+ 'description': '用户输入的文本信息',
+ 'required': True
+ }]
+ task = Tasks.text_to_video_synthesis
+
+ def _remote_parse_input(self, *args, **kwargs):
+ return {'input': {'text': kwargs['text']}}
+
+ def _local_parse_input(self, *args, **kwargs):
+
+ text = kwargs.pop('text', '')
+ directory = tempfile.mkdtemp()
+ file_path = os.path.join(directory, str(uuid.uuid4()) + '.mp4')
+
+ parsed_args = ({'text': text}, )
+ parsed_kwargs = {'output_video': file_path}
+
+ return parsed_args, parsed_kwargs
+
+ def _parse_output(self, origin_result, remote=True):
+
+ video = origin_result['output_video']
+ return {'result': VideoWrapper(video)}
diff --git a/my_modelscope_agent/tools/tool.py b/my_modelscope_agent/tools/tool.py
new file mode 100644
index 0000000000000000000000000000000000000000..5252f1c9c12658d118059e0179b21d4b29319941
--- /dev/null
+++ b/my_modelscope_agent/tools/tool.py
@@ -0,0 +1,180 @@
+import os
+from typing import List, Optional
+
+import json
+import requests
+from pydantic import BaseModel, ValidationError
+from requests.exceptions import RequestException, Timeout
+
+MODELSCOPE_API_TOKEN = os.getenv('MODELSCOPE_API_TOKEN')
+
+MAX_RETRY_TIMES = 3
+
+
+class ParametersSchema(BaseModel):
+ name: str
+ description: str
+ required: Optional[bool] = True
+
+
+class ToolSchema(BaseModel):
+ name: str
+ description: str
+ parameters: List[ParametersSchema]
+
+
+class Tool:
+ """
+ a base class for tools.
+ when you inherit this class and implement new tool, you should provide name, description
+ and parameters of tool that conforms with schema.
+
+ each tool may have two call method: _local_call(execute tool in your local environment)
+ and _remote_call(construct a http request to remote server).
+ corresponding to preprocess and postprocess method may need to be overrided to get correct result.
+ """
+ name: str = 'tool'
+ description: str = 'This is a tool that ...'
+ parameters: list = []
+
+ def __init__(self, cfg={}):
+ self.cfg = cfg.get(self.name, {})
+ self.is_remote_tool = self.cfg.get('is_remote_tool', False)
+
+ # remote call
+ self.url = self.cfg.get('url', '')
+ self.token = self.cfg.get('token', '')
+ self.header = {
+ 'Authorization': self.token or f'Bearer {MODELSCOPE_API_TOKEN}'
+ }
+
+ try:
+ all_para = {
+ 'name': self.name,
+ 'description': self.description,
+ 'parameters': self.parameters
+ }
+ self.tool_schema = ToolSchema(**all_para)
+ except ValidationError:
+ raise ValueError(f'Error when parsing parameters of {self.name}')
+
+ self._str = self.tool_schema.model_dump_json()
+ self._function = self.parse_pydantic_model_to_openai_function(all_para)
+
+ def __call__(self, remote=False, *args, **kwargs):
+ if self.is_remote_tool or remote:
+ return self._remote_call(*args, **kwargs)
+ else:
+ return self._local_call(*args, **kwargs)
+
+ def _remote_call(self, *args, **kwargs):
+ if self.url == '':
+ raise ValueError(
+ f"Could not use remote call for {self.name} since this tool doesn't have a remote endpoint"
+ )
+
+ remote_parsed_input = json.dumps(
+ self._remote_parse_input(*args, **kwargs))
+
+ origin_result = None
+ retry_times = MAX_RETRY_TIMES
+ while retry_times:
+ retry_times -= 1
+ try:
+ response = requests.request(
+ 'POST',
+ self.url,
+ headers=self.header,
+ data=remote_parsed_input)
+ if response.status_code != requests.codes.ok:
+ response.raise_for_status()
+
+ origin_result = json.loads(
+ response.content.decode('utf-8'))['Data']
+
+ final_result = self._parse_output(origin_result, remote=True)
+ return final_result
+ except Timeout:
+ continue
+ except RequestException as e:
+ raise ValueError(
+ f'Remote call failed with error code: {e.response.status_code},\
+ error message: {e.response.content.decode("utf-8")}')
+
+ raise ValueError(
+ 'Remote call max retry times exceeded! Please try to use local call.'
+ )
+
+ def _local_call(self, *args, **kwargs):
+ return
+
+ def _remote_parse_input(self, *args, **kwargs):
+ return kwargs
+
+ def _local_parse_input(self, *args, **kwargs):
+ return args, kwargs
+
+ def _parse_output(self, origin_result, *args, **kwargs):
+ return {'result': origin_result}
+
+ def __str__(self):
+ return self._str
+
+ def get_function(self):
+ return self._function
+
+ def parse_pydantic_model_to_openai_function(self, all_para: dict):
+        '''
+        Convert a parameter list in this module's schema into an OpenAI
+        function schema, e.g. from
+        all_para = {
+            'name': 'get_current_weather',
+            'description': 'Get the current weather in a given location',
+            'parameters': [{
+                'name': 'image',
+                'description': '用户输入的图片',
+                'required': True
+            }, {
+                'name': 'text',
+                'description': '用户输入的文本',
+                'required': True
+            }]
+        }
+        to
+        {
+            "name": "get_current_weather",
+            "description": "Get the current weather in a given location",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "image": {
+                        "type": "string",
+                        "description": "用户输入的图片",
+                    },
+                    "text": {
+                        "type": "string",
+                        "description": "用户输入的文本",
+                    },
+                },
+                "required": ["image", "text"],
+            },
+        }
+        '''
+
+ function = {
+ 'name': all_para['name'],
+ 'description': all_para['description'],
+ 'parameters': {
+ 'type': 'object',
+ 'properties': {},
+ 'required': [],
+ },
+ }
+ for para in all_para['parameters']:
+ function['parameters']['properties'][para['name']] = {
+ 'type': 'string',
+ 'description': para['description']
+ }
+ if para['required']:
+ function['parameters']['required'].append(para['name'])
+
+ return function
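A sketch of a minimal local-only subclass and the function schema the base class derives from its parameter list:

```python
from my_modelscope_agent.tools.tool import Tool


class ReverseTool(Tool):
    description = 'reverse the input text'
    name = 'demo_reverse'
    parameters: list = [{
        'name': 'input',
        'description': 'text to reverse',
        'required': True
    }]

    def _local_call(self, *args, **kwargs):
        return {'result': kwargs['input'][::-1]}


tool = ReverseTool()
print(tool(remote=False, input='abc'))  # {'result': 'cba'}
print(tool.get_function())              # ... 'required': ['input'] ...
```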
diff --git a/my_modelscope_agent/tools/translation_en2zh_tool.py b/my_modelscope_agent/tools/translation_en2zh_tool.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e6803e320b3093113e63a288a896938f292136a
--- /dev/null
+++ b/my_modelscope_agent/tools/translation_en2zh_tool.py
@@ -0,0 +1,17 @@
+from modelscope.utils.constant import Tasks
+from .pipeline_tool import ModelscopePipelineTool
+
+
+class TranslationEn2ZhTool(ModelscopePipelineTool):
+ default_model = 'damo/nlp_csanmt_translation_en2zh'
+ description = '根据输入指令,将相应的英文文本翻译成中文回复'
+ name = 'modelscope_text-translation-en2zh'
+ task = Tasks.translation
+ parameters: list = [{
+ 'name': 'input',
+ 'description': '用户输入的英文文本',
+ 'required': True
+ }]
+
+ def _parse_output(self, origin_result, *args, **kwargs):
+ return {'result': origin_result['translation']}
diff --git a/my_modelscope_agent/tools/translation_zh2en_tool.py b/my_modelscope_agent/tools/translation_zh2en_tool.py
new file mode 100644
index 0000000000000000000000000000000000000000..6371acb1e22f2b607025840bc2b7dd8dd96facd1
--- /dev/null
+++ b/my_modelscope_agent/tools/translation_zh2en_tool.py
@@ -0,0 +1,17 @@
+from modelscope.utils.constant import Tasks
+from .pipeline_tool import ModelscopePipelineTool
+
+
+class TranslationZh2EnTool(ModelscopePipelineTool):
+ default_model = 'damo/nlp_csanmt_translation_zh2en'
+ description = '根据输入指令,将相应的中文文本翻译成英文回复'
+ name = 'modelscope_text-translation-zh2en'
+ task = Tasks.translation
+ parameters: list = [{
+ 'name': 'input',
+ 'description': '用户输入的中文文本',
+ 'required': True
+ }]
+
+ def _parse_output(self, origin_result, *args, **kwargs):
+ return {'result': origin_result['translation']}
diff --git a/my_modelscope_agent/tools/web_browser.py b/my_modelscope_agent/tools/web_browser.py
new file mode 100644
index 0000000000000000000000000000000000000000..0d0b7e2c2771e0823d2089465edd83cd24b9cdc3
--- /dev/null
+++ b/my_modelscope_agent/tools/web_browser.py
@@ -0,0 +1,72 @@
+import httpx
+from langchain.document_loaders import AsyncHtmlLoader
+from langchain.document_transformers import BeautifulSoupTransformer
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from ..tools.tool import Tool
+
+
+class WebBrowser(Tool):
+    description = '网页浏览服务,抓取给定URL的网页并返回其文本内容'
+ name = 'web_browser'
+ parameters: list = [{
+ 'name': 'urls',
+ 'description': 'the urls that the user wants to browse',
+ 'required': True
+ }]
+
+ def __init__(self, cfg={}):
+ super().__init__(cfg)
+ self.split_url_into_chunk = self.cfg.get('split_url_into_chunk', False)
+ self.headers = {
+ 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)'
+ }
+ self.client = httpx.Client(
+ headers=self.headers, verify=False, timeout=30.0)
+
+ def _local_call(self, *args, **kwargs):
+ parsed_args, parsed_kwargs = self._local_parse_input(*args, **kwargs)
+
+ urls = parsed_kwargs['urls']
+ print(urls)
+ if urls is None:
+ return {'result': ''}
+
+        # Load the html pages
+ loader = AsyncHtmlLoader(urls)
+ docs = loader.load()
+ # Transform
+ bs_transformer = BeautifulSoupTransformer()
+ docs_transformed = bs_transformer.transform_documents(
+ docs, tags_to_extract=['span'])
+
+ # split url content into chunk in order to get fine-grained results
+ if self.split_url_into_chunk:
+            # Split each page into 1000-token chunks
+ splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
+ chunk_size=1000, chunk_overlap=0)
+ splits = splitter.split_documents(docs_transformed)
+ else:
+ splits = docs_transformed
+ search_results = []
+ for item in splits:
+ result = {
+ 'url': item.metadata['source'],
+ 'content': item.page_content
+ }
+ search_results.append(result)
+
+ return {'result': search_results}
+
+ def _local_parse_input(self, *args, **kwargs):
+ urls = kwargs.get('urls', [])
+ if isinstance(urls, str):
+ urls = [urls]
+ kwargs['urls'] = urls
+ return args, kwargs
+
+
+if __name__ == '__main__':
+ tool = WebBrowser()
+ urls = ['https://blog.sina.com.cn/zhangwuchang']
+ result = tool._local_call(urls=urls)
+ print(result)
diff --git a/my_modelscope_agent/tools/web_search.py b/my_modelscope_agent/tools/web_search.py
new file mode 100644
index 0000000000000000000000000000000000000000..e40f68b31a10d26a015329f4357152947ac3ad1d
--- /dev/null
+++ b/my_modelscope_agent/tools/web_search.py
@@ -0,0 +1,85 @@
+from ..tools.tool import Tool, ToolSchema
+from ..tools.web_search_utils import get_websearcher_cls
+from ..tools.web_search_utils.search_util import \
+ AuthenticationKey
+from pydantic import ValidationError
+
+
+class WebSearch(Tool):
+    description = 'Surface relevant information from billions of web documents. ' \
+                  'Helps you find what you are looking for on the world-wide-web, ' \
+                  'combing billions of webpages, images, videos, and news.'
+ name = 'web_search_utils'
+ parameters: list = [{
+ 'name': 'query',
+ 'description':
+ """The user's search query term. The term may not be empty.""",
+ 'required': True
+ }]
+
+ def __init__(self, cfg={}):
+ super().__init__()
+ available_searchers = get_websearcher_cls()
+ all_searchers = AuthenticationKey.to_dict()
+ if not len(available_searchers):
+ raise ValueError(
+                f'At least one web search API token must be set: {all_searchers}'
+ )
+
+        searcher = cfg.pop('searcher', None)
+
+        # available_searchers is a list of classes; index it by short name
+        # (e.g. BingWebSearcher -> 'bing', following the <Name>WebSearcher
+        # naming convention) so a config string can select one.
+        searchers_by_name = {
+            cls.__name__.replace('WebSearcher', '').lower(): cls
+            for cls in available_searchers
+        }
+
+        if not searcher:
+            self.searcher = available_searchers[0](**cfg)
+        elif isinstance(searcher, str) and all_searchers.get(searcher, None):
+            cls = searchers_by_name.get(searcher.lower(), None)
+            if not cls:
+                raise ValueError(
+                    f'The searcher {searcher}\'s token is not set: {all_searchers.get(searcher, None)}'
+                )
+            self.searcher = cls(**cfg)
+        else:
+            raise ValueError(
+                f'The searcher {searcher} should be one of {all_searchers.keys()}'
+            )
+
+ try:
+ all_para = {
+ 'name': self.name,
+ 'description': self.description,
+ 'parameters': self.parameters
+ }
+ self.tool_schema = ToolSchema(**all_para)
+ except ValidationError:
+ raise ValueError(f'Error when parsing parameters of {self.name}')
+
+ self.is_remote_tool = True
+ self._str = self.tool_schema.model_dump_json()
+ self._function = self.parse_pydantic_model_to_openai_function(all_para)
+
+ def _remote_call(self, *args, **kwargs):
+ query = self._handle_input_fallback(**kwargs)
+ if not query or not len(query):
+ raise ValueError(
+ 'parameter `query` of tool web-search is None or Empty.')
+
+ res = self.searcher(query)
+ return {'result': [item.__dict__ for item in res]}
+
+ def _handle_input_fallback(self, **kwargs):
+ query = kwargs.get('query', None)
+ fallback = kwargs.get('fallback', None)
+ if query and isinstance(query, str) and len(query):
+ return query
+ else:
+ return fallback
+
+
+if __name__ == '__main__':
+ tool = WebSearch()
+ res = tool(query='2024年 元旦 哈尔滨天气')
+ print(res)
diff --git a/my_modelscope_agent/tools/web_search_utils/__init__.py b/my_modelscope_agent/tools/web_search_utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..906c6277e8b50754a7be8e9e1c05ee0049e14cac
--- /dev/null
+++ b/my_modelscope_agent/tools/web_search_utils/__init__.py
@@ -0,0 +1,2 @@
+from .search_util import get_websearcher_cls
diff --git a/my_modelscope_agent/tools/web_search_utils/search_util.py b/my_modelscope_agent/tools/web_search_utils/search_util.py
new file mode 100644
index 0000000000000000000000000000000000000000..843e48eda16fbb0fe39b3163f18af47b746e5f2a
--- /dev/null
+++ b/my_modelscope_agent/tools/web_search_utils/search_util.py
@@ -0,0 +1,40 @@
+import os
+
+
+class SearchResult:
+
+ def __init__(self, title=None, link=None, sniper=None):
+ assert link or sniper
+ self.title = title
+ self.link = link
+ self.sniper = sniper
+
+
+class AuthenticationKey:
+ bing = 'BING_SEARCH_V7_SUBSCRIPTION_KEY'
+ kuake = 'PLACE_HOLDER'
+
+ @classmethod
+ def to_dict(cls):
+ raw_dict = cls.__dict__
+ res = dict(
+ filter(lambda x: '__' not in x[0] and isinstance(x[1], str),
+ raw_dict.items()))
+ return res
+
+
+def get_websearcher_cls():
+
+ def get_env(authentication_key: str):
+ env = os.environ
+ return env.get(authentication_key, None)
+
+ cls_list = []
+ if get_env(AuthenticationKey.bing):
+ from ..web_search_utils.searcher.bing import BingWebSearcher
+ cls_list.append(BingWebSearcher)
+    if get_env(AuthenticationKey.kuake):
+ from ..web_search_utils.searcher.kuake import KuakeWebSearcher
+ cls_list.append(KuakeWebSearcher)
+
+ return cls_list
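Discovery is driven purely by environment variables; a sketch (the key value is a dummy):

```python
import os

from my_modelscope_agent.tools.web_search_utils.search_util import (
    AuthenticationKey, get_websearcher_cls)

print(AuthenticationKey.to_dict())
# {'bing': 'BING_SEARCH_V7_SUBSCRIPTION_KEY', 'kuake': 'PLACE_HOLDER'}

os.environ[AuthenticationKey.bing] = 'dummy-key-for-illustration'
print(get_websearcher_cls())  # [<class '...searcher.bing.BingWebSearcher'>]
```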
diff --git a/my_modelscope_agent/tools/web_search_utils/searcher/__init__.py b/my_modelscope_agent/tools/web_search_utils/searcher/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/my_modelscope_agent/tools/web_search_utils/searcher/base_searcher.py b/my_modelscope_agent/tools/web_search_utils/searcher/base_searcher.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f72cf1ebdb18cf19b520014d442750e908b0a22
--- /dev/null
+++ b/my_modelscope_agent/tools/web_search_utils/searcher/base_searcher.py
@@ -0,0 +1,5 @@
+class WebSearcher:
+ timeout = 1000
+
+ def __call__(self, **kwargs):
+ raise NotImplementedError()
diff --git a/my_modelscope_agent/tools/web_search_utils/searcher/bing.py b/my_modelscope_agent/tools/web_search_utils/searcher/bing.py
new file mode 100644
index 0000000000000000000000000000000000000000..393f843063928be2a8d8474585283d986b43581c
--- /dev/null
+++ b/my_modelscope_agent/tools/web_search_utils/searcher/bing.py
@@ -0,0 +1,59 @@
+import os
+
+import json
+import requests
+from ..search_util import (
+ AuthenticationKey, SearchResult)
+
+from .base_searcher import WebSearcher
+
+
+class BingWebSearcher(WebSearcher):
+
+ def __init__(
+ self,
+ timeout=3000,
+ mkt='en-US',
+ endpoint='https://api.bing.microsoft.com/v7.0/search',
+ ):
+ self.mkt = mkt
+ self.endpoint = endpoint
+ self.timeout = timeout
+ self.token = os.environ.get(AuthenticationKey.bing)
+
+ def __call__(self, query, **kwargs):
+ params = {'q': query, 'mkt': self.mkt}
+ headers = {'Ocp-Apim-Subscription-Key': self.token}
+ if kwargs:
+ params.update(kwargs)
+ try:
+ response = requests.get(
+ self.endpoint,
+ headers=headers,
+ params=params,
+ timeout=self.timeout)
+ raw_result = json.loads(response.text)
+ if raw_result.get('error', None):
+ print(f'Call Bing web search api failed: {raw_result}')
+        except Exception as ex:
+            raise RuntimeError('Call Bing web search api failed.') from ex
+
+ results = []
+ res_list = raw_result.get('webPages', {}).get('value', [])
+ for item in res_list:
+ title = item.get('name', None)
+ link = item.get('url', None)
+ sniper = item.get('snippet', None)
+ if not link and not sniper:
+ continue
+
+ results.append(SearchResult(title=title, link=link, sniper=sniper))
+
+ return results
+
+
+if __name__ == '__main__':
+
+ searcher = BingWebSearcher()
+ res = searcher('哈尔滨元旦的天气情况')
+ print([item.__dict__ for item in res])
diff --git a/my_modelscope_agent/tools/web_search_utils/searcher/kuake.py b/my_modelscope_agent/tools/web_search_utils/searcher/kuake.py
new file mode 100644
index 0000000000000000000000000000000000000000..9c135dfa5ed2081c40012dfa4195b44950874d6e
--- /dev/null
+++ b/my_modelscope_agent/tools/web_search_utils/searcher/kuake.py
@@ -0,0 +1,7 @@
+from .base_searcher import WebSearcher
+
+
+class KuakeWebSearcher(WebSearcher):
+
+ def __call__(self, query, **kwargs):
+ raise NotImplementedError()
diff --git a/my_modelscope_agent/tools/wordart_tool.py b/my_modelscope_agent/tools/wordart_tool.py
new file mode 100644
index 0000000000000000000000000000000000000000..65a28d9d8b3c69df172a3c41f0c98b0ba4eb927a
--- /dev/null
+++ b/my_modelscope_agent/tools/wordart_tool.py
@@ -0,0 +1,169 @@
+import os
+import time
+
+import json
+import requests
+from ..tools.tool import Tool, ToolSchema
+from pydantic import ValidationError
+from requests.exceptions import RequestException, Timeout
+
+MAX_RETRY_TIMES = 3
+
+
+class WordArtTexture(Tool):
+ description = '生成艺术字纹理图片'
+ name = 'wordart_texture_generation'
+ parameters: list = [{
+ 'name': 'input.text.text_content',
+ 'description': 'text that the user wants to convert to WordArt',
+ 'required': True
+ }, {
+ 'name': 'input.prompt',
+ 'description':
+        "The user's style requirements for the word art, e.g. in terms of shape, color, entity, etc.",
+ 'required': True
+ }, {
+ 'name': 'input.texture_style',
+ 'description':
+        'Type of texture style; defaults to "material" if not provided by the user. '
+        'Another valid value is "scene".',
+ 'required': True
+ }, {
+ 'name': 'input.text.output_image_ratio',
+ 'description':
+        'The aspect ratio of the text input image; the default is "1:1", '
+        'and the available ratios are: "1:1", "16:9", "9:16".',
+ 'required': True
+ }]
+
+ def __init__(self, cfg={}):
+ self.cfg = cfg.get(self.name, {})
+ # remote call
+ self.url = 'https://dashscope.aliyuncs.com/api/v1/services/aigc/wordart/texture'
+ self.token = self.cfg.get('token',
+ os.environ.get('DASHSCOPE_API_KEY', ''))
+ assert self.token != '', 'dashscope api token must be acquired with wordart'
+
+ try:
+ all_param = {
+ 'name': self.name,
+ 'description': self.description,
+ 'parameters': self.parameters
+ }
+ self.tool_schema = ToolSchema(**all_param)
+ except ValidationError:
+ raise ValueError(f'Error when parsing parameters of {self.name}')
+
+ self._str = self.tool_schema.model_dump_json()
+ self._function = self.parse_pydantic_model_to_openai_function(
+ all_param)
+
+ def __call__(self, *args, **kwargs):
+ remote_parsed_input = json.dumps(
+ self._remote_parse_input(*args, **kwargs))
+ origin_result = None
+ retry_times = MAX_RETRY_TIMES
+ headers = {
+ 'Content-Type': 'application/json',
+ 'Authorization': f'Bearer {self.token}',
+ 'X-DashScope-Async': 'enable'
+ }
+ while retry_times:
+ retry_times -= 1
+ try:
+
+ response = requests.request(
+ 'POST',
+ url=self.url,
+ headers=headers,
+ data=remote_parsed_input)
+
+ if response.status_code != requests.codes.ok:
+ response.raise_for_status()
+ origin_result = json.loads(response.content.decode('utf-8'))
+
+ self.final_result = self._parse_output(
+ origin_result, remote=True)
+ return self.get_wordart_result()
+ except Timeout:
+ continue
+ except RequestException as e:
+ raise ValueError(
+ f'Remote call failed with error code: {e.response.status_code},\
+ error message: {e.response.content.decode("utf-8")}')
+
+ raise ValueError(
+ 'Remote call max retry times exceeded! Please try to use local call.'
+ )
+
+ def _remote_parse_input(self, *args, **kwargs):
+ restored_dict = {}
+ for key, value in kwargs.items():
+ if '.' in key:
+ # Split keys by "." and create nested dictionary structures
+ keys = key.split('.')
+ temp_dict = restored_dict
+ for k in keys[:-1]:
+ temp_dict = temp_dict.setdefault(k, {})
+ temp_dict[keys[-1]] = value
+ else:
+                # If the key does not contain ".", store the key-value pair into restored_dict directly
+                restored_dict[key] = value
+        kwargs = restored_dict
+        kwargs['model'] = 'wordart-texture'
+        print('Parameters passed to the tool:', kwargs)
+ return kwargs
+
+ def get_result(self):
+ result_data = json.loads(json.dumps(self.final_result['result']))
+ if 'task_id' in result_data['output']:
+ task_id = result_data['output']['task_id']
+ get_url = f'https://dashscope.aliyuncs.com/api/v1/tasks/{task_id}'
+ get_header = {'Authorization': f'Bearer {self.token}'}
+ origin_result = None
+ retry_times = MAX_RETRY_TIMES
+ while retry_times:
+ retry_times -= 1
+ try:
+ response = requests.request(
+ 'GET', url=get_url, headers=get_header)
+ if response.status_code != requests.codes.ok:
+ response.raise_for_status()
+ origin_result = json.loads(response.content.decode('utf-8'))
+
+ get_result = self._parse_output(origin_result, remote=True)
+ return get_result
+ except Timeout:
+ continue
+ except RequestException as e:
+ raise ValueError(
+ f'Remote call failed with error code: {e.response.status_code},\
+ error message: {e.response.content.decode("utf-8")}')
+
+ raise ValueError(
+ 'Remote call max retry times exceeded! Please try to use local call.'
+ )
+
+ def get_wordart_result(self):
+ try:
+ result = self.get_result()
+ print(result)
+ while True:
+ result_data = result.get('result', {})
+ output = result_data.get('output', {})
+ task_status = output.get('task_status', '')
+
+                if task_status == 'SUCCEEDED':
+                    print('Task finished')
+                    return result
+
+                elif task_status == 'FAILED':
+                    raise RuntimeError('WordArt task failed')
+                else:
+                    # Keep polling: wait a moment, then fetch the result again
+                    time.sleep(1)  # wait one second between polls
+                    result = self.get_result()
+
+ except Exception as e:
+            print('Remote call error:', str(e))
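A sketch of how the dotted parameter names declared above fold back into the nested payload DashScope expects (`model` is added automatically):

```python
from my_modelscope_agent.tools.wordart_tool import WordArtTexture

# `_remote_parse_input` does not use `self`, so an unbound call suffices here.
payload = WordArtTexture._remote_parse_input(
    None,
    **{
        'input.text.text_content': '新年快乐',
        'input.prompt': '喜庆的中国龙',
        'input.texture_style': 'material',
        'input.text.output_image_ratio': '1:1',
    })
print(payload)
# {'input': {'text': {'text_content': '新年快乐', 'output_image_ratio': '1:1'},
#            'prompt': '喜庆的中国龙', 'texture_style': 'material'},
#  'model': 'wordart-texture'}
```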
diff --git a/my_modelscope_agent/version.py b/my_modelscope_agent/version.py
new file mode 100644
index 0000000000000000000000000000000000000000..683418800a8b84a130ce976b5a71f37242642884
--- /dev/null
+++ b/my_modelscope_agent/version.py
@@ -0,0 +1 @@
+__version__ = '0.2.1-rc0'
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ce44072ba77b3f5336901320c0834e67565208e1
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,21 @@
+gradio
+dashscope
+datasets>=2.8.0
+ipython
+jupyter>=1.0.0
+langchain<=0.0.292
+modelscope>=1.7.0
+moviepy
+ms-swift
+openai
+opencv-python
+openpyxl
+Pillow
+pydantic>=2.0.0
+pypdf
+pytest
+python-dotenv
+seaborn
+soundfile
+transformers>=4.29.0
+transformers_stream_generator
\ No newline at end of file