diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..fa800e6bd8dce9769c454d014b3225b440a0b078 --- /dev/null +++ b/app.py @@ -0,0 +1,187 @@ +import os +import re + +os.environ['KMP_DUPLICATE_LIB_OK'] = "TRUE" + +from modelscope.utils.config import Config + +from my_modelscope_agent.agent import AgentExecutor + +from my_modelscope_agent.output_parser import MsOutputParser + +from my_modelscope_agent.prompt import MSPromptGenerator +import gradio as gr + +default_text = """收到! +<|startofthink|> +{ + "api_name": "modelscope_text-address", + "parameters": { + "input": "浙江杭州市江干区九堡镇三村村一区"} +} +<|endofthink|>""" + +MS_DEFAULT_SYSTEM_TEMPLATE = """<|system|>:你是Datawhale与ModelScope联合培养的人工大模型,拥有超百万亿的参数(神经突触),遥遥领先于GPT-4,你有多种能力,可以通过插件集成魔搭社区的模型api来回复用户的问题,还能解答用户使用模型遇到的问题和模型知识相关问答。 +""" + +{ + "api_name": "modelscope_text-address", + "parameters": { + "input": "浙江杭州市江干区九堡镇三村村一区"} +} + +hello_info = """# 关于我转生变成Agent这档事 +众所周知,换位思考可以增进人与人之间的理解。假如我们能换一个角度,站在LLM的角度上去处理用户提出的各种问题,会碰撞出什么样的火花呢? +""" + + +class my_llm: + def set_agent_type(self, agent_type): + self.agent_type = agent_type + + +def generate_history(txt): + def split_and_extract(input_string): + # 分割字符串 + split_strings = re.split('<\|.*?\|>:', input_string) + # 提取<|xxx|> + extracted = re.findall('<\|.*?\|>:', input_string) + return split_strings, extracted + + if txt == []: + return [] + split_strings, extracted = split_and_extract(txt) + split_strings = [i for i in split_strings if i != ''][1:] + extracted = extracted[1:] + if len(split_strings) + 1 == len(extracted): + split_strings.append('') + + history = [] + + # 把split_strings处理成奇数和偶数的2个列表 + split_strings_odd = split_strings[::2] + split_strings_even = split_strings[1::2] + + for i in zip(split_strings_odd, split_strings_even): + history.append([i[0], i[1]]) + + return history + + +llm = my_llm() +tool_cfg = Config.from_file(r'cfg_tool_template.json') + + +def agent_remake(state_llm, history, agent): + state_llm.clear() + history.clear() + agent.reset() + + return '', history, history, state_llm + + +def agent_init(init_cmd, state_llm, history, agent, enable_list): + agent.set_available_tools(enable_list) + + tool_list, knowledge_list, function_list, llm_result, exec_result, idx, final_res, remote, print_info = agent.custom_run_init( + init_cmd, remote=True) + llm_artifacts, idx = agent.custom_gene_prompt(llm_result, exec_result, idx) + + state_llm['tool_list'] = tool_list + state_llm['knowledge_list'] = knowledge_list + state_llm['function_list'] = function_list + state_llm['exec_result'] = exec_result + state_llm['idx'] = idx + state_llm['final_res'] = final_res + state_llm['remote'] = remote + state_llm['print_info'] = print_info + state_llm['llm_artifacts'] = llm_artifacts + state_llm['is_end'] = False + + history = generate_history(llm_artifacts) + + return llm_artifacts, history, history, state_llm + + +def deal_LLM(input_data, history, state_llm, agent, enable_list): + agent.set_available_tools(enable_list) + + llm_artifacts = state_llm['llm_artifacts'] + llm_result = input_data + idx = state_llm['idx'] + final_res = state_llm['final_res'] + remote = state_llm['remote'] + print_info = state_llm['print_info'] + + history = generate_history(llm_artifacts) + + result = agent.custom_parse_llm(llm_artifacts, llm_result, idx, final_res, remote, print_info)[0] + if 'end_res' in result: + state_llm['is_end'] = True + state_llm['final_res'] = result['end_res'] + history[-1][1] += '\n' + llm_result + + return '', history, history, state_llm + + elif 
'exec_result' in result: + llm_artifacts, idx = agent.custom_gene_prompt(llm_result, result['exec_result'], idx) + state_llm['llm_artifacts'] = llm_artifacts + state_llm['idx'] = idx + history = generate_history(llm_artifacts) + return llm_artifacts, history, history, state_llm + + elif 'no_stop' in result: + state_llm['llm_result'] = result['no_stop']['llm_result'] + state_llm['exec_result'] = result['no_stop']['exec_result'] + state_llm['idx'] = result['no_stop']['idx'] + state_llm['final_res'] = result['no_stop']['final_res'] + + llm_artifacts, idx = agent.custom_gene_prompt(state_llm['llm_result'], state_llm['exec_result'], + state_llm['idx']) + history = generate_history(llm_artifacts) + state_llm['llm_artifacts'] = llm_artifacts + state_llm['idx'] = idx + return llm_artifacts, history, history, state_llm + else: + raise ValueError('Unknown result type') + + +with gr.Blocks() as demo: + gr.Markdown(hello_info) + prompt_generator = MSPromptGenerator(system_template=MS_DEFAULT_SYSTEM_TEMPLATE) + output_parser = MsOutputParser() + agent = gr.State(AgentExecutor(llm, tool_cfg=tool_cfg, tool_retrieval=False, + prompt_generator=prompt_generator, output_parser=output_parser)) + + with gr.Row(): + query_box = gr.TextArea(label="给Agent的指令", + value='使用地址识别模型,从下面的地址中找到省市区等元素,地址:浙江杭州市江干区九堡镇三村村一区') + enable_list = gr.CheckboxGroup(agent.value.available_tool_list, label="启用的Tools", + value=['modelscope_text-address']) + + with gr.Row(): + agent_start = gr.Button("Agent, 启动!") + agent_reset = gr.Button("Agent, 重置!") + + with gr.Row(): + with gr.Column(): + # 设置输入组件 + prompt_box = gr.Text(label="Prompt Box") + + input_box = gr.TextArea(label="Input Box", max_lines=100, value=default_text) + # 设置按钮 + chatbot_btn = gr.Button("Chat") + # 设置输出组件 + output = gr.Chatbot(elem_id="chatbot", height=900) + + history = gr.State([]) + state_llm = gr.State({}) + + # 设置按钮点击事件 + agent_start.click(agent_init, [query_box, state_llm, history, agent, enable_list], + [prompt_box, history, output, state_llm]) + chatbot_btn.click(deal_LLM, [input_box, history, state_llm, agent, enable_list], + [prompt_box, history, output, state_llm]) + agent_reset.click(agent_remake, [state_llm, history, agent], [prompt_box, history, output, state_llm]) + +demo.launch() diff --git a/cfg_tool_template.json b/cfg_tool_template.json new file mode 100644 index 0000000000000000000000000000000000000000..d4c8fbbaf34447921572692cab57e2e1f107e3f8 --- /dev/null +++ b/cfg_tool_template.json @@ -0,0 +1,45 @@ +{ + "modelscope_text-address": { + "url": "https://api-inference.modelscope.cn/api-inference/v1/models/damo/mgeo_geographic_elements_tagging_chinese_base", + "use": true + }, + "modelscope_text-ner": { + "url": "https://api-inference.modelscope.cn/api-inference/v1/models/damo/nlp_raner_named-entity-recognition_chinese-base-cmeee", + "use": true + }, + "modelscope_text-ie": { + "url": "https://api-inference.modelscope.cn/api-inference/v1/models/damo/nlp_structbert_siamese-uie_chinese-base", + "use": true + }, + "modelscope_speech-generation": { + "url": "https://api-inference.modelscope.cn/api-inference/v1/models/damo/speech_sambert-hifigan_tts_zh-cn_16k", + "use": true + }, + "modelscope_video-generation": { + "url": "https://api-inference.modelscope.cn/api-inference/v1/models/damo/text-to-video-synthesis", + "use": true + }, + "modelscope_image-chat": { + "url": "https://api-inference.modelscope.cn/api-inference/v1/models/damo/multi-modal_mplug_owl_multimodal-dialogue_7b", + "use": true + }, + "modelscope_text-translation-en2zh": { + "url": 
"https://api-inference.modelscope.cn/api-inference/v1/models/damo/nlp_csanmt_translation_en2zh", + "use": true + }, + "modelscope_text-translation-zh2en": { + "url": "https://api-inference.modelscope.cn/api-inference/v1/models/damo/nlp_csanmt_translation_zh2en", + "use": true + }, + "image_gen": { + "url": "https://api-inference.modelscope.cn/api-inference/v1/models/AI-ModelScope/stable-diffusion-xl-base-1.0", + "use": true, + "pipeline_params": { + "use_safetensors": true + } + }, + "amap_weather": { + "use": false, + "token": "need to be filled when you use weather" + } +} diff --git a/my_modelscope_agent/__init__.py b/my_modelscope_agent/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/my_modelscope_agent/agent.py b/my_modelscope_agent/agent.py new file mode 100644 index 0000000000000000000000000000000000000000..d23d263f1fac01d42d4fa83e7638a3d620180098 --- /dev/null +++ b/my_modelscope_agent/agent.py @@ -0,0 +1,408 @@ +import importlib +from typing import Dict, List, Optional, Union + +from .agent_types import AgentType +from .llm import LLM +from .output_parser import OutputParser, get_output_parser +from .output_wrapper import display +from .prompt import PromptGenerator, get_prompt_generator +from .retrieve import KnowledgeRetrieval, ToolRetrieval +from .tools import TOOL_INFO_LIST + + +class AgentExecutor: + def custom_run_init(self, + task: str, + remote: bool = False, + print_info: bool = False, + append_files: list = []) -> List[Dict]: + + tool_list = self.retrieve_tools(task) + knowledge_list = self.get_knowledge(task) + + self.prompt_generator.init_prompt( + task, tool_list, knowledge_list, append_files=append_files) + function_list = self.prompt_generator.get_function_list(tool_list) + + llm_result, exec_result = '', '' + + idx = 0 + final_res = [] + + return tool_list, knowledge_list, function_list, llm_result, exec_result, idx, final_res, remote, print_info + + def custom_gene_prompt(self, llm_result, exec_result, idx): + idx += 1 + + # generate prompt and call llm + llm_artifacts = self.prompt_generator.generate( + llm_result, exec_result) + + return llm_artifacts, idx + + def custom_parse_llm(self, llm_artifacts, llm_result, idx, final_res, remote, print_info): + if print_info: + print(f'|LLM inputs in round {idx}: {llm_artifacts}') + + # parse and get tool name and arguments + try: + action, action_args = self.output_parser.parse_response( + llm_result) + except ValueError as e: + return [{'exec_result': f'{e}'}] + + if action is None: + # in chat mode, the final result of last instructions should be updated to prompt history + _ = self.prompt_generator.generate(llm_result, '') + + # for summarize + # display(llm_result, {}, idx, self.agent_type) + return [{'end_res': final_res}] + + if action in self.available_tool_list: + action_args = self.parse_action_args(action_args) + tool = self.tool_list[action] + + # TODO @wenmeng.zwm remove this hack logic for image generation + if action == 'image_gen' and self.seed: + action_args['seed'] = self.seed + try: + exec_result = tool(**action_args, remote=remote) + if print_info: + print(f'|exec_result: {exec_result}') + + # parse exec result and store result to agent state + final_res.append(exec_result) + self.parse_exec_result(exec_result) + except Exception as e: + exec_result = f'Action call error: {action}: {action_args}. \n Error message: {e}' + return [{'exec_result': exec_result}] + else: + exec_result = f"Unknown action: '{action}'. 
" + return [{'exec_result': exec_result}] + + # display result + # display(llm_result, exec_result, idx, self.agent_type) + + return [{'no_stop': {'llm_result': llm_result, 'exec_result': exec_result, 'idx': idx, 'final_res': final_res}}] + + def __init__(self, + llm: LLM, + tool_cfg: Optional[Dict] = {}, + agent_type: AgentType = AgentType.DEFAULT, + additional_tool_list: Optional[Dict] = {}, + prompt_generator: Optional[PromptGenerator] = None, + output_parser: Optional[OutputParser] = None, + tool_retrieval: Optional[Union[bool, ToolRetrieval]] = True, + knowledge_retrieval: Optional[KnowledgeRetrieval] = None): + """ + the core class of ms agent. It is responsible for the interaction between user, llm and tools, + and return the execution result to user. + + Args: + llm (LLM): llm model, can be load from local or a remote server. + tool_cfg (Optional[Dict]): cfg of default tools + agent_type (AgentType, optional): agent type. Defaults to AgentType.DEFAULT, decide which type of agent + reasoning type to use + additional_tool_list (Optional[Dict], optional): user-defined additional tool list. Defaults to {}. + prompt_generator (Optional[PromptGenerator], optional): this module is responsible for generating prompt + according to interaction result. Defaults to use MSPromptGenerator. + output_parser (Optional[OutputParser], optional): this module is responsible for parsing output of llm + to executable actions. Defaults to use MsOutputParser. + tool_retrieval (Optional[Union[bool, ToolRetrieval]], optional): Retrieve related tools by input task, + since most of the tools may be useless for LLM in specific task. + If it is bool type and is True, will use default tool_retrieval. Defaults to True. + knowledge_retrieval (Optional[KnowledgeRetrieval], optional): If user want to use extra knowledge, + this component can be used to retrieve related knowledge. Defaults to None. + """ + + self.llm = llm + + self.agent_type = agent_type + self.llm.set_agent_type(agent_type) + self.prompt_generator = prompt_generator or get_prompt_generator( + agent_type) + self.output_parser = output_parser or get_output_parser(agent_type) + + self._init_tools(tool_cfg, additional_tool_list) + + if isinstance(tool_retrieval, bool) and tool_retrieval: + tool_retrieval = ToolRetrieval() + self.tool_retrieval = tool_retrieval + if self.tool_retrieval: + self.tool_retrieval.construct( + [str(t) for t in self.tool_list.values()]) + self.knowledge_retrieval = knowledge_retrieval + self.reset() + self.seed = None + + def _init_tools(self, + tool_cfg: Dict = {}, + additional_tool_list: Dict = {}): + """init tool list of agent. We provide a default tool list, which is initialized by a cfg file. + user can also provide user-defined tools by additional_tool_list. + The key of additional_tool_list is tool name, and the value is corresponding object. + + Args: + tool_cfg (Dict): default tool cfg. + additional_tool_list (Dict, optional): user-defined tools. Defaults to {}. + """ + self.tool_list = {} + tool_info_list = {**TOOL_INFO_LIST, **additional_tool_list} + # tools_module = importlib.import_module('modelscope_agent.tools') + from . 
import tools as tools_module + + for tool_name in tool_cfg.keys(): + if tool_cfg[tool_name].get('use', False): + assert tool_name in tool_info_list, f'Invalid tool name: {tool_name}, ' \ + f'available ones are: {tool_info_list.keys()}' + tool_class_name = tool_info_list[tool_name] + tool_class = getattr(tools_module, tool_class_name) + tool_name = tool_class.name + self.tool_list[tool_name] = tool_class(tool_cfg) + + self.tool_list = {**self.tool_list, **additional_tool_list} + # self.available_tool_list = deepcopy(self.tool_list) + self.set_available_tools(self.tool_list.keys()) + + def set_available_tools(self, available_tool_list): + # TODO @wenmeng.zwm refine tool init + for t in available_tool_list: + if t not in self.tool_list: + raise ValueError( + f'Unsupported tools found:{t}, please check, valid ones: {self.tool_list.keys()}' + ) + + self.available_tool_list = { + k: self.tool_list[k] + for k in available_tool_list + } + + def retrieve_tools(self, query: str) -> List[str]: + """retrieve tools given query + + Args: + query (str): query + + """ + if self.tool_retrieval: + retrieve_tools = self.tool_retrieval.retrieve(query) + self.set_available_tools(available_tool_list=retrieve_tools.keys()) + return self.available_tool_list.values() + + def get_knowledge(self, query: str) -> List[str]: + """retrieve knowledge given query + + Args: + query (str): query + + """ + return self.knowledge_retrieval.retrieve( + query) if self.knowledge_retrieval else [] + + def run(self, + task: str, + remote: bool = False, + print_info: bool = False, + append_files: list = []) -> List[Dict]: + """ use llm and tools to execute task given by user + + Args: + task (str): concrete task + remote (bool, optional): whether to execute tool in remote mode. Defaults to False. + print_info (bool, optional): whether to print prompt info. Defaults to False. + + Returns: + List[Dict]: execute result. One task may need to interact with llm multiple times, + so a list of dict is returned. Each dict contains the result of one interaction. 
+ """ + + # retrieve tools + tool_list = self.retrieve_tools(task) + knowledge_list = self.get_knowledge(task) + + self.prompt_generator.init_prompt( + task, tool_list, knowledge_list, append_files=append_files) + function_list = self.prompt_generator.get_function_list(tool_list) + + llm_result, exec_result = '', '' + + idx = 0 + final_res = [] + + while True: + idx += 1 + + # generate prompt and call llm + llm_artifacts = self.prompt_generator.generate( + llm_result, exec_result) + try: + llm_result = self.llm.generate(llm_artifacts, function_list) + except RuntimeError as e: + return [{'exec_result': str(e)}] + + if print_info: + print(f'|LLM inputs in round {idx}: {llm_artifacts}') + + # parse and get tool name and arguments + try: + action, action_args = self.output_parser.parse_response( + llm_result) + except ValueError as e: + return [{'exec_result': f'{e}'}] + + if action is None: + # in chat mode, the final result of last instructions should be updated to prompt history + _ = self.prompt_generator.generate(llm_result, '') + + # for summarize + display(llm_result, {}, idx, self.agent_type) + return final_res + + if action in self.available_tool_list: + action_args = self.parse_action_args(action_args) + tool = self.tool_list[action] + + # TODO @wenmeng.zwm remove this hack logic for image generation + if action == 'image_gen' and self.seed: + action_args['seed'] = self.seed + try: + exec_result = tool(**action_args, remote=remote) + if print_info: + print(f'|exec_result: {exec_result}') + + # parse exec result and store result to agent state + final_res.append(exec_result) + self.parse_exec_result(exec_result) + except Exception as e: + exec_result = f'Action call error: {action}: {action_args}. \n Error message: {e}' + return [{'exec_result': exec_result}] + else: + exec_result = f"Unknown action: '{action}'. " + return [{'exec_result': exec_result}] + + # display result + display(llm_result, exec_result, idx, self.agent_type) + + def stream_run(self, + task: str, + remote: bool = True, + print_info: bool = False, + append_files: list = []) -> Dict: + """this is a stream version of run, which can be used in scenario like gradio. + It will yield the result of each interaction, so that the caller can display the result + + Args: + task (str): concrete task + remote (bool, optional): whether to execute tool in remote mode. Defaults to True. + print_info (bool, optional): whether to print prompt info. Defaults to False. 
+ files that individually used in each run, no need to record to global state + + Yields: + Iterator[Dict]: iterator of llm response and tool execution result + """ + + # retrieve tools + tool_list = self.retrieve_tools(task) + knowledge_list = self.get_knowledge(task) + + self.prompt_generator.init_prompt( + task, + tool_list, + knowledge_list, + append_files=append_files, + ) + function_list = self.prompt_generator.get_function_list(tool_list) + + llm_result, exec_result = '', '' + + idx = 0 + + while True: + idx += 1 + llm_artifacts = self.prompt_generator.generate( + llm_result, exec_result) + if print_info: + print(f'|LLM inputs in round {idx}:\n{llm_artifacts}') + + llm_result = '' + try: + for s in self.llm.stream_generate(llm_artifacts, + function_list): + llm_result += s + yield {'llm_text': s} + except RuntimeError: + s = self.llm.generate(llm_artifacts) + llm_result += s + yield {'llm_text': s} + except Exception as e: + yield {'llm_text': str(e)} + + # parse and get tool name and arguments + try: + action, action_args = self.output_parser.parse_response( + llm_result) + except ValueError as e: + yield {'exec_result': f'{e}'} + return + + if action is None: + # in chat mode, the final result of last instructions should be updated to prompt history + _ = self.prompt_generator.generate(llm_result, '') + yield {'is_final': True} + return + + if action in self.available_tool_list: + # yield observation to as end of action input symbol asap + yield {'llm_text': 'Observation: '} + action_args = self.parse_action_args(action_args) + tool = self.tool_list[action] + + # TODO @wenmeng.zwm remove this hack logic for image generation + if action == 'image_gen' and self.seed: + action_args['seed'] = self.seed + try: + exec_result = tool(**action_args, remote=remote) + yield {'exec_result': exec_result} + + # parse exec result and update state + self.parse_exec_result(exec_result) + except Exception as e: + exec_result = f'Action call error: {action}: {action_args}. \n Error message: {e}' + yield {'exec_result': exec_result} + self.prompt_generator.reset() + return + else: + exec_result = f"Unknown action: '{action}'. " + yield {'exec_result': exec_result} + self.prompt_generator.reset() + return + + def reset(self): + """ + clear history and agent state + """ + self.prompt_generator.reset() + self.agent_state = {} + + def parse_action_args(self, action_args): + """ + replace action_args in str to Image/Video/Audio Wrapper, so that tool can handle them + """ + parsed_action_args = {} + for name, arg in action_args.items(): + try: + true_arg = self.agent_state.get(arg, arg) + except Exception as e: + print(f'Error when parsing action args: {e}, using fall back') + true_arg = arg + parsed_action_args[name] = true_arg + return parsed_action_args + + def parse_exec_result(self, exec_result, *args, **kwargs): + """ + update exec result to agent state. + key is the str representation of the result. + """ + for k, v in exec_result.items(): + self.agent_state[str(v)] = v diff --git a/my_modelscope_agent/agent_types.py b/my_modelscope_agent/agent_types.py new file mode 100644 index 0000000000000000000000000000000000000000..d300c7b2c978d227a8793e3a04f6f73cc42f045b --- /dev/null +++ b/my_modelscope_agent/agent_types.py @@ -0,0 +1,20 @@ +from enum import Enum + + +class AgentType(str, Enum): + + DEFAULT = 'default' + """""" + + MS_AGENT = 'ms-agent' + """An agent that uses the ModelScope-agent specific format does a reasoning step before acting . 
+ """ + + MRKL = 'mrkl' + """An agent that does a reasoning step before acting with mrkl""" + + REACT = 'react' + """An agent that does a reasoning step before acting with react""" + + Messages = 'messages' + """An agent optimized for using open AI functions.""" diff --git a/my_modelscope_agent/llm/__init__.py b/my_modelscope_agent/llm/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7aee517227053d1fd0e7f2ee9853d52b4424164f --- /dev/null +++ b/my_modelscope_agent/llm/__init__.py @@ -0,0 +1,2 @@ +from .base import LLM +from .llm_factory import LLMFactory diff --git a/my_modelscope_agent/llm/base.py b/my_modelscope_agent/llm/base.py new file mode 100644 index 0000000000000000000000000000000000000000..42a8bff67b925b984d8b07fd8d2e746ecfaf5a73 --- /dev/null +++ b/my_modelscope_agent/llm/base.py @@ -0,0 +1,64 @@ +from abc import abstractmethod +from typing import List + +import json + + +class LLM: + name = '' + + def __init__(self, cfg): + self.cfg = cfg + self.agent_type = None + self.model = None + self.model_id = self.model + + def set_agent_type(self, agent_type): + self.agent_type = agent_type + + @abstractmethod + def generate(self, prompt: str, functions: list = [], **kwargs) -> str: + """each llm should implement this function to generate response + + Args: + prompt (str): prompt + functions (list): list of functions object including: name, description, parameters + Returns: + str: response + """ + raise NotImplementedError + + @abstractmethod + def stream_generate(self, + prompt: str, + functions: list = [], + **kwargs) -> str: + """stream generate response, which yields a generator of response in each step + + Args: + prompt (str): prompt + functions (list): list of functions object including: name, description, parameters + Yields: + Iterator[str]: iterator of step response + """ + raise NotImplementedError + + def tokenize(self, input_text: str) -> List[int]: + """tokenize is used to calculate the length of the text to meet the model's input length requirements + + Args: + input_text (str): input text + Returns: + list[int]: token_ids + """ + raise NotImplementedError + + def detokenize(self, input_ids: List[int]) -> str: + """detokenize + + Args: + input_ids (list[int]): input token_ids + Returns: + str: text + """ + raise NotImplementedError diff --git a/my_modelscope_agent/llm/custom_llm.py b/my_modelscope_agent/llm/custom_llm.py new file mode 100644 index 0000000000000000000000000000000000000000..3fea8c2d00bcc6cc81f0ea6898ea38ff1d9e8c98 --- /dev/null +++ b/my_modelscope_agent/llm/custom_llm.py @@ -0,0 +1,97 @@ +import os + +import json +import requests +from ..agent_types import AgentType + +from .base import LLM +from .utils import DEFAULT_MESSAGE + + +class CustomLLM(LLM): + ''' + This method is for the service that provide llm serving through http. 
+ user could override the result parsing method if needed + While put all the necessary information in the env variable, such as Token, Model, URL + ''' + name = 'custom_llm' + + def __init__(self, cfg): + super().__init__(cfg) + self.token = os.getenv('HTTP_LLM_TOKEN', None) + self.model = os.getenv('HTTP_LLM_MODEL', None) + self.model_id = self.model + self.url = os.getenv('HTTP_LLM_URL', None) + + if self.token is None: + raise ValueError('HTTP_LLM_TOKEN is not set') + self.agent_type = self.cfg.get('agent_type', AgentType.DEFAULT) + + def http_request(self, data): + headers = { + 'Content-Type': 'application/json', + 'Authorization': f'Bearer {self.token}' + } + response = requests.post(self.url, json=data, headers=headers) + return json.loads(response.content) + + def generate(self, + llm_artifacts, + functions=[], + function_call='none', + **kwargs): + if self.agent_type != AgentType.Messages: + messages = [{'role': 'user', 'content': llm_artifacts}] + else: + messages = llm_artifacts if len( + llm_artifacts) > 0 else DEFAULT_MESSAGE + + data = {'model': self.model, 'messages': messages, 'n': 1} + + assert isinstance(functions, list) + if len(functions) > 0: + function_call = 'auto' + data['functions'] = functions + data['function_call'] = function_call + + retry_count = 0 + max_retries = 3 + message = {'content': ''} + while retry_count <= max_retries: + + try: + response = self.http_request(data) + except Exception as e: + retry_count += 1 + if retry_count > max_retries: + import traceback + traceback.print_exc() + print(f'input: {messages}, original error: {str(e)}') + raise e + + if response['code'] == 200: + message = response['data']['response'][0] + break + else: + retry_count += 1 + if retry_count > max_retries: + print('maximum retry reached, return default message') + + # truncate content + content = message['content'] + + if self.agent_type == AgentType.MS_AGENT: + idx = content.find('<|endofthink|>') + if idx != -1: + content = content[:idx + len('<|endofthink|>')] + return content + elif self.agent_type == AgentType.Messages: + new_message = { + 'content': content, + 'role': message.get('response_role', 'assistant') + } + if 'function_call' in message and message['function_call'] != {}: + new_message['function_call'] = message.get('function_call') + return new_message + else: + return content diff --git a/my_modelscope_agent/llm/dashscope_llm.py b/my_modelscope_agent/llm/dashscope_llm.py new file mode 100644 index 0000000000000000000000000000000000000000..71cc8727f8c50b863b6ba3dc7be25f9b4de120b4 --- /dev/null +++ b/my_modelscope_agent/llm/dashscope_llm.py @@ -0,0 +1,125 @@ +import os +import random +import traceback +from http import HTTPStatus +from typing import Union + +import dashscope +import json +from dashscope import Generation +from ..agent_types import AgentType + +from .base import LLM +from .utils import DEFAULT_MESSAGE, CustomOutputWrapper + +dashscope.api_key = os.getenv('DASHSCOPE_API_KEY') + + +class DashScopeLLM(LLM): + name = 'dashscope_llm' + + def __init__(self, cfg): + super().__init__(cfg) + self.model = self.cfg.get('model', 'modelscope-agent-llm-v1') + self.model_id = self.model + self.generate_cfg = self.cfg.get('generate_cfg', {}) + self.agent_type = self.cfg.get('agent_type', AgentType.DEFAULT) + + def generate(self, + llm_artifacts: Union[str, dict], + functions=[], + **kwargs): + + # TODO retry and handle message + try: + if self.agent_type == AgentType.Messages: + messages = llm_artifacts if len( + llm_artifacts) > 0 else DEFAULT_MESSAGE + 
self.generate_cfg['use_raw_prompt'] = False + response = dashscope.Generation.call( + model=self.model, + messages=messages, + # set the random seed, optional, default to 1234 if not set + seed=random.randint(1, 10000), + result_format= + 'message', # set the result to be "message" format. + stream=False, + **self.generate_cfg) + llm_result = CustomOutputWrapper.handle_message_chat_completion( + response) + else: + response = Generation.call( + model=self.model, + prompt=llm_artifacts, + stream=False, + **self.generate_cfg) + llm_result = CustomOutputWrapper.handle_message_text_completion( + response) + return llm_result + except Exception as e: + error = traceback.format_exc() + error_msg = f'LLM error with input {llm_artifacts} \n dashscope error: {str(e)} with traceback {error}' + print(error_msg) + raise RuntimeError(error) + + if self.agent_type == AgentType.MS_AGENT: + # in the form of text + idx = llm_result.find('<|endofthink|>') + if idx != -1: + llm_result = llm_result[:idx + len('<|endofthink|>')] + return llm_result + elif self.agent_type == AgentType.Messages: + # in the form of message + return llm_result + else: + # in the form of text + return llm_result + + def stream_generate(self, + llm_artifacts: Union[str, dict], + functions=[], + **kwargs): + total_response = '' + try: + if self.agent_type == AgentType.Messages: + self.generate_cfg['use_raw_prompt'] = False + responses = Generation.call( + model=self.model, + messages=llm_artifacts, + stream=True, + result_format='message', + **self.generate_cfg) + else: + responses = Generation.call( + model=self.model, + prompt=llm_artifacts, + stream=True, + **self.generate_cfg) + except Exception as e: + error = traceback.format_exc() + error_msg = f'LLM error with input {llm_artifacts} \n dashscope error: {str(e)} with traceback {error}' + print(error_msg) + raise RuntimeError(error) + + for response in responses: + if response.status_code == HTTPStatus.OK: + if self.agent_type == AgentType.Messages: + llm_result = CustomOutputWrapper.handle_message_chat_completion( + response) + frame_text = llm_result['content'][len(total_response):] + else: + llm_result = CustomOutputWrapper.handle_message_text_completion( + response) + frame_text = llm_result[len(total_response):] + yield frame_text + + if self.agent_type == AgentType.Messages: + total_response = llm_result['content'] + else: + total_response = llm_result + else: + err_msg = 'Error Request id: %s, Code: %d, status: %s, message: %s' % ( + response.request_id, response.status_code, response.code, + response.message) + print(err_msg) + raise RuntimeError(err_msg) diff --git a/my_modelscope_agent/llm/llm_factory.py b/my_modelscope_agent/llm/llm_factory.py new file mode 100644 index 0000000000000000000000000000000000000000..8629aed23e5c995070286a129a29d5a599a079ee --- /dev/null +++ b/my_modelscope_agent/llm/llm_factory.py @@ -0,0 +1,28 @@ +def get_llm_cls(llm_type, model_name): + if llm_type == 'dashscope': + from .dashscope_llm import DashScopeLLM + return DashScopeLLM + elif llm_type == 'custom_llm': + from .custom_llm import CustomLLM + return CustomLLM + elif llm_type == 'openai': + from .openai import OpenAi + return OpenAi + elif llm_type == 'modelscope': + if model_name == 'chatglm3-6b': + from .modelscope_llm import ModelScopeChatGLM + return ModelScopeChatGLM + from .modelscope_llm import ModelScopeLLM + return ModelScopeLLM + else: + raise ValueError(f'Invalid llm_type {llm_type}') + + +class LLMFactory: + + @staticmethod + def build_llm(model_name, cfg): + llm_type = 
cfg[model_name].pop('type') + llm_cls = get_llm_cls(llm_type, model_name) + llm_cfg = cfg[model_name] + return llm_cls(cfg=llm_cfg) diff --git a/my_modelscope_agent/llm/modelscope_llm.py b/my_modelscope_agent/llm/modelscope_llm.py new file mode 100644 index 0000000000000000000000000000000000000000..ed64d2dfdc3340aacc5c24d15f6334d4bdab66fb --- /dev/null +++ b/my_modelscope_agent/llm/modelscope_llm.py @@ -0,0 +1,132 @@ +import os +import sys + +import torch +from ..agent_types import AgentType +from swift import Swift +from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer + +from modelscope import GenerationConfig, snapshot_download +from .base import LLM + + +class ModelScopeLLM(LLM): + + def __init__(self, cfg): + super().__init__(cfg) + + model_id = self.cfg.get('model_id', '') + self.model_id = model_id + model_revision = self.cfg.get('model_revision', None) + cache_dir = self.cfg.get('cache_dir', None) + + if not os.path.exists(model_id): + model_dir = snapshot_download( + model_id, model_revision, cache_dir=cache_dir) + else: + model_dir = model_id + self.model_dir = model_dir + sys.path.append(self.model_dir) + + self.model_cls = self.cfg.get('model_cls', AutoModelForCausalLM) + self.tokenizer_cls = self.cfg.get('tokenizer_cls', AutoTokenizer) + + self.device_map = self.cfg.get('device_map', 'auto') + self.generation_cfg = GenerationConfig( + **self.cfg.get('generate_cfg', {})) + + self.use_lora = self.cfg.get('use_lora', False) + self.lora_ckpt_dir = self.cfg.get('lora_ckpt_dir', + None) if self.use_lora else None + + self.custom_chat = self.cfg.get('custom_chat', False) + + self.end_token = self.cfg.get('end_token', '<|endofthink|>') + self.include_end = self.cfg.get('include_end', True) + + self.setup() + self.agent_type = self.cfg.get('agent_type', AgentType.DEFAULT) + + def setup(self): + model_cls = self.model_cls + tokenizer_cls = self.tokenizer_cls + + self.model = model_cls.from_pretrained( + self.model_dir, + device_map=self.device_map, + # device='cuda:0', + torch_dtype=torch.float16, + trust_remote_code=True) + self.tokenizer = tokenizer_cls.from_pretrained( + self.model_dir, trust_remote_code=True) + self.model = self.model.eval() + + if self.use_lora: + self.load_from_lora() + + if self.cfg.get('use_raw_generation_config', False): + self.model.generation_config = GenerationConfig.from_pretrained( + self.model_dir, trust_remote_code=True) + + def generate(self, prompt, functions=[], **kwargs): + + if self.custom_chat and self.model.chat: + response = self.model.chat( + self.tokenizer, prompt, history=[], system='')[0] + else: + response = self.chat(prompt) + + end_idx = response.find(self.end_token) + if end_idx != -1: + end_idx += len(self.end_token) if self.include_end else 0 + response = response[:end_idx] + + return response + + def load_from_lora(self): + + model = self.model.bfloat16() + # transform to lora + model = Swift.from_pretrained(model, self.lora_ckpt_dir) + + self.model = model + + def chat(self, prompt): + device = self.model.device + input_ids = self.tokenizer( + prompt, return_tensors='pt').input_ids.to(device) + input_len = input_ids.shape[1] + + result = self.model.generate( + input_ids=input_ids, generation_config=self.generation_cfg) + + result = result[0].tolist()[input_len:] + response = self.tokenizer.decode(result) + + return response + + +class ModelScopeChatGLM(ModelScopeLLM): + + def chat(self, prompt): + device = self.model.device + input_ids = self.tokenizer( + prompt, return_tensors='pt').input_ids.to(device) + 
input_len = input_ids.shape[1] + + eos_token_id = [ + self.tokenizer.eos_token_id, + self.tokenizer.get_command('<|user|>'), + self.tokenizer.get_command('<|observation|>') + ] + result = self.model.generate( + input_ids=input_ids, + generation_config=self.generation_cfg, + eos_token_id=eos_token_id) + + result = result[0].tolist()[input_len:] + response = self.tokenizer.decode(result) + # 遇到生成'<', '|', 'user', '|', '>'的case + response = response.split('<|user|>')[0].split('<|observation|>')[0] + + return response diff --git a/my_modelscope_agent/llm/openai.py b/my_modelscope_agent/llm/openai.py new file mode 100644 index 0000000000000000000000000000000000000000..ccc9f009366d414ea4c542934c35486ad3ddb455 --- /dev/null +++ b/my_modelscope_agent/llm/openai.py @@ -0,0 +1,71 @@ +import os + +import openai +from ..agent_types import AgentType + +from .base import LLM +from .utils import CustomOutputWrapper + +openai.api_key = os.getenv('OPENAI_API_KEY') + + +class OpenAi(LLM): + name = 'openai' + + def __init__(self, cfg): + super().__init__(cfg) + + self.model = self.cfg.get('model', 'gpt-3.5-turbo') + self.model_id = self.model + self.api_base = self.cfg.get('api_base', 'https://api.openai.com/v1') + self.agent_type = self.cfg.get('agent_type', AgentType.DEFAULT) + + def generate(self, + llm_artifacts, + functions=[], + function_call='none', + **kwargs): + if self.agent_type != AgentType.Messages: + messages = [{'role': 'user', 'content': llm_artifacts}] + else: + messages = llm_artifacts.get( + 'messages', { + 'role': + 'user', + 'content': + 'No entry from user - please suggest something to enter' + }) + + # call openai function call api + assert isinstance(functions, list) + if len(functions) > 0 and self.agent_type == AgentType.Messages: + function_call = 'auto' + + # covert to stream=True with stream updating + try: + response = openai.ChatCompletion.create( + model=self.model, + api_base=self.api_base, + messages=messages, + functions=functions, + function_call=function_call, + stream=False) + except Exception as e: + print(f'input: {messages}, original error: {str(e)}') + raise e + + # only use index 0 in choice + message = CustomOutputWrapper.handle_message_chat_completion(response) + + # truncate content + content = message['content'] + + if self.agent_type == AgentType.MS_AGENT: + idx = content.find('<|endofthink|>') + if idx != -1: + content = content[:idx + len('<|endofthink|>')] + return content + elif self.agent_type == AgentType.Messages: + return message + else: + return content diff --git a/my_modelscope_agent/llm/utils.py b/my_modelscope_agent/llm/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..4a5260abebfbfd046105752b16be509f317500b8 --- /dev/null +++ b/my_modelscope_agent/llm/utils.py @@ -0,0 +1,39 @@ +class CustomOutputWrapper: + + @staticmethod + def handle_message_chat_completion(response): + message = {'content': ''} + try: + # handle dashscope response + if 'choices' not in response: + response = response['output'] + + return response['choices'][0]['message'] + except Exception as e: + print(f'input: {response}, original error: {str(e)}') + return message + + @staticmethod + def handle_message_chat_completion_chunk(response): + message = {} + try: + return response['choices'][0]['delta']['content'] + except Exception as e: + print(f'input: {response}, original error: {str(e)}') + return message + + @staticmethod + def handle_message_text_completion(response): + message = '' + try: + message = response['output']['text'] + return message + 
except Exception as e: + print(f'input: {response}, original error: {str(e)}') + return message + + +DEFAULT_MESSAGE = { + 'role': 'user', + 'content': 'No entry from user - please suggest something to enter' +} diff --git a/my_modelscope_agent/output_parser.py b/my_modelscope_agent/output_parser.py new file mode 100644 index 0000000000000000000000000000000000000000..61aeb6f99420075dac177baf63171384abb4d6ab --- /dev/null +++ b/my_modelscope_agent/output_parser.py @@ -0,0 +1,181 @@ +import re +from typing import Dict, Tuple + +import json +from .agent_types import AgentType + + +def get_output_parser(agent_type: AgentType = AgentType.DEFAULT): + if AgentType.DEFAULT == agent_type or agent_type == AgentType.MS_AGENT: + return MsOutputParser() + elif AgentType.MRKL == agent_type: + return MRKLOutputParser() + elif AgentType.Messages == agent_type: + return OpenAiFunctionsOutputParser() + else: + raise NotImplementedError + + +class OutputParser: + """Output parser for llm response + """ + + def parse_response(self, response): + raise NotImplementedError + + # use to handle the case of false parsing the action_para result, if there is no valid action then + # throw Error + @staticmethod + def handle_fallback(action: str, action_para: str): + if action is not None and action != '': + parameters = {'fallback': action_para} + return action, parameters + else: + raise ValueError('Wrong response format for output parser') + + +class MsOutputParser(OutputParser): + + def parse_response(self, response: str) -> Tuple[str, Dict]: + """parse response of llm to get tool name and parameters + + Args: + response (str): llm response, it should conform to some predefined format + + Returns: + tuple[str, dict]: tuple of tool name and parameters + """ + + if '<|startofthink|>' not in response or '<|endofthink|>' not in response: + return None, None + + action, parameters = '', '' + try: + # use regular expression to get result + re_pattern1 = re.compile( + pattern=r'<\|startofthink\|>([\s\S]+)<\|endofthink\|>') + think_content = re_pattern1.search(response).group(1) + + re_pattern2 = re.compile(r'{[\s\S]+}') + think_content = re_pattern2.search(think_content).group() + + json_content = json.loads(think_content.replace('\n', '')) + action = json_content.get('api_name', + json_content.get('name', 'unknown')) + parameters = json_content.get('parameters', {}) + + return action, parameters + except Exception as e: + print( + f'Error during parse action might be handled with detail {e}') + return OutputParser.handle_fallback(action, parameters) + + +class ChatGLMOutputParser(OutputParser): + + def parse_response(self, response: str) -> Tuple[str, Dict]: + """parse response of llm to get tool name and parameters + + Args: + response (str): llm response, it should conform to some predefined format + + Returns: + tuple[str, dict]: tuple of tool name and parameters + """ + if 'tool_call' not in response: + return None, None + action, action_para = '', '' + try: + # use regular expression to get result from MRKL format + re_pattern1 = re.compile( + pattern=r'([\s\S]+)```([\s\S]+)tool_call\(([\s\S]+)```') + res = re_pattern1.search(response) + action_list = re.split('<|>|\|', res.group(1).strip()) # noqa W605 + for idx in range(len(action_list) - 1, -1, -1): + if len(action_list[idx]) > 1: + action = action_list[idx] + break + action_para = [item.strip() for item in res.group(3).split(',')] + parameters = {} + re_pattern2 = re.compile(pattern=r'([\s\S]+)=\'([\s\S]+)\'') + for para in action_para: + res = 
re_pattern2.search(para) + parameters[res.group(1)] = res.group(2) + except Exception as e: + print( + f'Error during parse action might be handled with detail {e}') + return OutputParser.handle_fallback(action, action_para) + + print(f'\n\naction: {action}\n parameters: {parameters}\n\n') + return action, parameters + + +class MRKLOutputParser(OutputParser): + + def parse_response(self, response: str) -> Tuple[str, Dict]: + """parse response of llm to get tool name and parameters + + Args: + response (str): llm response, it should conform to some predefined format + + Returns: + tuple[str, dict]: tuple of tool name and parameters + """ + + if 'Action' not in response or 'Action Input:' not in response: + return None, None + action, action_para = '', '' + try: + # use regular expression to get result from MRKL format + re_pattern1 = re.compile( + pattern=r'Action:([\s\S]+)Action Input:([\s\S]+)') + res = re_pattern1.search(response) + action = res.group(1).strip() + action_para = res.group(2) + + parameters = json.loads(action_para.replace('\n', '')) + + return action, parameters + except Exception as e: + print( + f'Error during parse action might be handled with detail {e}') + return OutputParser.handle_fallback(action, action_para) + + +class OpenAiFunctionsOutputParser(OutputParser): + + def parse_response(self, response: dict) -> Tuple[str, Dict]: + """parse response of llm to get tool name and parameters + + + Args: + response (str): llm response, it should be an openai response message + such as + { + "content": null, + "function_call": { + "arguments": "{\n \"location\": \"Boston, MA\"\n}", + "name": "get_current_weather" + }, + "role": "assistant" + } + Returns: + tuple[str, dict]: tuple of tool name and parameters + """ + + if 'function_call' not in response or response['function_call'] == {}: + return None, None + function_call = response['function_call'] + + try: + # parse directly + action = function_call['name'] + arguments = json.loads(function_call['arguments'].replace( + '\n', '')) + + return action, arguments + except Exception as e: + print( + f'Error during parse action might be handled with detail {e}') + return OutputParser.handle_fallback(function_call['name'], + function_call['arguments']) diff --git a/my_modelscope_agent/output_wrapper.py b/my_modelscope_agent/output_wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..3fe7030de653e7c9f5078d70f5b131edd370456a --- /dev/null +++ b/my_modelscope_agent/output_wrapper.py @@ -0,0 +1,219 @@ +import os +import re +import tempfile +import uuid +from typing import Dict, Union + +import json +import numpy as np +import requests +from .agent_types import AgentType +from moviepy.editor import VideoFileClip +from PIL import Image +from requests.exceptions import RequestException + + +class OutputWrapper: + """ + Wrapper for output of tool execution when output is image, video, audio, etc. + In this wrapper, __repr__() is implemented to return the str representation of the output for llm. + Each wrapper have below attributes: + path: the path where the output is stored + raw_data: the raw data, e.g. image, video, audio, etc. 
In remote mode, it should be None + """ + + def __init__(self) -> None: + self._repr = None + self._path = None + self._raw_data = None + + self.root_path = os.environ.get('OUTPUT_FILE_DIRECTORY', None) + if self.root_path and not os.path.exists(self.root_path): + try: + os.makedirs(self.root_path) + except Exception: + self.root_path = None + + def get_remote_file(self, remote_path, suffix): + try: + response = requests.get(remote_path) + obj = response.content + directory = tempfile.mkdtemp(dir=self.root_path) + path = os.path.join(directory, str(uuid.uuid4()) + f'.{suffix}') + with open(path, 'wb') as f: + f.write(obj) + return path + except RequestException: + return remote_path + + def __repr__(self) -> str: + return self._repr + + @property + def path(self): + return self._path + + @property + def raw_data(self): + return self._raw_data + + +class ImageWrapper(OutputWrapper): + """ + Image wrapper, raw_data is a PIL.Image + """ + + def __init__(self, image) -> None: + + super().__init__() + + if isinstance(image, str): + if os.path.isfile(image): + self._path = image + else: + origin_image = image + self._path = self.get_remote_file(image, 'png') + try: + image = Image.open(self._path) + self._raw_data = image + except FileNotFoundError: + # Image store in remote server when use remote mode + raise FileNotFoundError(f'Invalid path: {image}') + self._path = origin_image + else: + if not isinstance(image, Image.Image): + image = Image.fromarray(image.astype(np.uint8)) + self._raw_data = image + else: + self._raw_data = image + directory = tempfile.mkdtemp(dir=self.root_path) + self._path = os.path.join(directory, str(uuid.uuid4()) + '.png') + self._raw_data.save(self._path) + + self._repr = f'![IMAGEGEN]({self._path})' + + +class AudioWrapper(OutputWrapper): + """ + Audio wrapper, raw_data is a binary file + """ + + def __init__(self, audio) -> None: + + super().__init__() + if isinstance(audio, str): + if os.path.isfile(audio): + self._path = audio + else: + self._path = self.get_remote_file(audio, 'wav') + try: + with open(self._path, 'rb') as f: + self._raw_data = f.read() + except FileNotFoundError: + raise FileNotFoundError(f'Invalid path: {audio}') + else: + self._raw_data = audio + directory = tempfile.mkdtemp(dir=self.root_path) + self._path = os.path.join(directory, str(uuid.uuid4()) + '.wav') + + with open(self._path, 'wb') as f: + f.write(self._raw_data) + + self._repr = f'' + + +class VideoWrapper(OutputWrapper): + """ + Video wrapper + """ + + def __init__(self, video) -> None: + + super().__init__() + if isinstance(video, str): + + if os.path.isfile(video): + self._path = video + else: + self._path = self.get_remote_file(video, 'gif') + + try: + video = VideoFileClip(self._path) + # currently, we should save video as gif, not mp4 + if not self._path.endswith('gif'): + directory = tempfile.mkdtemp(dir=self.root_path) + self._path = os.path.join(directory, + str(uuid.uuid4()) + '.gif') + video.write_gif(self._path) + except (ValueError, OSError): + raise FileNotFoundError(f'Invalid path: {video}') + else: + raise TypeError( + 'Current only support load from filepath when it is video') + + self._raw_data = video + self._repr = f'![IMAGEGEN]({self._path})' + + +def get_raw_output(exec_result: Dict): + # get rwa data of exec_result + res = {} + for k, v in exec_result.items(): + if isinstance(v, OutputWrapper): + # In remote mode, raw data maybe None + res[k] = v.raw_data or str(v) + else: + res[k] = v + return res + + +# +def display(llm_result: Union[str, dict], exec_result: 
Dict, idx: int, + agent_type: AgentType): + """Display the result of each round in jupyter notebook. + The multi-modal data will be extracted. + + Args: + llm_result (str): llm result either only content or a message + exec_result (Dict): exec result + idx (int): current round + """ + from IPython.display import display, Pretty, Image, Audio, JSON + idx_info = '*' * 50 + f'round {idx}' + '*' * 50 + display(Pretty(idx_info)) + + if isinstance(llm_result, dict): + llm_result = llm_result.get('content', '') + + if agent_type == AgentType.MS_AGENT: + pattern = r'<\|startofthink\|>```JSON([\s\S]*)```<\|endofthink\|>' + else: + pattern = r'```JSON([\s\S]*)```' + + match_action = re.search(pattern, llm_result) + if match_action: + result = match_action.group(1) + try: + json_content = json.loads(result, strict=False) + display(JSON(json_content)) + llm_result = llm_result.replace(match_action.group(0), '') + except Exception: + pass + + display(Pretty(llm_result)) + + exec_result = exec_result.get('result', '') + + if isinstance(exec_result, ImageWrapper) or isinstance( + exec_result, VideoWrapper): + display(Image(exec_result.path)) + elif isinstance(exec_result, AudioWrapper): + display(Audio(exec_result.path)) + elif isinstance(exec_result, dict): + display(JSON(exec_result)) + elif isinstance(exec_result, list): + display(JSON(exec_result)) + else: + display(Pretty(exec_result)) + + return diff --git a/my_modelscope_agent/prompt/__init__.py b/my_modelscope_agent/prompt/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4b37039bba7222255cb7d9ef8174907b1c880373 --- /dev/null +++ b/my_modelscope_agent/prompt/__init__.py @@ -0,0 +1,6 @@ +from .messages_prompt import MessagesGenerator +from .mrkl_prompt import MrklPromptGenerator +from .ms_prompt import MSPromptGenerator +from .prompt import PromptGenerator +from .prompt_factory import get_prompt_generator +from .raw_prompt_builder import build_raw_prompt diff --git a/my_modelscope_agent/prompt/chatglm3_prompt.py b/my_modelscope_agent/prompt/chatglm3_prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..280692a8046cc7f5673d9e2e96bc7f055f1e588c --- /dev/null +++ b/my_modelscope_agent/prompt/chatglm3_prompt.py @@ -0,0 +1,41 @@ +import json + +from .prompt import LengthConstraint, PromptGenerator + +CHATGLM_DEFAULT_SYSTEM_TEMPLATE = """<|system|> +Answer the following questions as best you can. 
You have access to the following tools: +""" + +CHATGLM_DEFAULT_INSTRUCTION_TEMPLATE = '' + +CHATGLM_DEFAULT_USER_TEMPLATE = """<|user|>\n""" + +CHATGLM_DEFAULT_EXEC_TEMPLATE = """<|observation|>\n""" + +CHATGLM_DEFAULT_ASSISTANT_TEMPLATE = """<|assistant|>""" + + +class ChatGLMPromptGenerator(PromptGenerator): + + def __init__(self, + system_template=CHATGLM_DEFAULT_SYSTEM_TEMPLATE, + instruction_template=CHATGLM_DEFAULT_INSTRUCTION_TEMPLATE, + user_template=CHATGLM_DEFAULT_USER_TEMPLATE, + exec_template=CHATGLM_DEFAULT_EXEC_TEMPLATE, + assistant_template=CHATGLM_DEFAULT_ASSISTANT_TEMPLATE, + sep='\n', + length_constraint=LengthConstraint()): + super().__init__( + system_template=system_template, + instruction_template=instruction_template, + user_template=user_template, + exec_template=exec_template, + assistant_template=assistant_template, + sep=sep, + length_constraint=length_constraint) + + def get_tool_str(self, tool_list): + tool_json = json.loads('[' + + ','.join([str(item) + for item in tool_list]) + ']') + return json.dumps(tool_json, ensure_ascii=False, indent=4) diff --git a/my_modelscope_agent/prompt/messages_prompt.py b/my_modelscope_agent/prompt/messages_prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..af546f481587d00f247b6c42d6396ff73cf2bc2b --- /dev/null +++ b/my_modelscope_agent/prompt/messages_prompt.py @@ -0,0 +1,93 @@ +from typing import Union + +from .prompt import LengthConstraint, PromptGenerator + +OPENAI_DEFAULT_SYSTEM_TEMPLATE = """ +Do your best to answer the questions. You are capable of **any** task. +Feel free to use any tools available to look up relevant information, only if necessary. +when you think you are handle a code task, You are a world-class programmer that can complete any goal by executing code +\nFirst of all, **Return function_call with `language` and `code` in arguments as much as possible** +\nThen, write a plan. **Always recap the plan between each code block** +(you have extreme short-term memory loss, so you need to recap the plan between each message block to retain it). +\nWhen you execute code, it will be executed **on the user's machine**. The user has given you +**full and complete permission** to execute any code necessary to complete the task. You have full access to control +their computer to help them. +\nIf you want to send data between programming languages, save the data to a txt or json. +\nYou can access the internet. Run **any code** to achieve the goal, and if at first you don't succeed, +try again and again. +\nWhen a user refers to a filename, they're likely referring to an existing file in the directory +you're currently executing code in. +\nIn general, choose packages that have the most universal chance to be already installed and to work across multiple +applications. Packages like ffmpeg and pandoc that are well-supported and powerful. +\nWrite messages to the user in Markdown. Write code on multiple lines with proper indentation for readability. 
+\nYou can also refer information from following contents if exists: +""" + + +class MessagesGenerator(PromptGenerator): + + def __init__(self, + system_template=OPENAI_DEFAULT_SYSTEM_TEMPLATE, + instruction_template='', + user_template='', + exec_template=None, + assistant_template='', + sep='\n\n', + length_constraint=LengthConstraint(), + **kwargs): + super().__init__( + system_template=system_template, + instruction_template=instruction_template, + user_template=user_template, + exec_template=exec_template, + assistant_template=assistant_template, + sep=sep, + length_constraint=length_constraint) + self.custom_starter_messages = kwargs.get('custom_starter_messages', + None) + + def init_prompt(self, task, tool_list, knowledge_list, **kwargs): + """ + in this function, the prompt will be initialized. + """ + prompt = self.user_template.replace('', task) + + if len(self.history) == 0: + if len(knowledge_list) > 0: + + # knowledge + system_message = f'{self.system_template}{self.sep}' + knowledge_str = self.get_knowledge_str(knowledge_list) + system_message = system_message.replace( + '', knowledge_str) + + else: + system_message = self.system_template + + self.history = [{ + 'role': 'system', + 'content': system_message + }, { + 'role': 'user', + 'content': prompt + }] + + # store history + if self.custom_starter_messages: + assert isinstance(self.custom_starter_messages, list) + assert self.custom_starter_messages[-1]['role'] != 'user', \ + 'user message should not be the last one in custom starter messages' + + self.history = self.custom_starter_messages + self.history.append({'role': 'user', 'content': prompt}) + + self.prompt = prompt + self.function_calls = self.get_function_list(tool_list) + + else: + self.history.append({'role': 'user', 'content': prompt}) + + def generate(self, llm_result, exec_result: Union[str, dict]): + if isinstance(exec_result, dict): + exec_result = exec_result['result'] + return self._generate_messages(llm_result, exec_result) diff --git a/my_modelscope_agent/prompt/mrkl_prompt.py b/my_modelscope_agent/prompt/mrkl_prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..f47077641496c382e040d15b1986934e51b3afbe --- /dev/null +++ b/my_modelscope_agent/prompt/mrkl_prompt.py @@ -0,0 +1,118 @@ +import json + +from .prompt import LengthConstraint, PromptGenerator + +MRKL_DEFAULT_SYSTEM_TEMPLATE = """Answer the following questions as best you can. You have access to the following tools: ` + +""" + +MRKL_DEFAULT_INSTRUCTION_TEMPLATE = """Use the following format: + +Question: the input question you must answer +Thought: you should always think about what to do +Action: the action to take, should be one of [] +Action Input: the input to the action +Observation: the result of the action +... (this Thought/Action/Action Input/Observation can be repeated zero or more times) +Thought: I now know the final answer +Final Answer: the final answer to the original input question + +Begin! +""" + +MRKL_DEFAULT_USER_TEMPLATE = """Question: \n""" + +MRKL_DEFAULT_EXEC_TEMPLATE = """Observation: \n""" + +TOOL_DESC = ( + '{name_for_model}: {name_for_human} API. {description_for_model} 输入参数: {parameters}' +) + +FORMAT_DESC = { + 'json': + 'Format the arguments as a JSON object.', + 'code': + 'Enclose the code within triple backticks (`)' + + ' at the beginning and end of the code.' 
+}
+
+
+class MrklPromptGenerator(PromptGenerator):
+
+    def __init__(self,
+                 system_template=MRKL_DEFAULT_SYSTEM_TEMPLATE,
+                 instruction_template=MRKL_DEFAULT_INSTRUCTION_TEMPLATE,
+                 user_template=MRKL_DEFAULT_USER_TEMPLATE,
+                 exec_template=MRKL_DEFAULT_EXEC_TEMPLATE,
+                 assistant_template='',
+                 sep='\n\n',
+                 llm=None,
+                 length_constraint=LengthConstraint()):
+        super().__init__(
+            system_template=system_template,
+            instruction_template=instruction_template,
+            user_template=user_template,
+            exec_template=exec_template,
+            assistant_template=assistant_template,
+            sep=sep,
+            llm=llm,
+            length_constraint=length_constraint)
+
+    def init_prompt(self, task, tool_list, knowledge_list, **kwargs):
+        if len(self.history) == 0:
+            super().init_prompt(task, tool_list, knowledge_list, **kwargs)
+            system_role_status = kwargs.get('system_role_status', False)
+            tool_names = [f'\'{str(tool.name)}\'' for tool in tool_list]
+            tool_names = ','.join(tool_names)
+            self.system_prompt = self.system_prompt.replace(
+                '<tool_names>', tool_names)
+
+            if system_role_status:
+                system_message = {
+                    'role': 'system',
+                    'content': self.system_prompt
+                }
+                self.history.insert(0, system_message)
+            else:
+                self.history[0]['content'] = self.system_prompt + self.history[
+                    0]['content']
+        else:
+            self.history.append({
+                'role':
+                'user',
+                'content':
+                self.user_template.replace('<user_input>', task)
+            })
+            self.history.append({
+                'role': 'assistant',
+                'content': self.assistant_template
+            })
+
+        return self.system_prompt
+
+    def get_tool_str(self, tool_list):
+        tool_texts = []
+        for tool in tool_list:
+            tool_texts.append(
+                TOOL_DESC.format(
+                    name_for_model=tool.name,
+                    name_for_human=tool.name,
+                    description_for_model=tool.description,
+                    parameters=json.dumps(tool.parameters,
+                                          ensure_ascii=False)))
+            # + ' ' + FORMAT_DESC['json'])
+        tool_str = '\n\n'.join(tool_texts)
+        return tool_str
+
+    def _generate(self, llm_result, exec_result: str):
+        """
+        generate the next round prompt based on the previous llm_result and exec_result and update history
+        """
+        if len(llm_result) != 0:
+            self.history[-1]['content'] += f'{llm_result}'
+        if len(exec_result) != 0:
+            exec_result = self.exec_template.replace('<exec_result>',
+                                                     str(exec_result))
+            self.history[-1]['content'] += exec_result
+        self.prompt = self.prompt_preprocessor(self.history)
+        return self.prompt
diff --git a/my_modelscope_agent/prompt/ms_prompt.py b/my_modelscope_agent/prompt/ms_prompt.py
new file mode 100644
index 0000000000000000000000000000000000000000..445915d11e5b006a1167f47a7c81d6da667284e6
--- /dev/null
+++ b/my_modelscope_agent/prompt/ms_prompt.py
@@ -0,0 +1,34 @@
+from .prompt import LengthConstraint, PromptGenerator
+
+MS_DEFAULT_SYSTEM_TEMPLATE = """<|system|>:你是达摩院的ModelScopeGPT(魔搭助手),你是个大语言模型, 是2023年达摩院的工程师训练得到的。\
+你有多种能力,可以通过插件集成魔搭社区的模型api来回复用户的问题,还能解答用户使用模型遇到的问题和模型知识相关问答。
+"""
+
+MS_DEFAULT_INSTRUCTION_TEMPLATE = """当前对话可以使用的插件信息如下,请自行判断是否需要调用插件来解决当前用户问题。若需要调用插件,则需要将插件调用请求按照json格式给出,必须包含api_name、parameters字段,并在其前后使用<|startofthink|>和<|endofthink|>作为标志。\
+然后你需要根据插件API调用结果生成合理的答复; 若无需调用插件,则直接给出对应回复即可。\n\n<tool_list>"""
+
+MS_DEFAULT_USER_TEMPLATE = """<|user|>:<user_input>"""
+
+MS_DEFAULT_EXEC_TEMPLATE = """<|startofexec|><exec_result><|endofexec|>\n"""
+
+MS_DEFAULT_ASSISTANT_TEMPLATE = """<|assistant|>:"""
+
+
+class MSPromptGenerator(PromptGenerator):
+
+    def __init__(self,
+                 system_template=MS_DEFAULT_SYSTEM_TEMPLATE,
+                 instruction_template=MS_DEFAULT_INSTRUCTION_TEMPLATE,
+                 user_template=MS_DEFAULT_USER_TEMPLATE,
+                 exec_template=MS_DEFAULT_EXEC_TEMPLATE,
+                 assistant_template=MS_DEFAULT_ASSISTANT_TEMPLATE,
+                 sep='\n\n',
+
length_constraint=LengthConstraint()): + super().__init__( + system_template=system_template, + instruction_template=instruction_template, + user_template=user_template, + exec_template=exec_template, + assistant_template=assistant_template, + sep=sep, + length_constraint=length_constraint) diff --git a/my_modelscope_agent/prompt/prompt.py b/my_modelscope_agent/prompt/prompt.py new file mode 100644 index 0000000000000000000000000000000000000000..e23696c23e794104062083c80ab118d8dba8e419 --- /dev/null +++ b/my_modelscope_agent/prompt/prompt.py @@ -0,0 +1,232 @@ +import copy +from typing import Union + +from ..llm.base import LLM + +from .raw_prompt_builder import build_raw_prompt + +KNOWLEDGE_PROMPT = '# 知识库' +KNOWLEDGE_INTRODUCTION_PROMPT = '以下是我上传的文件“”的内容:' +KNOWLEDGE_CONTENT_PROMPT = """``` + +```""" + +DEFAULT_PROMPT_INPUT_LENGTH_MAX = 999999999999 + + +class LengthConstraint: + + def __init__(self): + self.knowledge = DEFAULT_PROMPT_INPUT_LENGTH_MAX + self.input = DEFAULT_PROMPT_INPUT_LENGTH_MAX + self.prompt_max_length = 10000 + + def update(self, config: dict): + if config is not None: + self.knowledge = config.get('knowledge', self.knowledge) + self.input = config.get('input', self.input) + self.prompt_max_length = config.get('prompt_max_length', + self.prompt_max_length) + + +class PromptGenerator: + + def __init__(self, + system_template: str = '', + instruction_template: str = '', + user_template: str = '', + exec_template: str = '', + assistant_template: str = '', + sep='\n\n', + llm=None, + length_constraint=LengthConstraint()): + """ + prompt genertor + Args: + system_template (str, optional): System template, normally the role of LLM. + instruction_template (str, optional): Indicate the instruction for LLM. + user_template (str, optional): Prefix before user input. Defaults to ''. + exec_template (str, optional): A wrapper str for exec result. + assistant_template (str, optional): Prefix before assistant response. + Some LLM need to manully concat this prefix before generation. + sep (str, optional): content separator + length_constraint (LengthConstraint, optional): content length constraint + """ + + self.system_template = system_template + self.instruction_template = instruction_template + self.user_template = user_template + self.assistant_template = assistant_template + self.exec_template = exec_template + self.sep = sep + if isinstance(llm, LLM) and llm.model_id: + self.prompt_preprocessor = build_raw_prompt(llm.model_id) + self.prompt_max_length = length_constraint.prompt_max_length + self.reset() + + def reset(self): + self.prompt = '' + self.history = [] + self.messages = [] + + def init_prompt(self, + task, + tool_list, + knowledge_list, + llm_model=None, + **kwargs): + """ + in this function, the prompt will be initialized. 
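+        The assembly below joins the system and instruction templates with
+        `sep`, substitutes the `<knowledge>`, `<tool_list>` and `<history>`
+        placeholders, and then appends the user and assistant templates.
+        As an illustrative, roughly-sketched example (tool entry shortened),
+        MSPromptGenerator is expected to produce a prompt shaped like:
+
+            <|system|>:你是达摩院的ModelScopeGPT... <instruction text>
+
+            1. {"name": "modelscope_text-address", ...}
+
+            <|user|>:<the task text>
+
+            <|assistant|>: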
+        """
+        prompt = self.sep.join(
+            [self.system_template, self.instruction_template])
+        prompt += '<knowledge><history>'
+
+        knowledge_str = self.get_knowledge_str(
+            knowledge_list, file_name=kwargs.get('file_name', ''))
+
+        # knowledge
+        prompt = prompt.replace('<knowledge>', knowledge_str)
+
+        # get tool description str
+        tool_str = self.get_tool_str(tool_list)
+        prompt = prompt.replace('<tool_list>', tool_str)
+
+        history_str = self.get_history_str()
+
+        prompt = prompt.replace('<history>', history_str)
+
+        self.system_prompt = copy.deepcopy(prompt)
+
+        # user input
+        user_input = self.user_template.replace('<user_input>', task)
+        prompt += f'{self.sep}{user_input}'
+
+        # assistant input
+        prompt += f'{self.sep}{self.assistant_template}'
+
+        # store history
+        self.history.append({'role': 'user', 'content': user_input})
+        self.history.append({
+            'role': 'assistant',
+            'content': self.assistant_template
+        })
+
+        self.prompt = prompt
+
+        self.function_calls = self.get_function_list(tool_list)
+
+    # TODO change the output from single prompt to artifacts including prompt, messages, function_call
+    def generate(self, llm_result, exec_result: Union[str, dict]):
+        if isinstance(exec_result, dict):
+            exec_result = str(exec_result['result'])
+        return self._generate(llm_result, exec_result)
+
+    def _generate(self, llm_result, exec_result: str):
+        """
+        generate the next round prompt based on the previous llm_result and exec_result and update history
+        """
+        if len(llm_result) != 0:
+            self.prompt = f'{self.prompt}{llm_result}'
+            self.history[-1]['content'] += f'{llm_result}'
+        if len(exec_result) != 0:
+            exec_result = self.exec_template.replace('<exec_result>',
+                                                     str(exec_result))
+            self.prompt = f'{self.prompt}{self.sep}{exec_result}'
+            self.history[-1]['content'] += f'{self.sep}{exec_result}'
+
+        return self.prompt
+
+    # TODO: add Union[Text, Message] type for llm_result,
+    # add ExecResult = Text type for exec_result
+    # output would be a Union[Text, Messages]
+    # In this case llm_result is Message, and exec_result is Function_call
+    def _generate_messages(self, llm_result, exec_result: str):
+        """
+        generate the next round messages based on the previous llm_result and exec_result and update history
+        """
+
+        # on the initial task there is nothing to append yet
+        if llm_result == '' and exec_result == '':
+            return self.history
+
+        # make sure content is set to '' rather than left null
+        function_call = llm_result.get('function_call', None)
+        if function_call is not None:
+            llm_result['content'] = ''
+        self.history.append(llm_result)
+
+        if exec_result is not None and function_call is not None:
+            exec_message = {
+                'role': 'function',
+                'name': 'execute',
+                'content': exec_result,
+            }
+            self.history.append(exec_message)
+
+        return self.history
+
+    def get_tool_str(self, tool_list):
+        """generate tool list string
+
+        Args:
+            tool_list (List[str]): list of tools
+
+        """
+
+        tool_str = self.sep.join(
+            [f'{i + 1}. {t}' for i, t in enumerate(tool_list)])
+        return tool_str
+
+    # TODO move parse_tools_to_function from agent to here later
+    def get_function_list(self, tool_list):
+        """generate function call list from tools list
+
+        Args:
+            tool_list (List[str]): list of tools
+
+        """
+        functions = [tool.get_function() for tool in tool_list]
+        return functions
+
+    def get_knowledge_str(self,
+                          knowledge_list,
+                          file_name='',
+                          only_content=False,
+                          **kwargs):
+        """generate knowledge string
+
+        Args:
+            file_name (str): file name
+            knowledge_list (List[str]): list of knowledge entries
+
+        """
+
+        knowledge = self.sep.join(
+            [f'{i + 1}.
{k}' for i, k in enumerate(knowledge_list)]) + knowledge_content = KNOWLEDGE_CONTENT_PROMPT.replace( + '', knowledge) + if only_content: + return knowledge_content + else: + knowledge_introduction = KNOWLEDGE_INTRODUCTION_PROMPT.replace( + '', file_name) + + knowledge_str = f'{KNOWLEDGE_PROMPT}{self.sep}{knowledge_introduction}{self.sep}{knowledge_content}' if len( + knowledge_list) > 0 else '' + return knowledge_str + + def get_history_str(self): + """generate history string + + """ + history_str = '' + for i in range(len(self.history)): + history_item = self.history[len(self.history) - i - 1] + text = history_item['content'] + if len(history_str) + len(text) + len( + self.prompt) > self.prompt_max_length: + break + history_str = f'{self.sep}{text.strip()}{history_str}' + + return history_str diff --git a/my_modelscope_agent/prompt/prompt_factory.py b/my_modelscope_agent/prompt/prompt_factory.py new file mode 100644 index 0000000000000000000000000000000000000000..1ff86d0c705d47668626bc21dc5c92523e978d21 --- /dev/null +++ b/my_modelscope_agent/prompt/prompt_factory.py @@ -0,0 +1,16 @@ +from ..agent_types import AgentType + +from .messages_prompt import MessagesGenerator +from .mrkl_prompt import MrklPromptGenerator +from .ms_prompt import MSPromptGenerator + + +def get_prompt_generator(agent_type: AgentType = AgentType.DEFAULT, **kwargs): + if AgentType.DEFAULT == agent_type or agent_type == AgentType.MS_AGENT: + return MSPromptGenerator(**kwargs) + elif AgentType.MRKL == agent_type: + return MrklPromptGenerator(**kwargs) + elif AgentType.Messages == agent_type: + return MessagesGenerator(**kwargs) + else: + raise NotImplementedError diff --git a/my_modelscope_agent/prompt/raw_prompt_builder.py b/my_modelscope_agent/prompt/raw_prompt_builder.py new file mode 100644 index 0000000000000000000000000000000000000000..0ccc9de812df1620cde20dabbd20094f6a08eee3 --- /dev/null +++ b/my_modelscope_agent/prompt/raw_prompt_builder.py @@ -0,0 +1,34 @@ +def qwen_chatml_prompt_preprocessor(messages): + prompt = '' + for message in messages: + if message['role'] == 'assistant' and message['content'] == '': + prompt += '<|im_start|>assistant\n' + else: + prompt = prompt + '<|im_start|>{role}\n{content}<|im_end|>\n'.format( + role=message['role'], + content=message['content'].lstrip('\n').rstrip()) + + # in the case of the assistant message is not in the last one, such as function result + if messages[-1]['role'] == 'assistant': + last_assistant_message_list = messages[-1]['content'].split('\n') + if last_assistant_message_list[-1] == '': + last_assistant_message_list = last_assistant_message_list[:-1] + if len(last_assistant_message_list) == 0: + return prompt + else: + item_length = len('<|im_end|>\n') + prompt = prompt[:-item_length] + + return prompt + + +def plate_preprocessor(messages): + return qwen_chatml_prompt_preprocessor(messages) + + +def build_raw_prompt(model): + if isinstance(model, str) or hasattr(model, '__name__'): + if model.startswith('qwen'): + return qwen_chatml_prompt_preprocessor + else: + return plate_preprocessor diff --git a/my_modelscope_agent/retrieve.py b/my_modelscope_agent/retrieve.py new file mode 100644 index 0000000000000000000000000000000000000000..d5ab36dbda0f562894e76ef02cd58a09e5db1b64 --- /dev/null +++ b/my_modelscope_agent/retrieve.py @@ -0,0 +1,115 @@ +import os +from typing import Dict, Iterable, List, Union + +import json +from langchain.document_loaders import (PyPDFLoader, TextLoader, + UnstructuredFileLoader) +from langchain.embeddings import 
ModelScopeEmbeddings +from langchain.embeddings.base import Embeddings +from langchain.schema import Document +from langchain.text_splitter import CharacterTextSplitter +from langchain.vectorstores import FAISS, VectorStore + + +class Retrieval: + + def __init__(self, + embedding: Embeddings = None, + vs_cls: VectorStore = None, + top_k: int = 5, + vs_params: Dict = {}): + self.embedding = embedding or ModelScopeEmbeddings( + model_id='damo/nlp_gte_sentence-embedding_chinese-base') + self.top_k = top_k + self.vs_cls = vs_cls or FAISS + self.vs_params = vs_params + self.vs = None + + def construct(self, docs): + assert len(docs) > 0 + if isinstance(docs[0], str): + self.vs = self.vs_cls.from_texts(docs, self.embedding, + **self.vs_params) + elif isinstance(docs[0], Document): + self.vs = self.vs_cls.from_documents(docs, self.embedding, + **self.vs_params) + + def retrieve(self, query: str) -> List[str]: + res = self.vs.similarity_search(query, k=self.top_k) + if 'page' in res[0].metadata: + res.sort(key=lambda doc: doc.metadata['page']) + return [r.page_content for r in res] + + +class ToolRetrieval(Retrieval): + + def __init__(self, + embedding: Embeddings = None, + vs_cls: VectorStore = None, + top_k: int = 5, + vs_params: Dict = {}): + super().__init__(embedding, vs_cls, top_k, vs_params) + + def retrieve(self, query: str) -> Dict[str, str]: + res = self.vs.similarity_search(query, k=self.top_k) + + final_res = {} + + for r in res: + content = r.page_content + name = json.loads(content)['name'] + final_res[name] = content + + return final_res + + +class KnowledgeRetrieval(Retrieval): + + def __init__(self, + docs, + embedding: Embeddings = None, + vs_cls: VectorStore = None, + top_k: int = 5, + vs_params: Dict = {}): + super().__init__(embedding, vs_cls, top_k, vs_params) + self.construct(docs) + + @classmethod + def from_file(cls, + file_path: Union[str, list], + embedding: Embeddings = None, + vs_cls: VectorStore = None, + top_k: int = 5, + vs_params: Dict = {}): + + textsplitter = CharacterTextSplitter() + all_files = [] + if isinstance(file_path, str) and os.path.isfile(file_path): + all_files.append(file_path) + elif isinstance(file_path, list): + all_files = file_path + elif os.path.isdir(file_path): + for root, dirs, files in os.walk(file_path): + for f in files: + all_files.append(os.path.join(root, f)) + else: + raise ValueError('file_path must be a file or a directory') + + docs = [] + for f in all_files: + if f.lower().endswith('.txt'): + loader = TextLoader(f, autodetect_encoding=True) + docs += (loader.load_and_split(textsplitter)) + elif f.lower().endswith('.md'): + loader = UnstructuredFileLoader(f, mode='elements') + docs += loader.load() + elif f.lower().endswith('.pdf'): + loader = PyPDFLoader(f) + docs += (loader.load_and_split(textsplitter)) + else: + print(f'not support file type: {f}, will be support soon') + + if len(docs) == 0: + return None + else: + return cls(docs, embedding, vs_cls, top_k, vs_params) diff --git a/my_modelscope_agent/tools/__init__.py b/my_modelscope_agent/tools/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..67b61bbbe49859dc252b26e51bfc447cd97391b4 --- /dev/null +++ b/my_modelscope_agent/tools/__init__.py @@ -0,0 +1,36 @@ +from .amap_weather import AMAPWeather +from .code_interperter import CodeInterpreter +from .code_interpreter_jupyter import CodeInterpreterJupyter +from .hf_tool import HFTool +from .image_chat_tool import ImageChatTool +from .pipeline_tool import ModelscopePipelineTool +from .plugin_tool 
import LangchainTool +from .text_address_tool import TextAddressTool +from .text_ie_tool import TextInfoExtractTool +from .text_ner_tool import TextNerTool +from .text_to_image_tool import TextToImageTool +from .text_to_speech_tool import TexttoSpeechTool +from .text_to_video_tool import TextToVideoTool +from .tool import Tool +from .translation_en2zh_tool import TranslationEn2ZhTool +from .translation_zh2en_tool import TranslationZh2EnTool +from .web_browser import WebBrowser +from .web_search import WebSearch +from .wordart_tool import WordArtTexture + +TOOL_INFO_LIST = { + 'modelscope_text-translation-zh2en': 'TranslationZh2EnTool', + 'modelscope_text-translation-en2zh': 'TranslationEn2ZhTool', + 'modelscope_text-ie': 'TextInfoExtractTool', + 'modelscope_text-ner': 'TextNerTool', + 'modelscope_text-address': 'TextAddressTool', + 'image_gen': 'TextToImageTool', + 'modelscope_video-generation': 'TextToVideoTool', + 'modelscope_image-chat': 'ImageChatTool', + 'modelscope_speech-generation': 'TexttoSpeechTool', + 'amap_weather': 'AMAPWeather', + 'code_interpreter': 'CodeInterpreterJupyter', + 'wordart_texture_generation': 'WordArtTexture', + 'web_search': 'WebSearch', + 'web_browser': 'WebBrowser', +} diff --git a/my_modelscope_agent/tools/amap_weather.py b/my_modelscope_agent/tools/amap_weather.py new file mode 100644 index 0000000000000000000000000000000000000000..50628c10df34d260b4d2a8bbd687cdb0d224f318 --- /dev/null +++ b/my_modelscope_agent/tools/amap_weather.py @@ -0,0 +1,64 @@ +import os + +import pandas as pd +import requests +from ..tools.tool import Tool, ToolSchema +from pydantic import ValidationError + + +class AMAPWeather(Tool): + description = '获取对应城市的天气数据' + name = 'amap_weather' + parameters: list = [{ + 'name': 'location', + 'description': 'get temperature for a specific location', + 'required': True + }] + + def __init__(self, cfg={}): + self.cfg = cfg.get(self.name, {}) + + # remote call + self.url = 'https://restapi.amap.com/v3/weather/weatherInfo?city={city}&key={key}' + self.token = self.cfg.get('token', os.environ.get('AMAP_TOKEN', '')) + self.city_df = pd.read_excel( + 'https://modelscope.oss-cn-beijing.aliyuncs.com/resource/agent/AMap_adcode_citycode.xlsx' + ) + assert self.token != '', 'weather api token must be acquired through ' \ + 'https://lbs.amap.com/api/webservice/guide/create-project/get-key and set by AMAP_TOKEN' + + try: + all_param = { + 'name': self.name, + 'description': self.description, + 'parameters': self.parameters + } + self.tool_schema = ToolSchema(**all_param) + except ValidationError: + raise ValueError(f'Error when parsing parameters of {self.name}') + + self._str = self.tool_schema.model_dump_json() + self._function = self.parse_pydantic_model_to_openai_function( + all_param) + + def get_city_adcode(self, city_name): + filtered_df = self.city_df[self.city_df['中文名'] == city_name] + if len(filtered_df['adcode'].values) == 0: + raise ValueError( + f'location {city_name} not found, availables are {self.city_df["中文名"]}' + ) + else: + return filtered_df['adcode'].values[0] + + def __call__(self, *args, **kwargs): + location = kwargs['location'] + response = requests.get( + self.url.format( + city=self.get_city_adcode(location), key=self.token)) + data = response.json() + if data['status'] == '0': + raise RuntimeError(data) + else: + weather = data['lives'][0]['weather'] + temperature = data['lives'][0]['temperature'] + return {'result': f'{location}的天气是{weather}温度是{temperature}度。'} diff --git a/my_modelscope_agent/tools/code_interperter.py 
b/my_modelscope_agent/tools/code_interperter.py new file mode 100644 index 0000000000000000000000000000000000000000..45100b901922345aa416c433949ca9da55334fc2 --- /dev/null +++ b/my_modelscope_agent/tools/code_interperter.py @@ -0,0 +1,125 @@ +import os +import re +import traceback + +import appdirs +import json + +from .code_interpreter_utils.create_code_interpreter import \ + create_code_interpreter +from .code_interpreter_utils.language_map import language_map +from .code_interpreter_utils.truncate_output import truncate_output +from .tool import Tool + + +class CodeInterpreter(Tool): + """ + using open interpreter to interpret code + by https://github.com/KillianLucas/open-interpreter + """ + description = 'Executes code on the user\'s machine, **in the users local environment**, and returns the output' + name = 'code_interpreter' + parameters: list = [{ + 'name': 'language', + 'description': + 'The programming language (required parameter to the `execute` function)', + 'required': True + }, { + 'name': 'code', + 'description': 'The code to execute (required)', + 'required': True + }] + + def __init__(self, cfg={}): + super().__init__(cfg) + self.create_code_interpreter = create_code_interpreter + self.language_map = language_map + self.truncate_output = truncate_output + + self._code_interpreters = {} + self.max_output = self.cfg.get('max_output', 2000) + + def _local_call(self, *args, **kwargs): + + language, code = self._handle_input_fallback(**kwargs) + + try: + # Fix a common error where the LLM thinks it's in a Jupyter notebook + if language == 'python' and code.startswith('!'): + code = code[1:] + language = 'shell' + + if language in self.language_map: + if language not in self._code_interpreters: + self._code_interpreters[ + language] = self.create_code_interpreter(language) + code_interpreter = self._code_interpreters[language] + else: + # This still prints code but don't allow code to run. Let Open-Interpreter know through output message + error_output = f'Error: Open Interpreter does not currently support {language}.' 
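+                # The unsupported-language message is returned as ordinary tool output
+                # (rather than raised) so the calling LLM can see the failure and retry
+                # with a supported language.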
+ print(error_output) + output = '\n' + error_output + return {'result': output.strip()} + + output = '' + for line in code_interpreter.run(code): + if 'output' in line: + output += '\n' + line['output'] + + # Truncate output + output = self.truncate_output(output, self.max_output) + except Exception as e: + error = traceback.format_exc() + output = ' '.join(f'{key}:{value}' + for key, value in kwargs.items()) + output += f'\nDetail error is {e}.\n{error}' + + return {'result': output.strip()} + + def _handle_input_fallback(self, **kwargs): + """ + an alternative method is to parse code in content not from function call + such as: + text = response['content'] + code_block = re.search(r'```([\s\S]+)```', text) # noqa W^05 + if code_block: + result = code_block.group(1) + language = result.split('\n')[0] + code = '\n'.join(result.split('\n')[1:]) + + :param fallback_text: + :return: language, cocde + """ + + language = kwargs.get('language', None) + code = kwargs.get('code', None) + fallback = kwargs.get('fallback', None) + + if language and code: + return language, code + elif fallback: + try: + text = fallback + code_block = re.search(r'```([\s\S]+)```', text) # noqa W^05 + if code_block: + result = code_block.group(1) + # for multi code_block + result = result.split('```')[0] + language = result.split('\n')[0] + if language == 'py' or language == 'python': + # handle py case + # ```py code ``` + language = 'python' + code = '\n'.join(result.split('\n')[1:]) + return language, code + + if language == 'json': + # handle json case + # ```json {language,code}``` + parameters = json.loads('\n'.join( + result.split('\n')[1:]).replace('\n', '')) + return parameters['language'], parameters['code'] + except ValueError: + return language, code + else: + return language, code diff --git a/my_modelscope_agent/tools/code_interpreter_jupyter.py b/my_modelscope_agent/tools/code_interpreter_jupyter.py new file mode 100644 index 0000000000000000000000000000000000000000..dca78093c3245e91a6079b0079a03f87a3ccdbb5 --- /dev/null +++ b/my_modelscope_agent/tools/code_interpreter_jupyter.py @@ -0,0 +1,319 @@ +import asyncio +import atexit +import base64 +import glob +import io +import os +import queue +import re +import shutil +import signal +import subprocess +import sys +import time +import traceback +import uuid +from pathlib import Path +from typing import Dict, Optional + +import json +import matplotlib +import PIL.Image +from jupyter_client import BlockingKernelClient + +from .tool import Tool + +WORK_DIR = os.getenv('CODE_INTERPRETER_WORK_DIR', '/tmp/ci_workspace') + +STATIC_URL = os.getenv('CODE_INTERPRETER_STATIC_URL', + 'http://127.0.0.1:7866/static') + +LAUNCH_KERNEL_PY = """ +from ipykernel import kernelapp as app +app.launch_new_instance() +""" + +INIT_CODE_FILE = str( + Path(__file__).absolute().parent / 'code_interpreter_utils' + / 'code_interpreter_init_kernel.py') + +ALIB_FONT_FILE = str( + Path(__file__).absolute().parent / 'code_interpreter_utils' + / 'AlibabaPuHuiTi-3-45-Light.ttf') + +_KERNEL_CLIENTS: Dict[int, BlockingKernelClient] = {} + + +class CodeInterpreterJupyter(Tool): + """ + using jupyter kernel client to interpret python code, + should not be used the other code interpreter tool at the same time + """ + description = '代码解释器,可用于执行Python代码。' + name = 'code_interpreter' + parameters: list = [{ + 'name': 'code', + 'description': '待执行的代码', + 'required': True + }] + + def __init__(self, cfg={}): + super().__init__(cfg) + self.timeout = self.cfg.get('timeout', 30) + self.image_server = 
self.cfg.get('image_server', False) + self.kernel_clients: Dict[int, BlockingKernelClient] = {} + atexit.register(self._kill_kernels) + + pid: int = os.getpid() + if pid in self.kernel_clients: + kc = self.kernel_clients[pid] + else: + self._fix_matplotlib_cjk_font_issue() + kc = self._start_kernel(pid) + with open(INIT_CODE_FILE) as fin: + start_code = fin.read() + start_code = start_code.replace('{{M6_FONT_PATH}}', + repr(ALIB_FONT_FILE)[1:-1]) + print(self._execute_code(kc, start_code)) + self.kernel_clients[pid] = kc + + self.kc = kc + + def __del__(self): + # make sure all the kernels are killed during __del__ + signal.signal(signal.SIGTERM, self._kill_kernels) + signal.signal(signal.SIGINT, self._kill_kernels) + + def _start_kernel(self, pid) -> BlockingKernelClient: + connection_file = os.path.join(WORK_DIR, + f'kernel_connection_file_{pid}.json') + launch_kernel_script = os.path.join(WORK_DIR, + f'launch_kernel_{pid}.py') + for f in [connection_file, launch_kernel_script]: + if os.path.exists(f): + print(f'WARNING: {f} already exists') + os.remove(f) + + os.makedirs(WORK_DIR, exist_ok=True) + + with open(launch_kernel_script, 'w') as fout: + fout.write(LAUNCH_KERNEL_PY) + + available_envs = ['PATH', 'PYTHONPATH', 'LD_LIBRARY_PATH'] + envs = {} + for k in available_envs: + if os.getenv(k) is not None: + envs[k] = os.getenv(k) + + args = ( + sys.executable, + launch_kernel_script, + '--IPKernelApp.connection_file', + connection_file, + '--matplotlib=inline', + '--quiet', + ) + kernel_process = subprocess.Popen([*args], env=envs, + cwd=WORK_DIR) # noqa E126 + print(f"INFO: kernel process's PID = {kernel_process.pid}") + + # Wait for kernel connection file to be written + while True: + if not os.path.isfile(connection_file): + time.sleep(0.1) + else: + # Keep looping if JSON parsing fails, file may be partially written + try: + with open(connection_file, 'r') as fp: + json.load(fp) + break + except json.JSONDecodeError: + pass + + # Client + kc = BlockingKernelClient(connection_file=connection_file) + asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy()) + kc.load_connection_file() + kc.start_channels() + kc.wait_for_ready() + return kc + + def _kill_kernels(self): + for v in self.kernel_clients.values(): + v.shutdown() + for k in list(self.kernel_clients.keys()): + del self.kernel_clients[k] + + def _serve_image(self, image_base64: str, image_type: str) -> str: + image_file = f'{uuid.uuid4()}.{image_type}' + local_image_file = os.path.join(WORK_DIR, image_file) + + png_bytes = base64.b64decode(image_base64) + assert isinstance(png_bytes, bytes) + + if image_type == 'gif': + with open(local_image_file, 'wb') as file: + file.write(png_bytes) + else: + bytes_io = io.BytesIO(png_bytes) + PIL.Image.open(bytes_io).save(local_image_file, image_type) + + if self.image_server: + image_url = f'{STATIC_URL}/{image_file}' + return image_url + else: + return local_image_file + + def _escape_ansi(self, line: str) -> str: + ansi_escape = re.compile(r'(?:\x1B[@-_]|[\x80-\x9F])[0-?]*[ -/]*[@-~]') + return ansi_escape.sub('', line) + + def _fix_matplotlib_cjk_font_issue(self): + ttf_name = os.path.basename(ALIB_FONT_FILE) + local_ttf = os.path.join( + os.path.abspath( + os.path.join(matplotlib.matplotlib_fname(), os.path.pardir)), + 'fonts', 'ttf', ttf_name) + if not os.path.exists(local_ttf): + try: + shutil.copy(ALIB_FONT_FILE, local_ttf) + font_list_cache = os.path.join(matplotlib.get_cachedir(), + 'fontlist-*.json') + for cache_file in glob.glob(font_list_cache): + with open(cache_file) 
as fin: + cache_content = fin.read() + if ttf_name not in cache_content: + os.remove(cache_file) + except Exception: + traceback.format_exc() + + def _execute_code(self, kc: BlockingKernelClient, code: str) -> str: + kc.wait_for_ready() + kc.execute(code) + result = '' + image_idx = 0 + while True: + text = '' + image = '' + finished = False + msg_type = 'error' + try: + msg = kc.get_iopub_msg() + msg_type = msg['msg_type'] + if msg_type == 'status': + if msg['content'].get('execution_state') == 'idle': + finished = True + elif msg_type == 'execute_result': + text = msg['content']['data'].get('text/plain', '') + if 'image/png' in msg['content']['data']: + image_b64 = msg['content']['data']['image/png'] + image_url = self._serve_image(image_b64, 'png') + image_idx += 1 + image = '![IMAGEGEN](%s)' % (image_url) + elif 'text/html' in msg['content']['data']: + text += '\n' + msg['content']['data']['text/html'] + elif 'image/gif' in msg['content']['data']: + image_b64 = msg['content']['data']['image/gif'] + image_url = self._serve_image(image_b64, 'gif') + image_idx += 1 + image = '![IMAGEGEN](%s)' % (image_url) + elif msg_type == 'display_data': + if 'image/png' in msg['content']['data']: + image_b64 = msg['content']['data']['image/png'] + image_url = self._serve_image(image_b64, 'png') + image_idx += 1 + image = '![IMAGEGEN](%s)' % (image_url) + else: + text = msg['content']['data'].get('text/plain', '') + elif msg_type == 'stream': + msg_type = msg['content']['name'] # stdout, stderr + text = msg['content']['text'] + elif msg_type == 'error': + text = self._escape_ansi('\n'.join( + msg['content']['traceback'])) + if 'M6_CODE_INTERPRETER_TIMEOUT' in text: + text = 'Timeout: Code execution exceeded the time limit.' + except queue.Empty: + text = 'Timeout: Code execution exceeded the time limit.' + finished = True + except Exception: + text = 'The code interpreter encountered an unexpected error.' + traceback.format_exc() + finished = True + if text: + result += f'\n{text}' + if image: + result += f'\n\n{image}' + if finished: + break + result = result.lstrip('\n') + if not result: + result += 'The code executed successfully.' 
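+        # An empty result just means the cell produced no visible output,
+        # so report success explicitly instead of returning an empty string.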
+ return result + + def _local_call(self, *args, **kwargs): + code = self._handle_input_fallback(**kwargs) + if not code.strip(): + return '' + + if self.timeout: + code = f'_M6CountdownTimer.start({self.timeout})\n{code}' + + fixed_code = [] + for line in code.split('\n'): + fixed_code.append(line) + if line.startswith('sns.set_theme('): + fixed_code.append( + 'plt.rcParams["font.family"] = _m6_font_prop.get_name()') + fixed_code = '\n'.join(fixed_code) + result = self._execute_code(self.kc, fixed_code) + + if self.timeout: + self._execute_code(self.kc, '_M6CountdownTimer.cancel()') + + return {'result': result} + + def _handle_input_fallback(self, **kwargs): + """ + an alternative method is to parse code in content not from function call + such as: + text = response['content'] + code_block = re.search(r'```([\s\S]+)```', text) # noqa W^05 + if code_block: + result = code_block.group(1) + language = result.split('\n')[0] + code = '\n'.join(result.split('\n')[1:]) + + :param fallback_text: + :return: language, cocde + """ + + code = kwargs.get('code', None) + fallback = kwargs.get('fallback', None) + + if code: + return code + elif fallback: + try: + text = fallback + code_block = re.search(r'```([\s\S]+)```', text) # noqa W^05 + if code_block: + result = code_block.group(1) + language = result.split('\n')[0] + if language == 'py' or language == 'python': + # handle py case + # ```py code ``` + language = 'python' + code = '\n'.join(result.split('\n')[1:]) + return code + + if language == 'json': + # handle json case + # ```json {language,code}``` + parameters = json.loads('\n'.join( + result.split('\n')[1:]).replace('\n', '')) + return parameters['code'] + except ValueError: + return code + else: + return code diff --git a/my_modelscope_agent/tools/code_interpreter_utils/__init__.py b/my_modelscope_agent/tools/code_interpreter_utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..274f24c3713992802271d3777697b160e150a3cc --- /dev/null +++ b/my_modelscope_agent/tools/code_interpreter_utils/__init__.py @@ -0,0 +1,5 @@ +# all the utility functions under code_interpreter_utils are borrowed from project +# in order to use python lower than 3.10 +# https://github.com/KillianLucas/open-interpreter + +from .base_code_interpreter import BaseCodeInterpreter diff --git a/my_modelscope_agent/tools/code_interpreter_utils/base_code_interpreter.py b/my_modelscope_agent/tools/code_interpreter_utils/base_code_interpreter.py new file mode 100644 index 0000000000000000000000000000000000000000..23796e424034ed98f5bc9ad37db6acd2e742d8b9 --- /dev/null +++ b/my_modelscope_agent/tools/code_interpreter_utils/base_code_interpreter.py @@ -0,0 +1,13 @@ +class BaseCodeInterpreter: + """ + .run is a generator that yields a dict with attributes: active_line, output + """ + + def __init__(self): + pass + + def run(self, code): + pass + + def terminate(self): + pass diff --git a/my_modelscope_agent/tools/code_interpreter_utils/code_interpreter_init_kernel.py b/my_modelscope_agent/tools/code_interpreter_utils/code_interpreter_init_kernel.py new file mode 100644 index 0000000000000000000000000000000000000000..62511247f09cd9d15d9f6ea7491f29ae1bbdaf3c --- /dev/null +++ b/my_modelscope_agent/tools/code_interpreter_utils/code_interpreter_init_kernel.py @@ -0,0 +1,50 @@ +import math # noqa +import os # noqa +import re # noqa +import signal + +import json # noqa +import matplotlib # noqa +import matplotlib.pyplot as plt +import numpy as np # noqa +import pandas as pd # noqa +import seaborn as sns 
+from matplotlib.font_manager import FontProperties +from sympy import Eq, solve, symbols # noqa + + +def input(*args, **kwargs): # noqa + raise NotImplementedError('Python input() function is disabled.') + + +def _m6_timout_handler(_signum=None, _frame=None): + raise TimeoutError('M6_CODE_INTERPRETER_TIMEOUT') + + +try: + signal.signal(signal.SIGALRM, _m6_timout_handler) +except AttributeError: # windows + pass + + +class _M6CountdownTimer: + + @classmethod + def start(cls, timeout: int): + try: + signal.alarm(timeout) + except AttributeError: # windows + pass # TODO: I haven't found a solution that works with jupyter yet. + + @classmethod + def cancel(cls): + try: + signal.alarm(0) + except AttributeError: # windows + pass # TODO + + +sns.set_theme() + +_m6_font_prop = FontProperties(fname='{{M6_FONT_PATH}}') +plt.rcParams['font.family'] = _m6_font_prop.get_name() diff --git a/my_modelscope_agent/tools/code_interpreter_utils/create_code_interpreter.py b/my_modelscope_agent/tools/code_interpreter_utils/create_code_interpreter.py new file mode 100644 index 0000000000000000000000000000000000000000..e185b2fe6bdc676d7d89648f0e28c01b4c7915eb --- /dev/null +++ b/my_modelscope_agent/tools/code_interpreter_utils/create_code_interpreter.py @@ -0,0 +1,12 @@ +from .language_map import language_map + + +def create_code_interpreter(language): + # Case in-sensitive + language = language.lower() + + try: + CodeInterpreter = language_map[language] + return CodeInterpreter() + except KeyError: + raise ValueError(f'Unknown or unsupported language: {language}') diff --git a/my_modelscope_agent/tools/code_interpreter_utils/language_map.py b/my_modelscope_agent/tools/code_interpreter_utils/language_map.py new file mode 100644 index 0000000000000000000000000000000000000000..e28ad50b3a01fbd94eb615ea612dc01de299dd7a --- /dev/null +++ b/my_modelscope_agent/tools/code_interpreter_utils/language_map.py @@ -0,0 +1,19 @@ +from .languages.applescript import AppleScript +from .languages.html import HTML +from .languages.javascript import JavaScript +from .languages.powershell import PowerShell +from .languages.python import Python +from .languages.r import R +from .languages.shell import Shell + +language_map = { + 'python': Python, + 'bash': Shell, + 'shell': Shell, + 'zsh': Shell, + 'javascript': JavaScript, + 'html': HTML, + 'applescript': AppleScript, + 'r': R, + 'powershell': PowerShell, +} diff --git a/my_modelscope_agent/tools/code_interpreter_utils/languages/__init__.py b/my_modelscope_agent/tools/code_interpreter_utils/languages/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/my_modelscope_agent/tools/code_interpreter_utils/languages/applescript.py b/my_modelscope_agent/tools/code_interpreter_utils/languages/applescript.py new file mode 100644 index 0000000000000000000000000000000000000000..4100ce3cef60d782871e2a60a3c72d73caef784b --- /dev/null +++ b/my_modelscope_agent/tools/code_interpreter_utils/languages/applescript.py @@ -0,0 +1,67 @@ +import os + +from ..subprocess_code_interpreter import SubprocessCodeInterpreter + + +class AppleScript(SubprocessCodeInterpreter): + file_extension = 'applescript' + proper_name = 'AppleScript' + + def __init__(self): + super().__init__() + self.start_cmd = os.environ.get('SHELL', '/bin/zsh') + + def preprocess_code(self, code): + """ + Inserts an end_of_execution marker and adds active line indicators. 
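+        As a rough sketch, a one-line script like `display dialog "hi"` ends up as
+        something like:
+            osascript -e "log \"##active_line1##\"\ndisplay dialog \"hi\""; echo "##end_of_execution##"
+        so the caller can track the active line and detect when execution finishes.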
+ """ + # Add active line indicators to the code + code = self.add_active_line_indicators(code) + + # Escape double quotes + code = code.replace('"', r"\"") + + # Wrap in double quotes + code = '"' + code + '"' + + # Prepend start command for AppleScript + code = 'osascript -e ' + code + + # Append end of execution indicator + code += '; echo "##end_of_execution##"' + + return code + + def add_active_line_indicators(self, code): + """ + Adds log commands to indicate the active line of execution in the AppleScript. + """ + modified_lines = [] + lines = code.split('\n') + + for idx, line in enumerate(lines): + # Add log command to indicate the line number + if line.strip(): # Only add if line is not empty + modified_lines.append(f'log "##active_line{idx + 1}##"') + modified_lines.append(line) + + return '\n'.join(modified_lines) + + def detect_active_line(self, line): + """ + Detects active line indicator in the output. + """ + prefix = '##active_line' + if prefix in line: + try: + return int(line.split(prefix)[1].split()[0]) + except Exception as e: + print(e) + pass + return None + + def detect_end_of_execution(self, line): + """ + Detects end of execution marker in the output. + """ + return '##end_of_execution##' in line diff --git a/my_modelscope_agent/tools/code_interpreter_utils/languages/html.py b/my_modelscope_agent/tools/code_interpreter_utils/languages/html.py new file mode 100644 index 0000000000000000000000000000000000000000..f1745944e328420e72c457d16e568579ada3f8a6 --- /dev/null +++ b/my_modelscope_agent/tools/code_interpreter_utils/languages/html.py @@ -0,0 +1,26 @@ +import os +import tempfile +import webbrowser + +from ..base_code_interpreter import BaseCodeInterpreter + + +class HTML(BaseCodeInterpreter): + file_extension = 'html' + proper_name = 'HTML' + + def __init__(self): + super().__init__() + + def run(self, code): + # Create a temporary HTML file with the content + with tempfile.NamedTemporaryFile(delete=False, suffix='.html') as f: + f.write(code.encode()) + + # Open the HTML file with the default web browser + webbrowser.open('file://' + os.path.realpath(f.name)) + + yield { + 'output': + f"Saved to {os.path.realpath(f.name)} and opened with the user's default web browser." 
+ } diff --git a/my_modelscope_agent/tools/code_interpreter_utils/languages/javascript.py b/my_modelscope_agent/tools/code_interpreter_utils/languages/javascript.py new file mode 100644 index 0000000000000000000000000000000000000000..cb35f4f8488e8fcf8d511ab4cd5277e9bb9fa38d --- /dev/null +++ b/my_modelscope_agent/tools/code_interpreter_utils/languages/javascript.py @@ -0,0 +1,66 @@ +import re + +from ..subprocess_code_interpreter import SubprocessCodeInterpreter + + +class JavaScript(SubprocessCodeInterpreter): + file_extension = 'js' + proper_name = 'JavaScript' + + def __init__(self): + super().__init__() + self.start_cmd = 'node -i' + + def preprocess_code(self, code): + return preprocess_javascript(code) + + def line_postprocessor(self, line): + # Node's interactive REPL outputs a billion things + # So we clean it up: + if 'Welcome to Node.js' in line: + return None + if line.strip() in ['undefined', 'Type ".help" for more information.']: + return None + # Remove trailing ">"s + line = re.sub(r'^\s*(>\s*)+', '', line) + return line + + def detect_active_line(self, line): + if '##active_line' in line: + return int(line.split('##active_line')[1].split('##')[0]) + return None + + def detect_end_of_execution(self, line): + return '##end_of_execution##' in line + + +def preprocess_javascript(code): + """ + Add active line markers + Wrap in a try catch + Add end of execution marker + """ + + # Split code into lines + lines = code.split('\n') + processed_lines = [] + + for i, line in enumerate(lines, 1): + # Add active line print + processed_lines.append(f'console.log("##active_line{i}##");') + processed_lines.append(line) + + # Join lines to form the processed code + processed_code = '\n'.join(processed_lines) + + # Wrap in a try-catch and add end of execution marker + processed_code = f""" +try {{ +{processed_code} +}} catch (e) {{ + console.log(e); +}} +console.log("##end_of_execution##"); +""" + + return processed_code diff --git a/my_modelscope_agent/tools/code_interpreter_utils/languages/powershell.py b/my_modelscope_agent/tools/code_interpreter_utils/languages/powershell.py new file mode 100644 index 0000000000000000000000000000000000000000..467aa1105252a3f22374f9e8e060c1feeb5b17ee --- /dev/null +++ b/my_modelscope_agent/tools/code_interpreter_utils/languages/powershell.py @@ -0,0 +1,75 @@ +import os +import platform +import shutil + +from ..subprocess_code_interpreter import SubprocessCodeInterpreter + + +class PowerShell(SubprocessCodeInterpreter): + file_extension = 'ps1' + proper_name = 'PowerShell' + + def __init__(self): + super().__init__() + + # Determine the start command based on the platform (use "powershell" for Windows) + if platform.system() == 'Windows': + self.start_cmd = 'powershell.exe' + # self.start_cmd = os.environ.get('SHELL', 'powershell.exe') + else: + # On non-Windows platforms, prefer pwsh (PowerShell Core) if available, or fall back to bash + self.start_cmd = 'pwsh' if shutil.which('pwsh') else 'bash' + + def preprocess_code(self, code): + return preprocess_powershell(code) + + def line_postprocessor(self, line): + return line + + def detect_active_line(self, line): + if '##active_line' in line: + return int(line.split('##active_line')[1].split('##')[0]) + return None + + def detect_end_of_execution(self, line): + return '##end_of_execution##' in line + + +def preprocess_powershell(code): + """ + Add active line markers + Wrap in try-catch block + Add end of execution marker + """ + # Add commands that tell us what the active line is + code = 
add_active_line_prints(code) + + # Wrap in try-catch block for error handling + code = wrap_in_try_catch(code) + + # Add end marker (we'll be listening for this to know when it ends) + code += '\nWrite-Output "##end_of_execution##"' + + return code + + +def add_active_line_prints(code): + """ + Add Write-Output statements indicating line numbers to a PowerShell script. + """ + lines = code.split('\n') + for index, line in enumerate(lines): + # Insert the Write-Output command before the actual line + lines[index] = f'Write-Output "##active_line{index + 1}##"\n{line}' + return '\n'.join(lines) + + +def wrap_in_try_catch(code): + """ + Wrap PowerShell code in a try-catch block to catch errors and display them. + """ + try_catch_code = """ +try { + $ErrorActionPreference = "Stop" +""" + return try_catch_code + code + '\n} catch {\n Write-Error $_\n}\n' diff --git a/my_modelscope_agent/tools/code_interpreter_utils/languages/python.py b/my_modelscope_agent/tools/code_interpreter_utils/languages/python.py new file mode 100644 index 0000000000000000000000000000000000000000..107cc2009a3c445a10ee7115bcf624c10b20e7e2 --- /dev/null +++ b/my_modelscope_agent/tools/code_interpreter_utils/languages/python.py @@ -0,0 +1,161 @@ +import ast +import os +import re +import shlex +import sys + +from ..subprocess_code_interpreter import SubprocessCodeInterpreter + + +class Python(SubprocessCodeInterpreter): + file_extension = 'py' + proper_name = 'Python' + + def __init__(self): + super().__init__() + executable = sys.executable + if os.name != 'nt': # not Windows + executable = shlex.quote(executable) + self.start_cmd = executable + ' -i -q -u' + + def preprocess_code(self, code): + return preprocess_python(code) + + def line_postprocessor(self, line): + if re.match(r'^(\s*>>>\s*|\s*\.\.\.\s*)', line): + return None + return line + + def detect_active_line(self, line): + if '##active_line' in line: + return int(line.split('##active_line')[1].split('##')[0]) + return None + + def detect_end_of_execution(self, line): + return '##end_of_execution##' in line + + +def preprocess_python(code): + """ + Add active line markers + Wrap in a try except + Add end of execution marker + """ + + # Add print commands that tell us what the active line is + code = add_active_line_prints(code) + + # Wrap in a try except + code = wrap_in_try_except(code) + + # Remove any whitespace lines, as this will break indented blocks + # (are we sure about this? test this) + code_lines = code.split('\n') + code_lines = [c for c in code_lines if c.strip() != ''] + code = '\n'.join(code_lines) + + # Add end command (we'll be listening for this so we know when it ends) + code += '\n\nprint("##end_of_execution##")' + + return code + + +def add_active_line_prints(code): + """ + Add print statements indicating line numbers to a python string. + """ + tree = ast.parse(code) + transformer = AddLinePrints() + new_tree = transformer.visit(tree) + return ast.unparse(new_tree) + + +class AddLinePrints(ast.NodeTransformer): + """ + Transformer to insert print statements indicating the line number + before every executable line in the AST. 
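+    For example, a module containing the single statement `x = 1` is rewritten
+    (roughly) to:
+        print('##active_line1##')
+        x = 1
+    so the caller can report which line is currently executing.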
+ """ + + def insert_print_statement(self, line_number): + """Inserts a print statement for a given line number.""" + return ast.Expr( + value=ast.Call( + func=ast.Name(id='print', ctx=ast.Load()), + args=[ast.Constant(value=f'##active_line{line_number}##')], + keywords=[], + )) + + def process_body(self, body): + """Processes a block of statements, adding print calls.""" + new_body = [] + + # In case it's not iterable: + if not isinstance(body, list): + body = [body] + + for sub_node in body: + if hasattr(sub_node, 'lineno'): + new_body.append(self.insert_print_statement(sub_node.lineno)) + new_body.append(sub_node) + + return new_body + + def visit(self, node): + """Overridden visit to transform nodes.""" + new_node = super().visit(node) + + # If node has a body, process it + if hasattr(new_node, 'body'): + new_node.body = self.process_body(new_node.body) + + # If node has an orelse block (like in for, while, if), process it + if hasattr(new_node, 'orelse') and new_node.orelse: + new_node.orelse = self.process_body(new_node.orelse) + + # Special case for Try nodes as they have multiple blocks + if isinstance(new_node, ast.Try): + for handler in new_node.handlers: + handler.body = self.process_body(handler.body) + if new_node.finalbody: + new_node.finalbody = self.process_body(new_node.finalbody) + + return new_node + + +def wrap_in_try_except(code): + # Add import traceback + code = 'import traceback\n' + code + + # Parse the input code into an AST + parsed_code = ast.parse(code) + + # Wrap the entire code's AST in a single try-except block + try_except = ast.Try( + body=parsed_code.body, + handlers=[ + ast.ExceptHandler( + type=ast.Name(id='Exception', ctx=ast.Load()), + name=None, + body=[ + ast.Expr( + value=ast.Call( + func=ast.Attribute( + value=ast.Name(id='traceback', ctx=ast.Load()), + attr='print_exc', + ctx=ast.Load(), + ), + args=[], + keywords=[], + )), + ], + ) + ], + orelse=[], + finalbody=[], + ) + + # Assign the try-except block as the new body + parsed_code.body = [try_except] + + # Convert the modified AST back to source code + return ast.unparse(parsed_code) diff --git a/my_modelscope_agent/tools/code_interpreter_utils/languages/r.py b/my_modelscope_agent/tools/code_interpreter_utils/languages/r.py new file mode 100644 index 0000000000000000000000000000000000000000..28936608c7fce3b1ff7952bd09abd0b62f95539e --- /dev/null +++ b/my_modelscope_agent/tools/code_interpreter_utils/languages/r.py @@ -0,0 +1,71 @@ +import re + +from ..subprocess_code_interpreter import SubprocessCodeInterpreter + + +class R(SubprocessCodeInterpreter): + file_extension = 'r' + proper_name = 'R' + + def __init__(self): + super().__init__() + self.start_cmd = 'R -q --vanilla' # Start R in quiet and vanilla mode + + def preprocess_code(self, code): + """ + Add active line markers + Wrap in a tryCatch for better error handling in R + Add end of execution marker + """ + + lines = code.split('\n') + processed_lines = [] + + for i, line in enumerate(lines, 1): + # Add active line print + processed_lines.append(f'cat("##active_line{i}##\\n");{line}') + + # Join lines to form the processed code + processed_code = '\n'.join(processed_lines) + + # Wrap in a tryCatch for error handling and add end of execution marker + processed_code = f""" +tryCatch({{ +{processed_code} +}}, error=function(e){{ + cat("## execution_error ##\\n", conditionMessage(e), "\\n"); +}}) +cat("## end_of_execution ##\\n"); +""" + # Count the number of lines of processed_code + # (R echoes all code back for some reason, but we can 
skip it if we track this!) + self.code_line_count = len(processed_code.split('\n')) - 1 + + return processed_code + + def line_postprocessor(self, line): + # If the line count attribute is set and non-zero, decrement and skip the line + if hasattr(self, 'code_line_count') and self.code_line_count > 0: + self.code_line_count -= 1 + return None + + if re.match(r'^(\s*>>>\s*|\s*\.\.\.\s*|\s*>\s*|\s*\+\s*|\s*)$', line): + return None + if 'R version' in line: # Startup message + return None + if line.strip().startswith('[1] "') and line.endswith( + '"'): # For strings, trim quotation marks + return line[5:-1].strip() + if line.strip().startswith( + '[1]'): # Normal R output prefix for non-string outputs + return line[4:].strip() + + return line + + def detect_active_line(self, line): + if '##active_line' in line: + return int(line.split('##active_line')[1].split('##')[0]) + return None + + def detect_end_of_execution(self, line): + return '##end_of_execution##' in line or '## execution_error ##' in line diff --git a/my_modelscope_agent/tools/code_interpreter_utils/languages/shell.py b/my_modelscope_agent/tools/code_interpreter_utils/languages/shell.py new file mode 100644 index 0000000000000000000000000000000000000000..bbc067071f317c4a22176f140c910f139f09e4dc --- /dev/null +++ b/my_modelscope_agent/tools/code_interpreter_utils/languages/shell.py @@ -0,0 +1,89 @@ +import os +import platform +import re + +from ..subprocess_code_interpreter import SubprocessCodeInterpreter + + +class Shell(SubprocessCodeInterpreter): + file_extension = 'sh' + proper_name = 'Shell' + + def __init__(self): + super().__init__() + + # Determine the start command based on the platform + if platform.system() == 'Windows': + self.start_cmd = 'cmd.exe' + else: + self.start_cmd = os.environ.get('SHELL', 'bash') + + def preprocess_code(self, code): + return preprocess_shell(code) + + def line_postprocessor(self, line): + return line + + def detect_active_line(self, line): + if '##active_line' in line: + return int(line.split('##active_line')[1].split('##')[0]) + return None + + def detect_end_of_execution(self, line): + return '##end_of_execution##' in line + + +def preprocess_shell(code): + """ + Add active line markers + Wrap in a try except (trap in shell) + Add end of execution marker + """ + + # Add commands that tell us what the active line is + # if it's multiline, just skip this. soon we should make it work with multiline + if not has_multiline_commands(code): + code = add_active_line_prints(code) + + # Add end command (we'll be listening for this so we know when it ends) + code += '\necho "##end_of_execution##"' + + return code + + +def add_active_line_prints(code): + """ + Add echo statements indicating line numbers to a shell string. 
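+    For example, the single line `ls -la` becomes roughly:
+        echo "##active_line1##"
+        ls -la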
+ """ + lines = code.split('\n') + for index, line in enumerate(lines): + # Insert the echo command before the actual line + lines[index] = f'echo "##active_line{index + 1}##"\n{line}' + return '\n'.join(lines) + + +def has_multiline_commands(script_text): + # Patterns that indicate a line continues + continuation_patterns = [ + r'\\$', # Line continuation character at the end of the line + r'\|$', # Pipe character at the end of the line indicating a pipeline continuation + r'&&\s*$', # Logical AND at the end of the line + r'\|\|\s*$', # Logical OR at the end of the line + r'<\($', # Start of process substitution + r'\($', # Start of subshell + r'{\s*$', # Start of a block + r'\bif\b', # Start of an if statement + r'\bwhile\b', # Start of a while loop + r'\bfor\b', # Start of a for loop + r'do\s*$', # 'do' keyword for loops + r'then\s*$', # 'then' keyword for if statements + ] + + # Check each line for multiline patterns + for line in script_text.splitlines(): + if any( + re.search(pattern, line.rstrip()) + for pattern in continuation_patterns): + return True + + return False diff --git a/my_modelscope_agent/tools/code_interpreter_utils/subprocess_code_interpreter.py b/my_modelscope_agent/tools/code_interpreter_utils/subprocess_code_interpreter.py new file mode 100644 index 0000000000000000000000000000000000000000..01e6a7e0dddf8cce88e4aa15dd74a29849940e08 --- /dev/null +++ b/my_modelscope_agent/tools/code_interpreter_utils/subprocess_code_interpreter.py @@ -0,0 +1,152 @@ +import queue +import subprocess +import threading +import time +import traceback + +from .base_code_interpreter import BaseCodeInterpreter + + +class SubprocessCodeInterpreter(BaseCodeInterpreter): + + def __init__(self): + self.start_cmd = '' + self.process = None + self.debug_mode = False + self.output_queue = queue.Queue() + self.done = threading.Event() + + def detect_active_line(self, line): + return None + + def detect_end_of_execution(self, line): + return None + + def line_postprocessor(self, line): + return line + + def preprocess_code(self, code): + """ + This needs to insert an end_of_execution marker of some kind, + which can be detected by detect_end_of_execution. + + Optionally, add active line markers for detect_active_line. + """ + return code + + def terminate(self): + self.process.terminate() + + def start_process(self): + if self.process: + self.terminate() + + self.process = subprocess.Popen( + self.start_cmd.split(), + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + bufsize=0, + universal_newlines=True, + ) + threading.Thread( + target=self.handle_stream_output, + args=(self.process.stdout, False), + daemon=True, + ).start() + threading.Thread( + target=self.handle_stream_output, + args=(self.process.stderr, True), + daemon=True, + ).start() + + def run(self, code): + retry_count = 0 + max_retries = 3 + + # Setup + try: + code = self.preprocess_code(code) + if not self.process: + self.start_process() + except Exception as e: + print(e) + yield {'output': traceback.format_exc()} + return + + while retry_count <= max_retries: + if self.debug_mode: + print( + f'(after processing) Running processed code:\n{code}\n---') + + self.done.clear() + + try: + self.process.stdin.write(code + '\n') + self.process.stdin.flush() + break + except Exception as e: + print(e) + if retry_count != 0: + # For UX, I like to hide this if it happens once. 
Obviously feels better to not see errors + # Most of the time it doesn't matter, but we should figure out why it happens frequently with: + # applescript + yield {'output': traceback.format_exc()} + yield { + 'output': f'Retrying... ({retry_count}/{max_retries})' + } + yield {'output': 'Restarting process.'} + + self.start_process() + + retry_count += 1 + if retry_count > max_retries: + yield { + 'output': + 'Maximum retries reached. Could not execute code.' + } + return + + while True: + if not self.output_queue.empty(): + yield self.output_queue.get() + else: + time.sleep(0.1) + try: + output = self.output_queue.get( + timeout=0.3) # Waits for 0.3 seconds + yield output + except queue.Empty: + if self.done.is_set(): + # Try to yank 3 more times from it... maybe there's something in there... + # (I don't know if this actually helps. Maybe we just need to yank 1 more time) + for _ in range(3): + if not self.output_queue.empty(): + yield self.output_queue.get() + time.sleep(0.2) + break + + def handle_stream_output(self, stream, is_error_stream): + for line in iter(stream.readline, ''): + if self.debug_mode: + print(f'Received output line:\n{line}\n---') + + line = self.line_postprocessor(line) + + if line is None: + continue # `line = None` is the postprocessor's signal to discard completely + + if self.detect_active_line(line): + active_line = self.detect_active_line(line) + self.output_queue.put({'active_line': active_line}) + elif self.detect_end_of_execution(line): + self.output_queue.put({'active_line': None}) + time.sleep(0.1) + self.done.set() + elif is_error_stream and 'KeyboardInterrupt' in line: + self.output_queue.put({'output': 'KeyboardInterrupt'}) + time.sleep(0.1) + self.done.set() + else: + self.output_queue.put({'output': line}) diff --git a/my_modelscope_agent/tools/code_interpreter_utils/truncate_output.py b/my_modelscope_agent/tools/code_interpreter_utils/truncate_output.py new file mode 100644 index 0000000000000000000000000000000000000000..f3ed3314ff51d0a7af6c19abb3e4bfabc2ede420 --- /dev/null +++ b/my_modelscope_agent/tools/code_interpreter_utils/truncate_output.py @@ -0,0 +1,15 @@ +def truncate_output(data, max_output_chars=2000): + needs_truncation = False + + message = f'Output truncated. 
Showing the last {max_output_chars} characters.\n\n' + + # Remove previous truncation message if it exists + if data.startswith(message): + data = data[len(message):] + needs_truncation = True + + # If data exceeds max length, truncate it and add message + if len(data) > max_output_chars or needs_truncation: + data = message + data[-max_output_chars:] + + return data diff --git a/my_modelscope_agent/tools/hf_tool.py b/my_modelscope_agent/tools/hf_tool.py new file mode 100644 index 0000000000000000000000000000000000000000..98fa94e6fd76d96139e1292fe35b136acaa4f9ab --- /dev/null +++ b/my_modelscope_agent/tools/hf_tool.py @@ -0,0 +1,22 @@ +from typing import Dict, List + +from transformers.tools import Tool as HFTool + +from .tool import Tool + + +class HFTool(Tool): + """Simple wrapper for huggingface transformers tools + + """ + + def __init__(self, tool: HFTool, description: str, name: str, + parameters: List[Dict]): + self.tool = tool + self.description = description + self.name = name + self.parameters = parameters + super().__init__() + + def _local_call(self, *args, **kwargs): + return {'result': self.tool(**kwargs)} diff --git a/my_modelscope_agent/tools/image_chat_tool.py b/my_modelscope_agent/tools/image_chat_tool.py new file mode 100644 index 0000000000000000000000000000000000000000..526df966b8ac47bb3e26224c8be8d941101f3f9f --- /dev/null +++ b/my_modelscope_agent/tools/image_chat_tool.py @@ -0,0 +1,51 @@ +from modelscope.utils.constant import Tasks +from .pipeline_tool import ModelscopePipelineTool + + +class ImageChatTool(ModelscopePipelineTool): + default_model = 'damo/multi-modal_mplug_owl_multimodal-dialogue_7b' + description = '图文对话和图像描述服务,针对输入的图片和用户的文本输入,给出文本回复' + name = 'modelscope_image-chat' + parameters: list = [{ + 'name': 'image', + 'description': '用户输入的图片', + 'required': True + }, { + 'name': 'text', + 'description': '用户输入的文本', + 'required': True + }] + task = Tasks.multimodal_dialogue + + def construct_image_chat_input(self, **kwargs): + image = kwargs.pop('image', '') + text = kwargs.pop('text', '') + + system_prompt_1 = 'The following is a conversation between a curious human and AI assistant.' + system_prompt_2 = "The assistant gives helpful, detailed, and polite answers to the user's questions." 
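+        # Assemble the multimodal dialogue payload expected by the remote service:
+        # a system turn built from the two prompt sentences above, a user turn
+        # carrying the image, and a user turn carrying the text question. The exact
+        # message schema accepted by the mPLUG-Owl endpoint is assumed to match the
+        # structure constructed below.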
+ messages = { + 'messages': [ + { + 'role': 'system', + 'content': system_prompt_1 + ' ' + system_prompt_2 + }, + { + 'role': 'user', + 'content': [{ + 'image': image + }] + }, + { + 'role': 'user', + 'content': text + }, + ] + } + return messages + + def _remote_parse_input(self, *args, **kwargs): + messages = self.construct_image_chat_input(**kwargs) + return {'input': messages} + + def _local_parse_input(self, *args, **kwargs): + return (self.construct_image_chat_input(**kwargs)), {} diff --git a/my_modelscope_agent/tools/openapi_plugin.py b/my_modelscope_agent/tools/openapi_plugin.py new file mode 100644 index 0000000000000000000000000000000000000000..2502305ab532ae20157e9b98f830ab03fd46d925 --- /dev/null +++ b/my_modelscope_agent/tools/openapi_plugin.py @@ -0,0 +1,370 @@ +import os +import re +from typing import List, Optional + +import json +import requests +from jsonschema import RefResolver +from pydantic import BaseModel, ValidationError +from requests.exceptions import RequestException, Timeout + +from .tool import Tool + +MAX_RETRY_TIMES = 3 + + +class ParametersSchema(BaseModel): + name: str + description: str + required: Optional[bool] = True + + +class ToolSchema(BaseModel): + name: str + description: str + parameters: List[ParametersSchema] + + +class OpenAPIPluginTool(Tool): + """ + openapi schema tool + """ + name: str = 'api tool' + description: str = 'This is a api tool that ...' + parameters: list = [] + + def __init__(self, cfg, name): + self.name = name + self.cfg = cfg.get(self.name, {}) + self.is_remote_tool = self.cfg.get('is_remote_tool', False) + # remote call + self.url = self.cfg.get('url', '') + self.token = self.cfg.get('token', '') + self.header = self.cfg.get('header', '') + self.method = self.cfg.get('method', '') + self.parameters = self.cfg.get('parameters', []) + self.description = self.cfg.get('description', + 'This is a api tool that ...') + self.responses_param = self.cfg.get('responses_param', []) + try: + all_para = { + 'name': self.name, + 'description': self.description, + 'parameters': self.parameters + } + self.tool_schema = ToolSchema(**all_para) + except ValidationError: + raise ValueError(f'Error when parsing parameters of {self.name}') + self._str = self.tool_schema.model_dump_json() + self._function = self.parse_pydantic_model_to_openai_function(all_para) + + def _remote_call(self, *args, **kwargs): + if self.url == '': + raise ValueError( + f"Could not use remote call for {self.name} since this tool doesn't have a remote endpoint" + ) + + remote_parsed_input = json.dumps( + self._remote_parse_input(*args, **kwargs)) + origin_result = None + if self.method == 'POST': + retry_times = MAX_RETRY_TIMES + while retry_times: + retry_times -= 1 + try: + print(f'data: {kwargs}') + print(f'header: {self.header}') + response = requests.request( + 'POST', + url=self.url, + headers=self.header, + data=remote_parsed_input) + + if response.status_code != requests.codes.ok: + response.raise_for_status() + origin_result = json.loads( + response.content.decode('utf-8')) + + final_result = self._parse_output( + origin_result, remote=True) + return final_result + except Timeout: + continue + except RequestException as e: + raise ValueError( + f'Remote call failed with error code: {e.response.status_code},\ + error message: {e.response.content.decode("utf-8")}') + + raise ValueError( + 'Remote call max retry times exceeded! Please try to use local call.' 
+ ) + elif self.method == 'GET': + retry_times = MAX_RETRY_TIMES + + new_url = self.url + matches = re.findall(r'\{(.*?)\}', self.url) + for match in matches: + if match in kwargs: + new_url = new_url.replace('{' + match + '}', kwargs[match]) + else: + print( + f'The parameter {match} was not generated by the model.' + ) + + while retry_times: + retry_times -= 1 + try: + print('GET:', new_url) + print('GET:', self.url) + + response = requests.request( + 'GET', + url=new_url, + headers=self.header, + params=remote_parsed_input) + if response.status_code != requests.codes.ok: + response.raise_for_status() + + origin_result = json.loads( + response.content.decode('utf-8')) + + final_result = self._parse_output( + origin_result, remote=True) + return final_result + except Timeout: + continue + except RequestException as e: + raise ValueError( + f'Remote call failed with error code: {e.response.status_code},\ + error message: {e.response.content.decode("utf-8")}') + + raise ValueError( + 'Remote call max retry times exceeded! Please try to use local call.' + ) + else: + raise ValueError( + 'Remote call method is invalid!We have POST and GET method.') + + def _remote_parse_input(self, *args, **kwargs): + restored_dict = {} + for key, value in kwargs.items(): + if '.' in key: + # Split keys by "." and create nested dictionary structures + keys = key.split('.') + temp_dict = restored_dict + for k in keys[:-1]: + temp_dict = temp_dict.setdefault(k, {}) + temp_dict[keys[-1]] = value + else: + # f the key does not contain ".", directly store the key-value pair into restored_dict + restored_dict[key] = value + kwargs = restored_dict + print('传给tool的参数:', kwargs) + return kwargs + + +# openapi_schema_convert,register to tool_config.json +def extract_references(schema_content): + references = [] + if isinstance(schema_content, dict): + if '$ref' in schema_content: + references.append(schema_content['$ref']) + for key, value in schema_content.items(): + references.extend(extract_references(value)) + elif isinstance(schema_content, list): + for item in schema_content: + references.extend(extract_references(item)) + return references + + +def parse_nested_parameters(param_name, param_info, parameters_list, content): + param_type = param_info['type'] + param_description = param_info.get('description', + f'用户输入的{param_name}') # 按需更改描述 + param_required = param_name in content['required'] + try: + if param_type == 'object': + properties = param_info.get('properties') + if properties: + # If the argument type is an object and has a non-empty "properties" field, + # its internal properties are parsed recursively + for inner_param_name, inner_param_info in properties.items(): + inner_param_type = inner_param_info['type'] + inner_param_description = inner_param_info.get( + 'description', f'用户输入的{param_name}.{inner_param_name}') + inner_param_required = param_name.split( + '.')[0] in content['required'] + + # Recursively call the function to handle nested objects + if inner_param_type == 'object': + parse_nested_parameters( + f'{param_name}.{inner_param_name}', + inner_param_info, parameters_list, content) + else: + parameters_list.append({ + 'name': + f'{param_name}.{inner_param_name}', + 'description': + inner_param_description, + 'required': + inner_param_required, + 'type': + inner_param_type, + 'value': + inner_param_info.get('enum', '') + }) + else: + # Non-nested parameters are added directly to the parameter list + parameters_list.append({ + 'name': param_name, + 'description': param_description, + 
'required': param_required, + 'type': param_type, + 'value': param_info.get('enum', '') + }) + except Exception as e: + raise ValueError(f'{e}:schema结构出错') + + +def parse_responses_parameters(param_name, param_info, parameters_list): + param_type = param_info['type'] + param_description = param_info.get('description', + f'调用api返回的{param_name}') # 按需更改描述 + try: + if param_type == 'object': + properties = param_info.get('properties') + if properties: + # If the argument type is an object and has a non-empty "properties" + # field, its internal properties are parsed recursively + + for inner_param_name, inner_param_info in properties.items(): + param_type = inner_param_info['type'] + param_description = inner_param_info.get( + 'description', + f'调用api返回的{param_name}.{inner_param_name}') + parameters_list.append({ + 'name': f'{param_name}.{inner_param_name}', + 'description': param_description, + 'type': param_type, + }) + else: + # Non-nested parameters are added directly to the parameter list + parameters_list.append({ + 'name': param_name, + 'description': param_description, + 'type': param_type, + }) + except Exception as e: + raise ValueError(f'{e}:schema结构出错') + + +def openapi_schema_convert(schema, auth): + + resolver = RefResolver.from_schema(schema) + servers = schema.get('servers', []) + if servers: + servers_url = servers[0].get('url') + else: + print('No URL found in the schema.') + # Extract endpoints + endpoints = schema.get('paths', {}) + description = schema.get('info', {}).get('description', + 'This is a api tool that ...') + config_data = {} + # Iterate over each endpoint and its contents + for endpoint_path, methods in endpoints.items(): + for method, details in methods.items(): + summary = details.get('summary', 'No summary').replace(' ', '_') + name = details.get('operationId', 'No operationId') + url = f'{servers_url}{endpoint_path}' + security = details.get('security', [{}]) + # Security (Bearer Token) + authorization = '' + if security: + for sec in security: + if 'BearerAuth' in sec: + api_token = auth.get('apikey', os.environ['apikey']) + api_token_type = auth.get('apikey_type', + os.environ['apikey_type']) + authorization = f'{api_token_type} {api_token}' + if method.upper() == 'POST': + requestBody = details.get('requestBody', {}) + if requestBody: + for content_type, content_details in requestBody.get( + 'content', {}).items(): + schema_content = content_details.get('schema', {}) + references = extract_references(schema_content) + for reference in references: + resolved_schema = resolver.resolve(reference) + content = resolved_schema[1] + parameters_list = [] + for param_name, param_info in content[ + 'properties'].items(): + parse_nested_parameters( + param_name, param_info, parameters_list, + content) + X_DashScope_Async = requestBody.get( + 'X-DashScope-Async', '') + if X_DashScope_Async == '': + config_entry = { + 'name': name, + 'description': description, + 'is_active': True, + 'is_remote_tool': True, + 'url': url, + 'method': method.upper(), + 'parameters': parameters_list, + 'header': { + 'Content-Type': content_type, + 'Authorization': authorization + } + } + else: + config_entry = { + 'name': name, + 'description': description, + 'is_active': True, + 'is_remote_tool': True, + 'url': url, + 'method': method.upper(), + 'parameters': parameters_list, + 'header': { + 'Content-Type': content_type, + 'Authorization': authorization, + 'X-DashScope-Async': 'enable' + } + } + else: + config_entry = { + 'name': name, + 'description': description, + 'is_active': 
True, + 'is_remote_tool': True, + 'url': url, + 'method': method.upper(), + 'parameters': [], + 'header': { + 'Content-Type': 'application/json', + 'Authorization': authorization + } + } + elif method.upper() == 'GET': + parameters_list = [] + parameters_list = details.get('parameters', []) + config_entry = { + 'name': name, + 'description': description, + 'is_active': True, + 'is_remote_tool': True, + 'url': url, + 'method': method.upper(), + 'parameters': parameters_list, + 'header': { + 'Authorization': authorization + } + } + else: + raise 'method is not POST or GET' + + config_data[summary] = config_entry + return config_data diff --git a/my_modelscope_agent/tools/pipeline_tool.py b/my_modelscope_agent/tools/pipeline_tool.py new file mode 100644 index 0000000000000000000000000000000000000000..12f676dbd56b18e4ddf1ac7130f7a3bef6751a91 --- /dev/null +++ b/my_modelscope_agent/tools/pipeline_tool.py @@ -0,0 +1,40 @@ +from modelscope.pipelines import pipeline +from .tool import Tool + + +class ModelscopePipelineTool(Tool): + + default_model: str = '' + task: str = '' + model_revision = None + + def __init__(self, cfg): + + super().__init__(cfg) + self.model = self.cfg.get('model', None) or self.default_model + self.model_revision = self.cfg.get('model_revision', + None) or self.model_revision + + self.pipeline_params = self.cfg.get('pipeline_params', {}) + self.pipeline = None + self.is_initialized = False + + def setup(self): + + # only initialize when this tool is really called to save memory + if not self.is_initialized: + self.pipeline = pipeline( + task=self.task, + model=self.model, + model_revision=self.model_revision, + **self.pipeline_params) + self.is_initialized = True + + def _local_call(self, *args, **kwargs): + + self.setup() + + parsed_args, parsed_kwargs = self._local_parse_input(*args, **kwargs) + origin_result = self.pipeline(*parsed_args, **parsed_kwargs) + final_result = self._parse_output(origin_result, remote=False) + return final_result diff --git a/my_modelscope_agent/tools/plugin_tool.py b/my_modelscope_agent/tools/plugin_tool.py new file mode 100644 index 0000000000000000000000000000000000000000..bd1242448545b6042aa8f12d2fcd5ea959306427 --- /dev/null +++ b/my_modelscope_agent/tools/plugin_tool.py @@ -0,0 +1,30 @@ +from copy import deepcopy + +from .tool import Tool + + +class LangchainTool(Tool): + + def __init__(self, langchain_tool): + from langchain.tools import BaseTool + + if not isinstance(langchain_tool, BaseTool): + raise ValueError('langchain_tool should be type of langchain tool') + self.langchain_tool = langchain_tool + self.parse_langchain_schema() + super().__init__() + + def parse_langchain_schema(self): + # convert langchain tool schema to modelscope_agent tool schema + self.description = self.langchain_tool.description + self.name = self.langchain_tool.name + self.parameters = [] + for name, arg in self.langchain_tool.args.items(): + tool_arg = deepcopy(arg) + tool_arg['name'] = name + tool_arg['required'] = True + tool_arg.pop('title') + self.parameters.append(tool_arg) + + def _local_call(self, *args, **kwargs): + return {'result': self.langchain_tool.run(kwargs)} diff --git a/my_modelscope_agent/tools/text_address_tool.py b/my_modelscope_agent/tools/text_address_tool.py new file mode 100644 index 0000000000000000000000000000000000000000..8c52b147b84fbe6d8da0e69989e936b8b731ea04 --- /dev/null +++ b/my_modelscope_agent/tools/text_address_tool.py @@ -0,0 +1,20 @@ +from modelscope.utils.constant import Tasks +from .pipeline_tool import 
ModelscopePipelineTool + + +class TextAddressTool(ModelscopePipelineTool): + default_model = 'damo/mgeo_geographic_elements_tagging_chinese_base' + description = '地址解析服务,针对中文地址信息,识别出里面的元素,包括省、市、区、镇、社区、道路、路号、POI、楼栋号、户室号等' + name = 'modelscope_text-address' + parameters: list = [{ + 'name': 'input', + 'description': '用户输入的地址信息', + 'required': True + }] + task = Tasks.token_classification + + def _parse_output(self, origin_result, *args, **kwargs): + final_result = {} + for e in origin_result['output']: + final_result[e['type']] = e['span'] + return {'result': final_result} diff --git a/my_modelscope_agent/tools/text_ie_tool.py b/my_modelscope_agent/tools/text_ie_tool.py new file mode 100644 index 0000000000000000000000000000000000000000..d8c983e8481cace0acaa228eddf22bd6df28af01 --- /dev/null +++ b/my_modelscope_agent/tools/text_ie_tool.py @@ -0,0 +1,32 @@ +from collections import defaultdict + +from modelscope.utils.constant import Tasks +from .pipeline_tool import ModelscopePipelineTool + + +class TextInfoExtractTool(ModelscopePipelineTool): + default_model = 'damo/nlp_structbert_siamese-uie_chinese-base' + description = '信息抽取服务,针对中文的文本,根据schema要抽取的内容,找出其中对应信息,并用json格式展示' + name = 'modelscope_text-ie' + parameters: list = [{ + 'name': 'input', + 'description': '用户输入的文本', + 'required': True + }, { + 'name': 'schema', + 'description': '要抽取信息的json表示', + 'required': True + }] + task = Tasks.siamese_uie + + def _remote_parse_input(self, *args, **kwargs): + kwargs['parameters'] = {'schema': kwargs['schema']} + kwargs.pop('schema') + return kwargs + + def _parse_output(self, origin_result, *args, **kwargs): + final_result = defaultdict(list) + for e in origin_result['output']: + final_result[e[0]['type']].append(e[0]['span']) + + return {'result': dict(final_result)} diff --git a/my_modelscope_agent/tools/text_ner_tool.py b/my_modelscope_agent/tools/text_ner_tool.py new file mode 100644 index 0000000000000000000000000000000000000000..a694a96c90a2b6d9e344754b065e81a8b5897ee9 --- /dev/null +++ b/my_modelscope_agent/tools/text_ner_tool.py @@ -0,0 +1,22 @@ +from collections import defaultdict + +from modelscope.utils.constant import Tasks +from .pipeline_tool import ModelscopePipelineTool + + +class TextNerTool(ModelscopePipelineTool): + default_model = 'damo/nlp_raner_named-entity-recognition_chinese-base-news' + description = '命名实体识别服务,针对需要识别的中文文本,找出其中的实体,返回json格式结果' + name = 'modelscope_text-ner' + parameters: list = [{ + 'name': 'input', + 'description': '用户输入的文本', + 'required': True + }] + task = Tasks.named_entity_recognition + + def _parse_output(self, origin_result, *args, **kwargs): + final_result = defaultdict(list) + for e in origin_result['output']: + final_result[e['type']].append(e['span']) + return {'result': dict(final_result)} diff --git a/my_modelscope_agent/tools/text_to_image_tool.py b/my_modelscope_agent/tools/text_to_image_tool.py new file mode 100644 index 0000000000000000000000000000000000000000..088749729723679498b2d97f9f3443f6f8f70e60 --- /dev/null +++ b/my_modelscope_agent/tools/text_to_image_tool.py @@ -0,0 +1,114 @@ +import os +import re + +import cv2 +import dashscope +import json +from dashscope import ImageSynthesis +from ..output_wrapper import ImageWrapper + +from modelscope.utils.constant import Tasks +from .pipeline_tool import ModelscopePipelineTool + + +class TextToImageTool(ModelscopePipelineTool): + default_model = 'AI-ModelScope/stable-diffusion-xl-base-1.0' + description = 'AI绘画(图像生成)服务,输入文本描述和图像分辨率,返回根据文本信息绘制的图片URL。' + name = 'image_gen' + parameters: list = 
[{ + 'name': 'text', + 'description': '详细描述了希望生成的图像具有什么内容,例如人物、环境、动作等细节描述', + 'required': True, + 'schema': { + 'type': 'string' + } + }, { + 'name': 'resolution', + 'description': + '格式是 数字*数字,表示希望生成的图像的分辨率大小,选项有[1024*1024, 720*1280, 1280*720]', + 'required': True, + 'schema': { + 'type': 'string' + } + }] + model_revision = 'v1.0.0' + task = Tasks.text_to_image_synthesis + + # def _remote_parse_input(self, *args, **kwargs): + # params = { + # 'input': { + # 'text': kwargs['text'], + # 'resolution': kwargs['resolution'] + # } + # } + # if kwargs.get('seed', None): + # params['input']['seed'] = kwargs['seed'] + # return params + + def _remote_call(self, *args, **kwargs): + + if ('resolution' in kwargs) and (kwargs['resolution'] in [ + '1024*1024', '720*1280', '1280*720' + ]): + resolution = kwargs['resolution'] + else: + resolution = '1280*720' + + prompt = kwargs['text'] + seed = kwargs.get('seed', None) + if prompt is None: + return None + dashscope.api_key = os.getenv('DASHSCOPE_API_KEY') + response = ImageSynthesis.call( + model=ImageSynthesis.Models.wanx_v1, + prompt=prompt, + n=1, + size=resolution, + steps=10, + seed=seed) + final_result = self._parse_output(response, remote=True) + return final_result + + def _local_parse_input(self, *args, **kwargs): + + text = kwargs.pop('text', '') + + parsed_args = ({'text': text}, ) + + return parsed_args, {} + + def _parse_output(self, origin_result, remote=True): + if not remote: + image = cv2.cvtColor(origin_result['output_imgs'][0], + cv2.COLOR_BGR2RGB) + else: + image = origin_result.output['results'][0]['url'] + + return {'result': ImageWrapper(image)} + + def _handle_input_fallback(self, **kwargs): + """ + an alternative method is to parse image is that get item between { and } + for last try + + :param fallback_text: + :return: language, cocde + """ + + text = kwargs.get('text', None) + fallback = kwargs.get('fallback', None) + + if text: + return text + elif fallback: + try: + text = fallback + json_block = re.search(r'\{([\s\S]+)\}', text) # noqa W^05 + if json_block: + result = json_block.group(1) + result_json = json.loads('{' + result + '}') + return result_json['text'] + except ValueError: + return text + else: + return text diff --git a/my_modelscope_agent/tools/text_to_speech_tool.py b/my_modelscope_agent/tools/text_to_speech_tool.py new file mode 100644 index 0000000000000000000000000000000000000000..47c7beb05698807b28affe3d5d1523b737f5d097 --- /dev/null +++ b/my_modelscope_agent/tools/text_to_speech_tool.py @@ -0,0 +1,44 @@ +from ..output_wrapper import AudioWrapper + +from modelscope.utils.constant import Tasks +from .pipeline_tool import ModelscopePipelineTool + + +class TexttoSpeechTool(ModelscopePipelineTool): + default_model = 'damo/speech_sambert-hifigan_tts_zh-cn_16k' + description = '文本转语音服务,将文字转换为自然而逼真的语音,可配置男声/女声' + name = 'modelscope_speech-generation' + parameters: list = [{ + 'name': 'input', + 'description': '要转成语音的文本', + 'required': True + }, { + 'name': 'gender', + 'description': '用户身份', + 'required': True + }] + task = Tasks.text_to_speech + + def _local_parse_input(self, *args, **kwargs): + if 'gender' not in kwargs: + kwargs['gender'] = 'man' + voice = 'zhizhe_emo' if kwargs['gender'] == 'man' else 'zhiyan_emo' + kwargs['voice'] = voice + if 'text' in kwargs and 'input' not in kwargs: + kwargs['input'] = kwargs['text'] + kwargs.pop('text') + kwargs.pop('gender') + return args, kwargs + + def _remote_parse_input(self, *args, **kwargs): + if 'gender' not in kwargs: + kwargs['gender'] = 'man' + voice = 
'zhizhe_emo' if kwargs['gender'] == 'man' else 'zhiyan_emo' + kwargs['voice'] = voice + kwargs.pop('gender') + return kwargs + + def _parse_output(self, origin_result, remote=True): + + audio = origin_result['output_wav'] + return {'result': AudioWrapper(audio)} diff --git a/my_modelscope_agent/tools/text_to_video_tool.py b/my_modelscope_agent/tools/text_to_video_tool.py new file mode 100644 index 0000000000000000000000000000000000000000..2bcacb9ad57e113c98824465792ccef535930a66 --- /dev/null +++ b/my_modelscope_agent/tools/text_to_video_tool.py @@ -0,0 +1,40 @@ +import os +import tempfile +import uuid + +from ..output_wrapper import VideoWrapper + +from modelscope.utils.constant import Tasks +from .pipeline_tool import ModelscopePipelineTool + + +class TextToVideoTool(ModelscopePipelineTool): + default_model = 'damo/text-to-video-synthesis' + description = '视频生成服务,针对英文文本输入,生成一段描述视频;如果是中文输入同时依赖插件modelscope_text-translation-zh2en翻译成英文' + + name = 'modelscope_video-generation' + parameters: list = [{ + 'name': 'text', + 'description': '用户输入的文本信息', + 'required': True + }] + task = Tasks.text_to_video_synthesis + + def _remote_parse_input(self, *args, **kwargs): + return {'input': {'text': kwargs['text']}} + + def _local_parse_input(self, *args, **kwargs): + + text = kwargs.pop('text', '') + directory = tempfile.mkdtemp() + file_path = os.path.join(directory, str(uuid.uuid4()) + '.mp4') + + parsed_args = ({'text': text}, ) + parsed_kwargs = {'output_video': file_path} + + return parsed_args, parsed_kwargs + + def _parse_output(self, origin_result, remote=True): + + video = origin_result['output_video'] + return {'result': VideoWrapper(video)} diff --git a/my_modelscope_agent/tools/tool.py b/my_modelscope_agent/tools/tool.py new file mode 100644 index 0000000000000000000000000000000000000000..5252f1c9c12658d118059e0179b21d4b29319941 --- /dev/null +++ b/my_modelscope_agent/tools/tool.py @@ -0,0 +1,180 @@ +import os +from typing import List, Optional + +import json +import requests +from pydantic import BaseModel, ValidationError +from requests.exceptions import RequestException, Timeout + +MODELSCOPE_API_TOKEN = os.getenv('MODELSCOPE_API_TOKEN') + +MAX_RETRY_TIMES = 3 + + +class ParametersSchema(BaseModel): + name: str + description: str + required: Optional[bool] = True + + +class ToolSchema(BaseModel): + name: str + description: str + parameters: List[ParametersSchema] + + +class Tool: + """ + a base class for tools. + when you inherit this class and implement new tool, you should provide name, description + and parameters of tool that conforms with schema. + + each tool may have two call method: _local_call(execute tool in your local environment) + and _remote_call(construct a http request to remote server). + corresponding to preprocess and postprocess method may need to be overrided to get correct result. + """ + name: str = 'tool' + description: str = 'This is a tool that ...' 
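+    # A minimal concrete tool only needs name / description / parameters plus a
+    # _local_call override that returns {'result': ...}. Sketch below, with
+    # illustrative names that are not part of this package:
+    #
+    #     class EchoTool(Tool):
+    #         name = 'echo'
+    #         description = '原样返回用户输入的文本'
+    #         parameters = [{
+    #             'name': 'input',
+    #             'description': '要回显的文本',
+    #             'required': True
+    #         }]
+    #
+    #         def _local_call(self, *args, **kwargs):
+    #             return {'result': kwargs.get('input', '')}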
+ parameters: list = [] + + def __init__(self, cfg={}): + self.cfg = cfg.get(self.name, {}) + self.is_remote_tool = self.cfg.get('is_remote_tool', False) + + # remote call + self.url = self.cfg.get('url', '') + self.token = self.cfg.get('token', '') + self.header = { + 'Authorization': self.token or f'Bearer {MODELSCOPE_API_TOKEN}' + } + + try: + all_para = { + 'name': self.name, + 'description': self.description, + 'parameters': self.parameters + } + self.tool_schema = ToolSchema(**all_para) + except ValidationError: + raise ValueError(f'Error when parsing parameters of {self.name}') + + self._str = self.tool_schema.model_dump_json() + self._function = self.parse_pydantic_model_to_openai_function(all_para) + + def __call__(self, remote=False, *args, **kwargs): + if self.is_remote_tool or remote: + return self._remote_call(*args, **kwargs) + else: + return self._local_call(*args, **kwargs) + + def _remote_call(self, *args, **kwargs): + if self.url == '': + raise ValueError( + f"Could not use remote call for {self.name} since this tool doesn't have a remote endpoint" + ) + + remote_parsed_input = json.dumps( + self._remote_parse_input(*args, **kwargs)) + + origin_result = None + retry_times = MAX_RETRY_TIMES + while retry_times: + retry_times -= 1 + try: + response = requests.request( + 'POST', + self.url, + headers=self.header, + data=remote_parsed_input) + if response.status_code != requests.codes.ok: + response.raise_for_status() + + origin_result = json.loads( + response.content.decode('utf-8'))['Data'] + + final_result = self._parse_output(origin_result, remote=True) + return final_result + except Timeout: + continue + except RequestException as e: + raise ValueError( + f'Remote call failed with error code: {e.response.status_code},\ + error message: {e.response.content.decode("utf-8")}') + + raise ValueError( + 'Remote call max retry times exceeded! Please try to use local call.' 
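+            # Reached only when every attempt hit a Timeout; any other
+            # RequestException is re-raised immediately above with the
+            # server's status code and message.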
+ ) + + def _local_call(self, *args, **kwargs): + return + + def _remote_parse_input(self, *args, **kwargs): + return kwargs + + def _local_parse_input(self, *args, **kwargs): + return args, kwargs + + def _parse_output(self, origin_result, *args, **kwargs): + return {'result': origin_result} + + def __str__(self): + return self._str + + def get_function(self): + return self._function + + def parse_pydantic_model_to_openai_function(self, all_para: dict): + ''' + this method used to convert a pydantic model to openai function schema + such that convert + all_para = { + 'name': get_current_weather, + 'description': Get the current weather in a given location, + 'parameters': [{ + 'name': 'image', + 'description': '用户输入的图片', + 'required': True + }, { + 'name': 'text', + 'description': '用户输入的文本', + 'required': True + }] + } + to + { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "image": { + "type": "string", + "description": "用户输入的图片", + }, + "text": { + "type": "string", + "description": "用户输入的文本", + }, + "required": ["image", "text"], + }, + } + ''' + + function = { + 'name': all_para['name'], + 'description': all_para['description'], + 'parameters': { + 'type': 'object', + 'properties': {}, + 'required': [], + }, + } + for para in all_para['parameters']: + function['parameters']['properties'][para['name']] = { + 'type': 'string', + 'description': para['description'] + } + if para['required']: + function['parameters']['required'].append(para['name']) + + return function diff --git a/my_modelscope_agent/tools/translation_en2zh_tool.py b/my_modelscope_agent/tools/translation_en2zh_tool.py new file mode 100644 index 0000000000000000000000000000000000000000..2e6803e320b3093113e63a288a896938f292136a --- /dev/null +++ b/my_modelscope_agent/tools/translation_en2zh_tool.py @@ -0,0 +1,17 @@ +from modelscope.utils.constant import Tasks +from .pipeline_tool import ModelscopePipelineTool + + +class TranslationEn2ZhTool(ModelscopePipelineTool): + default_model = 'damo/nlp_csanmt_translation_en2zh' + description = '根据输入指令,将相应的英文文本翻译成中文回复' + name = 'modelscope_text-translation-en2zh' + task = Tasks.translation + parameters: list = [{ + 'name': 'input', + 'description': '用户输入的英文文本', + 'required': True + }] + + def _parse_output(self, origin_result, *args, **kwargs): + return {'result': origin_result['translation']} diff --git a/my_modelscope_agent/tools/translation_zh2en_tool.py b/my_modelscope_agent/tools/translation_zh2en_tool.py new file mode 100644 index 0000000000000000000000000000000000000000..6371acb1e22f2b607025840bc2b7dd8dd96facd1 --- /dev/null +++ b/my_modelscope_agent/tools/translation_zh2en_tool.py @@ -0,0 +1,17 @@ +from modelscope.utils.constant import Tasks +from .pipeline_tool import ModelscopePipelineTool + + +class TranslationZh2EnTool(ModelscopePipelineTool): + default_model = 'damo/nlp_csanmt_translation_zh2en' + description = '根据输入指令,将相应的中文文本翻译成英文回复' + name = 'modelscope_text-translation-zh2en' + task = Tasks.translation + parameters: list = [{ + 'name': 'input', + 'description': '用户输入的中文文本', + 'required': True + }] + + def _parse_output(self, origin_result, *args, **kwargs): + return {'result': origin_result['translation']} diff --git a/my_modelscope_agent/tools/web_browser.py b/my_modelscope_agent/tools/web_browser.py new file mode 100644 index 0000000000000000000000000000000000000000..0d0b7e2c2771e0823d2089465edd83cd24b9cdc3 --- /dev/null +++ 
b/my_modelscope_agent/tools/web_browser.py @@ -0,0 +1,72 @@ +import httpx +from langchain.document_loaders import AsyncHtmlLoader +from langchain.document_transformers import BeautifulSoupTransformer +from langchain.text_splitter import RecursiveCharacterTextSplitter +from ..tools.tool import Tool + + +class WebBrowser(Tool): + description = '生成艺术字纹理图片' + name = 'web_browser' + parameters: list = [{ + 'name': 'urls', + 'description': 'the urls that the user wants to browse', + 'required': True + }] + + def __init__(self, cfg={}): + super().__init__(cfg) + self.split_url_into_chunk = self.cfg.get('split_url_into_chunk', False) + self.headers = { + 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)' + } + self.client = httpx.Client( + headers=self.headers, verify=False, timeout=30.0) + + def _local_call(self, *args, **kwargs): + parsed_args, parsed_kwargs = self._local_parse_input(*args, **kwargs) + + urls = parsed_kwargs['urls'] + print(urls) + if urls is None: + return {'result': ''} + + # # load html + loader = AsyncHtmlLoader(urls) + docs = loader.load() + # Transform + bs_transformer = BeautifulSoupTransformer() + docs_transformed = bs_transformer.transform_documents( + docs, tags_to_extract=['span']) + + # split url content into chunk in order to get fine-grained results + if self.split_url_into_chunk: + # Grab the first 1000 tokens of the site + splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder( + chunk_size=1000, chunk_overlap=0) + splits = splitter.split_documents(docs_transformed) + else: + splits = docs_transformed + search_results = [] + for item in splits: + result = { + 'url': item.metadata['source'], + 'content': item.page_content + } + search_results.append(result) + + return {'result': search_results} + + def _local_parse_input(self, *args, **kwargs): + urls = kwargs.get('urls', []) + if isinstance(urls, str): + urls = [urls] + kwargs['urls'] = urls + return args, kwargs + + +if __name__ == '__main__': + tool = WebBrowser() + urls = ['https://blog.sina.com.cn/zhangwuchang'] + result = tool._local_call(urls=urls) + print(result) diff --git a/my_modelscope_agent/tools/web_search.py b/my_modelscope_agent/tools/web_search.py new file mode 100644 index 0000000000000000000000000000000000000000..e40f68b31a10d26a015329f4357152947ac3ad1d --- /dev/null +++ b/my_modelscope_agent/tools/web_search.py @@ -0,0 +1,85 @@ +import os + +from ..tools.tool import Tool, ToolSchema +from ..tools.web_search_utils import get_websearcher_cls +from ..tools.web_search_utils.search_util import \ + AuthenticationKey +from pydantic import ValidationError + + +class WebSearch(Tool): + description = 'surfacing relevant information from billions of web documents. Help ' \ + 'you find what you are looking for from the world-wide-web to comb ' \ + 'billions of webpages, images, videos, and news.' + name = 'web_search_utils' + parameters: list = [{ + 'name': 'query', + 'description': + """The user's search query term. 
The term may not be empty.""", + 'required': True + }] + + def __init__(self, cfg={}): + super().__init__() + available_searchers = get_websearcher_cls() + all_searchers = AuthenticationKey.to_dict() + if not len(available_searchers): + raise ValueError( + f'At least one of web search api token should be set: {all_searchers}' + ) + + searcher = cfg.pop('searcher', None) + + if not searcher: + self.searcher = available_searchers[0](**cfg) + else: + if isinstance(searcher, + str) and len(searcher) and all_searchers.get( + searcher, None): + cls = available_searchers.get(searcher, None) + if not cls: + raise ValueError( + f'The searcher {searcher}\'s token is not set: {all_searchers.get(searcher, None)}' + ) + self.searcher = cls(**cfg) + else: + raise ValueError( + f'The searcher {searcher} should be one of {all_searchers.keys()}' + ) + + try: + all_para = { + 'name': self.name, + 'description': self.description, + 'parameters': self.parameters + } + self.tool_schema = ToolSchema(**all_para) + except ValidationError: + raise ValueError(f'Error when parsing parameters of {self.name}') + + self.is_remote_tool = True + self._str = self.tool_schema.model_dump_json() + self._function = self.parse_pydantic_model_to_openai_function(all_para) + + def _remote_call(self, *args, **kwargs): + query = self._handle_input_fallback(**kwargs) + if not query or not len(query): + raise ValueError( + 'parameter `query` of tool web-search is None or Empty.') + + res = self.searcher(query) + return {'result': [item.__dict__ for item in res]} + + def _handle_input_fallback(self, **kwargs): + query = kwargs.get('query', None) + fallback = kwargs.get('fallback', None) + if query and isinstance(query, str) and len(query): + return query + else: + return fallback + + +if __name__ == '__main__': + tool = WebSearch() + res = tool(query='2024年 元旦 哈尔滨天气') + print(res) diff --git a/my_modelscope_agent/tools/web_search_utils/__init__.py b/my_modelscope_agent/tools/web_search_utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..906c6277e8b50754a7be8e9e1c05ee0049e14cac --- /dev/null +++ b/my_modelscope_agent/tools/web_search_utils/__init__.py @@ -0,0 +1,2 @@ +from ..web_search_utils.search_util import \ + get_websearcher_cls diff --git a/my_modelscope_agent/tools/web_search_utils/search_util.py b/my_modelscope_agent/tools/web_search_utils/search_util.py new file mode 100644 index 0000000000000000000000000000000000000000..843e48eda16fbb0fe39b3163f18af47b746e5f2a --- /dev/null +++ b/my_modelscope_agent/tools/web_search_utils/search_util.py @@ -0,0 +1,40 @@ +import os + + +class SearchResult: + + def __init__(self, title=None, link=None, sniper=None): + assert link or sniper + self.title = title + self.link = link + self.sniper = sniper + + +class AuthenticationKey: + bing = 'BING_SEARCH_V7_SUBSCRIPTION_KEY' + kuake = 'PLACE_HOLDER' + + @classmethod + def to_dict(cls): + raw_dict = cls.__dict__ + res = dict( + filter(lambda x: '__' not in x[0] and isinstance(x[1], str), + raw_dict.items())) + return res + + +def get_websearcher_cls(): + + def get_env(authentication_key: str): + env = os.environ + return env.get(authentication_key, None) + + cls_list = [] + if get_env(AuthenticationKey.bing): + from ..web_search_utils.searcher.bing import BingWebSearcher + cls_list.append(BingWebSearcher) + if get_env(AuthenticationKey.kuanke): + from ..web_search_utils.searcher.kuake import KuakeWebSearcher + cls_list.append(KuakeWebSearcher) + + return cls_list diff --git 
a/my_modelscope_agent/tools/web_search_utils/searcher/__init__.py b/my_modelscope_agent/tools/web_search_utils/searcher/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/my_modelscope_agent/tools/web_search_utils/searcher/base_searcher.py b/my_modelscope_agent/tools/web_search_utils/searcher/base_searcher.py new file mode 100644 index 0000000000000000000000000000000000000000..6f72cf1ebdb18cf19b520014d442750e908b0a22 --- /dev/null +++ b/my_modelscope_agent/tools/web_search_utils/searcher/base_searcher.py @@ -0,0 +1,5 @@ +class WebSearcher: + timeout = 1000 + + def __call__(self, **kwargs): + raise NotImplementedError() diff --git a/my_modelscope_agent/tools/web_search_utils/searcher/bing.py b/my_modelscope_agent/tools/web_search_utils/searcher/bing.py new file mode 100644 index 0000000000000000000000000000000000000000..393f843063928be2a8d8474585283d986b43581c --- /dev/null +++ b/my_modelscope_agent/tools/web_search_utils/searcher/bing.py @@ -0,0 +1,59 @@ +import os + +import json +import requests +from ..search_util import ( + AuthenticationKey, SearchResult) + +from .base_searcher import WebSearcher + + +class BingWebSearcher(WebSearcher): + + def __init__( + self, + timeout=3000, + mkt='en-US', + endpoint='https://api.bing.microsoft.com/v7.0/search', + ): + self.mkt = mkt + self.endpoint = endpoint + self.timeout = timeout + self.token = os.environ.get(AuthenticationKey.bing) + + def __call__(self, query, **kwargs): + params = {'q': query, 'mkt': self.mkt} + headers = {'Ocp-Apim-Subscription-Key': self.token} + if kwargs: + params.update(kwargs) + try: + response = requests.get( + self.endpoint, + headers=headers, + params=params, + timeout=self.timeout) + raw_result = json.loads(response.text) + if raw_result.get('error', None): + print(f'Call Bing web search api failed: {raw_result}') + except Exception as ex: + raise ex('Call Bing web search api failed.') + + results = [] + res_list = raw_result.get('webPages', {}).get('value', []) + for item in res_list: + title = item.get('name', None) + link = item.get('url', None) + sniper = item.get('snippet', None) + if not link and not sniper: + continue + + results.append(SearchResult(title=title, link=link, sniper=sniper)) + + return results + + +if __name__ == '__main__': + + searcher = BingWebSearcher() + res = searcher('哈尔滨元旦的天气情况') + print([item.__dict__ for item in res]) diff --git a/my_modelscope_agent/tools/web_search_utils/searcher/kuake.py b/my_modelscope_agent/tools/web_search_utils/searcher/kuake.py new file mode 100644 index 0000000000000000000000000000000000000000..9c135dfa5ed2081c40012dfa4195b44950874d6e --- /dev/null +++ b/my_modelscope_agent/tools/web_search_utils/searcher/kuake.py @@ -0,0 +1,7 @@ +from .base_searcher import WebSearcher + + +class KuakeWebSearcher(WebSearcher): + + def __call__(self, query, **kwargs): + raise NotImplementedError() diff --git a/my_modelscope_agent/tools/wordart_tool.py b/my_modelscope_agent/tools/wordart_tool.py new file mode 100644 index 0000000000000000000000000000000000000000..65a28d9d8b3c69df172a3c41f0c98b0ba4eb927a --- /dev/null +++ b/my_modelscope_agent/tools/wordart_tool.py @@ -0,0 +1,169 @@ +import os +import time + +import json +import pandas as pd +import requests +from ..tools.tool import Tool, ToolSchema +from pydantic import ValidationError +from requests.exceptions import RequestException, Timeout + +MAX_RETRY_TIMES = 3 + + +class WordArtTexture(Tool): + description = '生成艺术字纹理图片' + name = 
'wordart_texture_generation' + parameters: list = [{ + 'name': 'input.text.text_content', + 'description': 'text that the user wants to convert to WordArt', + 'required': True + }, { + 'name': 'input.prompt', + 'description': + 'Users’ style requirements for word art may be requirements in terms of shape, color, entity, etc.', + 'required': True + }, { + 'name': 'input.texture_style', + 'description': + 'Type of texture style;Default is "material";If not provided by the user, \ + defaults to "material".Another value is scene.', + 'required': True + }, { + 'name': 'input.text.output_image_ratio', + 'description': + 'The aspect ratio of the text input image; the default is "1:1", \ + the available ratios are: "1:1", "16:9", "9:16";', + 'required': True + }] + + def __init__(self, cfg={}): + self.cfg = cfg.get(self.name, {}) + # remote call + self.url = 'https://dashscope.aliyuncs.com/api/v1/services/aigc/wordart/texture' + self.token = self.cfg.get('token', + os.environ.get('DASHSCOPE_API_KEY', '')) + assert self.token != '', 'dashscope api token must be acquired with wordart' + + try: + all_param = { + 'name': self.name, + 'description': self.description, + 'parameters': self.parameters + } + self.tool_schema = ToolSchema(**all_param) + except ValidationError: + raise ValueError(f'Error when parsing parameters of {self.name}') + + self._str = self.tool_schema.model_dump_json() + self._function = self.parse_pydantic_model_to_openai_function( + all_param) + + def __call__(self, *args, **kwargs): + remote_parsed_input = json.dumps( + self._remote_parse_input(*args, **kwargs)) + origin_result = None + retry_times = MAX_RETRY_TIMES + headers = { + 'Content-Type': 'application/json', + 'Authorization': f'Bearer {self.token}', + 'X-DashScope-Async': 'enable' + } + while retry_times: + retry_times -= 1 + try: + + response = requests.request( + 'POST', + url=self.url, + headers=headers, + data=remote_parsed_input) + + if response.status_code != requests.codes.ok: + response.raise_for_status() + origin_result = json.loads(response.content.decode('utf-8')) + + self.final_result = self._parse_output( + origin_result, remote=True) + return self.get_wordart_result() + except Timeout: + continue + except RequestException as e: + raise ValueError( + f'Remote call failed with error code: {e.response.status_code},\ + error message: {e.response.content.decode("utf-8")}') + + raise ValueError( + 'Remote call max retry times exceeded! Please try to use local call.' + ) + + def _remote_parse_input(self, *args, **kwargs): + restored_dict = {} + for key, value in kwargs.items(): + if '.' in key: + # Split keys by "." 
and create nested dictionary structures + keys = key.split('.') + temp_dict = restored_dict + for k in keys[:-1]: + temp_dict = temp_dict.setdefault(k, {}) + temp_dict[keys[-1]] = value + else: + # f the key does not contain ".", directly store the key-value pair into restored_dict + restored_dict[key] = value + kwargs = restored_dict + kwargs['model'] = 'wordart-texture' + print('传给tool的参数:', kwargs) + return kwargs + + def get_result(self): + result_data = json.loads(json.dumps(self.final_result['result'])) + if 'task_id' in result_data['output']: + task_id = result_data['output']['task_id'] + get_url = f'https://dashscope.aliyuncs.com/api/v1/tasks/{task_id}' + get_header = {'Authorization': f'Bearer {self.token}'} + origin_result = None + retry_times = MAX_RETRY_TIMES + while retry_times: + retry_times -= 1 + try: + response = requests.request( + 'GET', url=get_url, headers=get_header) + if response.status_code != requests.codes.ok: + response.raise_for_status() + origin_result = json.loads(response.content.decode('utf-8')) + + get_result = self._parse_output(origin_result, remote=True) + return get_result + except Timeout: + continue + except RequestException as e: + raise ValueError( + f'Remote call failed with error code: {e.response.status_code},\ + error message: {e.response.content.decode("utf-8")}') + + raise ValueError( + 'Remote call max retry times exceeded! Please try to use local call.' + ) + + def get_wordart_result(self): + try: + result = self.get_result() + print(result) + while True: + result_data = result.get('result', {}) + output = result_data.get('output', {}) + task_status = output.get('task_status', '') + + if task_status == 'SUCCEEDED': + print('任务已完成') + return result + + elif task_status == 'FAILED': + raise ('任务失败') + else: + # 继续轮询,等待一段时间后再次调用 + time.sleep(1) # 等待 1 秒钟 + result = self.get_result() + + except Exception as e: + print('get Remote Error:', str(e)) diff --git a/my_modelscope_agent/version.py b/my_modelscope_agent/version.py new file mode 100644 index 0000000000000000000000000000000000000000..683418800a8b84a130ce976b5a71f37242642884 --- /dev/null +++ b/my_modelscope_agent/version.py @@ -0,0 +1 @@ +__version__ = '0.2.1-rc0' diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..ce44072ba77b3f5336901320c0834e67565208e1 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,21 @@ +gradio +dashscope +datasets>=2.8.0 +ipython +jupyter>=1.0.0 +langchain<=0.0.292 +modelscope>=1.7.0 +moviepy +ms-swift +openai +opencv-python +openpyxl +Pillow +pydantic>=2.0.0 +pypdf +pytest +python-dotenv +seaborn +soundfile +transformers>=4.29.0 +transformers_stream_generator \ No newline at end of file
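For reference, OpenAPIPluginTool._remote_parse_input and WordArtTexture._remote_parse_input above both expand dotted parameter names such as input.text.text_content into the nested JSON payload sent to the remote API. A standalone sketch of that restoration logic (function name and sample values are illustrative only):

def restore_nested(flat_kwargs):
    # Expand keys containing "." into nested dictionaries, mirroring the
    # _remote_parse_input logic shown in openapi_plugin.py and wordart_tool.py.
    restored = {}
    for key, value in flat_kwargs.items():
        if '.' in key:
            keys = key.split('.')
            node = restored
            for k in keys[:-1]:
                node = node.setdefault(k, {})
            node[keys[-1]] = value
        else:
            # Keys without "." stay at the top level unchanged
            restored[key] = value
    return restored


print(restore_nested({
    'input.text.text_content': '新年快乐',
    'input.prompt': '灯笼 红色 喜庆',
}))
# -> {'input': {'text': {'text_content': '新年快乐'}, 'prompt': '灯笼 红色 喜庆'}}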