Spaces:

jianuo
/

I-am-agent

Running

File size: 1,773 Bytes

09321b6

from modelscope.utils.constant import Tasks
from .pipeline_tool import ModelscopePipelineTool


class ImageChatTool(ModelscopePipelineTool):
    default_model = 'damo/multi-modal_mplug_owl_multimodal-dialogue_7b'
    description = '图文对话和图像描述服务，针对输入的图片和用户的文本输入，给出文本回复'
    name = 'modelscope_image-chat'
    parameters: list = [{
        'name': 'image',
        'description': '用户输入的图片',
        'required': True
    }, {
        'name': 'text',
        'description': '用户输入的文本',
        'required': True
    }]
    task = Tasks.multimodal_dialogue

    def construct_image_chat_input(self, **kwargs):
        image = kwargs.pop('image', '')
        text = kwargs.pop('text', '')

        system_prompt_1 = 'The following is a conversation between a curious human and AI assistant.'
        system_prompt_2 = "The assistant gives helpful, detailed, and polite answers to the user's questions."
        messages = {
            'messages': [
                {
                    'role': 'system',
                    'content': system_prompt_1 + ' ' + system_prompt_2
                },
                {
                    'role': 'user',
                    'content': [{
                        'image': image
                    }]
                },
                {
                    'role': 'user',
                    'content': text
                },
            ]
        }
        return messages

    def _remote_parse_input(self, *args, **kwargs):
        messages = self.construct_image_chat_input(**kwargs)
        return {'input': messages}

    def _local_parse_input(self, *args, **kwargs):
        return (self.construct_image_chat_input(**kwargs)), {}