Spaces:

ZackBradshaw
/

omni_bot

Runtime error

File size: 5,054 Bytes
from typing import Dict, List

from langchain.base_language import BaseLanguageModel
from langchain.tools.base import BaseTool
from langchain_experimental.autonomous_agents.hugginggpt.repsonse_generator import (
    load_response_generator,
)
from langchain_experimental.autonomous_agents.hugginggpt.task_executor import (
    TaskExecutor,
)
from langchain_experimental.autonomous_agents.hugginggpt.task_planner import (
    load_chat_planner,
)
from transformers import load_tool
from swarms.agents.message import Message


class Step:
    """One entry of a plan: a task plus the tool and arguments attached to it.

    All constructor arguments are stored unchanged as attributes of the same
    name.

    Args:
        task: Task label for this step.
        id: Identifier for this step.
        dep: List of integers; presumably the ids of the steps this one
            depends on (not enforced here).
        args: String-to-string mapping of arguments for the task.
        tool: Tool object associated with this step.
    """

    def __init__(
        self,
        task: str,
        id: int,
        dep: List[int],
        args: Dict[str, str],
        tool: "BaseTool"
    ):
        self.task = task
        self.id = id
        self.dep = dep
        self.args = args
        self.tool = tool

    def __repr__(self) -> str:
        """Return a readable, field-by-field description of the step.

        Without this, ``str(step)`` falls back to the default object text
        (class name and memory address), which says nothing about the step.
        """
        return (
            f"{type(self).__name__}(task={self.task!r}, id={self.id!r}, "
            f"dep={self.dep!r}, args={self.args!r}, tool={self.tool!r})"
        )

class Plan:
    """An ordered list of steps.

    Args:
        steps: The steps making up the plan; stored as given.
    """

    def __init__(
        self,
        steps: List["Step"]
    ):
        self.steps = steps

    def __str__(self) -> str:
        """Return the text of a list containing ``str(step)`` for every step."""
        return str([str(step) for step in self.steps])

    def __repr__(self) -> str:
        """Return the same text as ``str(self)``.

        This must be spelled ``__repr__`` (with trailing underscores) for
        ``repr()`` to pick it up.
        """
        return str(self)





class OmniModalAgent:
    """
    OmniModalAgent
    LLM -> Plans -> Tasks -> Tools -> Response

    Architecture:
    1. LLM: Language Model
    2. Chat Planner: Plans
    3. Task Executor: Tasks
    4. Tools: Tools

    Args:
        llm (BaseLanguageModel): Language Model, used to build both the chat
            planner and the response generator.

    The tool set is not configurable: the constructor loads a fixed list of
    tools by name with ``load_tool``.

    Usage:
    from swarms import OmniModalAgent, OpenAIChat

    llm = OpenAIChat()
    agent = OmniModalAgent(llm)
    response = agent.run("Hello, how are you? Create an image of how you are doing!")
    """

    # Names passed to ``load_tool`` when the agent is constructed.
    _DEFAULT_TOOL_NAMES = (
        "document-question-answering",
        "image-captioning",
        "image-question-answering",
        "image-segmentation",
        "speech-to-text",
        "summarization",
        "text-classification",
        "text-question-answering",
        "translation",
        "huggingface-tools/text-to-image",
        "huggingface-tools/text-to-video",
        "text-to-speech",
        "huggingface-tools/text-download",
        "huggingface-tools/image-transformation",
    )

    def __init__(
        self,
        llm: "BaseLanguageModel",
    ):
        self.llm = llm

        print("Loading tools...")
        self.tools = [
            load_tool(tool_name)
            for tool_name in self._DEFAULT_TOOL_NAMES
        ]

        self.chat_planner = load_chat_planner(llm)
        self.response_generator = load_response_generator(llm)
        # Conversation log filled by chat(): alternating user/agent Messages.
        self.history = []

    def run(
        self,
        input: str
    ) -> str:
        """Run the OmniAgent

        Asks the chat planner for a plan covering ``input`` and the loaded
        tools, executes that plan with a new ``TaskExecutor`` (kept on
        ``self.task_executor``), and hands the executor to the response
        generator.

        Args:
            input (str): The request to process.

        Returns:
            Whatever the response generator's ``generate`` returns
            (annotated as ``str``).
        """
        plan = self.chat_planner.plan(
            inputs={
                "input": input,
                "hf_tools": self.tools,
            }
        )
        self.task_executor = TaskExecutor(plan)
        self.task_executor.run()

        response = self.response_generator.generate(
            {"task_execution": self.task_executor}
        )

        return response

    def chat(
        self,
        msg: str = None,
        streaming: bool = False
    ):
        """
        Run one chat turn and record it in ``self.history``.

        Args:
            msg (str, optional): Message to send to the agent. Defaults to None.
            streaming (bool, optional): Whether to stream the response. Defaults to False.

        Returns:
            The agent's response; a generator of whitespace-separated tokens
            when ``streaming`` is True; or an ``"Error processing message:
            ..."`` string if processing raised an exception.

        Usage:
        --------------
        agent = OmniModalAgent(llm)
        agent.chat("Hello")

        """
        # add user's message to the history
        self.history.append(Message("User", msg))

        try:
            # This class is the agent itself; it has no ``self.agent`` attribute.
            response = self.run(msg)

            # add agent's response to the history
            self.history.append(Message("Agent", response))
        except Exception as error:
            # Best-effort: failures are reported as the reply, not raised.
            error_message = f"Error processing message: {str(error)}"

            # add error to history
            self.history.append(Message("Agent", error_message))

            return error_message

        if streaming:
            return self._stream_response(response)
        return response

    def _stream_response(
        self,
        response: str = None
    ):
        """
        Yield the response token by token (word by word)

        Tokens are the whitespace-separated pieces of ``response``. A
        ``None`` response (the default) yields nothing.

        Usage:
        --------------
        for token in _stream_response(response):
            print(token)

        """
        if response is None:
            return
        for token in response.split():
            yield token