# omni_bot/swarms/agents/omni_modal_agent.py
from typing import Dict, List
from langchain.base_language import BaseLanguageModel
from langchain.tools.base import BaseTool
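# NB: "repsonse_generator" below is the actual (misspelled) module path shipped
# in langchain_experimental; correcting the spelling would break the import.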
from langchain_experimental.autonomous_agents.hugginggpt.repsonse_generator import (
load_response_generator,
)
from langchain_experimental.autonomous_agents.hugginggpt.task_executor import (
TaskExecutor,
)
from langchain_experimental.autonomous_agents.hugginggpt.task_planner import (
load_chat_planner,
)
from transformers import load_tool
from swarms.agents.message import Message


class Step:
    """A single planned task: what to do, its id, dependencies, args, and tool."""

    def __init__(
        self,
        task: str,
        id: int,
        dep: List[int],
        args: Dict[str, str],
        tool: BaseTool,
    ):
        self.task = task
        self.id = id
        self.dep = dep
        self.args = args
        self.tool = tool

    def __str__(self) -> str:
        return f"Step(id={self.id}, task={self.task!r}, dep={self.dep})"


class Plan:
    """An ordered collection of Steps produced by the planner."""

    def __init__(
        self,
        steps: List[Step],
    ):
        self.steps = steps

    def __str__(self) -> str:
        return str([str(step) for step in self.steps])

    def __repr__(self) -> str:
        return str(self)
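

# Illustrative sketch only (not part of the module's API): how a Plan could be
# assembled by hand. `captioner` and `tts` are hypothetical, already-loaded tools.
#
#   caption = Step(
#       task="caption the image",
#       id=1,
#       dep=[],
#       args={"image": "photo.png"},
#       tool=captioner,
#   )
#   speak = Step(task="read the caption aloud", id=2, dep=[1], args={}, tool=tts)
#   plan = Plan(steps=[caption, speak])  # step 2 consumes the output of step 1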


class OmniModalAgent:
    """
    OmniModalAgent

    LLM -> Plan -> Tasks -> Tools -> Response

    Architecture:
    1. LLM: the language model
    2. Chat Planner: turns the user input into a Plan of Steps
    3. Task Executor: runs each task with the matching Hugging Face tool
    4. Response Generator: condenses the execution results into a final reply

    Args:
        llm (BaseLanguageModel): Language model

    Returns:
        str: response

    Usage:
        from swarms import OmniModalAgent, OpenAIChat

        llm = OpenAIChat()
        agent = OmniModalAgent(llm)
        response = agent.run("Hello, how are you? Create an image of how you are doing!")
    """

    def __init__(
        self,
        llm: BaseLanguageModel,
        # tools: List[BaseTool]
    ):
        self.llm = llm

        print("Loading tools...")
        self.tools = [
            load_tool(tool_name)
            for tool_name in [
                "document-question-answering",
                "image-captioning",
                "image-question-answering",
                "image-segmentation",
                "speech-to-text",
                "summarization",
                "text-classification",
                "text-question-answering",
                "translation",
                "huggingface-tools/text-to-image",
                "huggingface-tools/text-to-video",
                "text-to-speech",
                "huggingface-tools/text-download",
                "huggingface-tools/image-transformation",
            ]
        ]

        self.chat_planner = load_chat_planner(llm)
        self.response_generator = load_response_generator(llm)
        # The task executor is created per request in `run`, once a plan exists.
        self.history = []

    def run(
        self,
        input: str,
    ) -> str:
        """Plan the input, execute each task with its tool, and generate a reply."""
        plan = self.chat_planner.plan(
            inputs={
                "input": input,
                "hf_tools": self.tools,
            }
        )
        self.task_executor = TaskExecutor(plan)
        self.task_executor.run()

        response = self.response_generator.generate(
            {"task_execution": self.task_executor}
        )
        return response

    def chat(
        self,
        msg: str = None,
        streaming: bool = False,
    ):
        """
        Run a chat turn.

        Args:
            msg (str, optional): Message to send to the agent. Defaults to None.
            streaming (bool, optional): Whether to stream the response. Defaults to False.

        Returns:
            str: Response from the agent

        Usage:
        --------------
        agent = OmniModalAgent(llm)
        agent.chat("Hello")
        """
        # Add the user's message to the history
        self.history.append(
            Message(
                "User",
                msg,
            )
        )

        # Process the message
        try:
            response = self.run(msg)

            # Add the agent's response to the history
            self.history.append(
                Message(
                    "Agent",
                    response,
                )
            )

            if streaming:
                return self._stream_response(response)
            else:
                return response
        except Exception as error:
            error_message = f"Error processing message: {str(error)}"

            # Record the error in the history as well
            self.history.append(
                Message(
                    "Agent",
                    error_message,
                )
            )
            return error_message

    def _stream_response(
        self,
        response: str = None,
    ):
        """
        Yield the response token by token (word by word).

        Usage:
        --------------
        for token in _stream_response(response):
            print(token)
        """
        # Guard against a None response so iteration simply yields nothing.
        for token in (response or "").split():
            yield token
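

# Minimal usage sketch (an assumption, not shipped code): requires an
# OPENAI_API_KEY in the environment and `OpenAIChat` importable from `swarms`,
# as in the class docstring. Loading all of the Hugging Face tools above can
# be slow on first run.
if __name__ == "__main__":
    from swarms import OpenAIChat

    llm = OpenAIChat()
    agent = OmniModalAgent(llm)

    # Single-shot run: plan -> execute tasks with tools -> generate a reply
    print(agent.run("Caption this image and read the caption aloud: photo.png"))

    # Chat with streaming: tokens are yielded word by word
    for token in agent.chat("Hello!", streaming=True):
        print(token, end=" ")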