# omni_bot/swarms/models/mistral.py
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from swarms.agents.message import Message


class Mistral:
    """
    A thin wrapper around the Mistral-7B causal language model from Hugging Face
    Transformers, exposing `run` for one-shot generation and `chat` for
    conversational use with a message history.

    Usage:
        model = Mistral(device="cuda", use_flash_attention=True, temperature=0.7, max_length=200)
        task = "My favourite condiment is"
        result = model.run(task)
        print(result)
    """

    def __init__(
        self,
        ai_name: str = "Node Model Agent",
        system_prompt: str = None,
        model_name: str = "mistralai/Mistral-7B-v0.1",
        device: str = "cuda",
        use_flash_attention: bool = False,
        temperature: float = 1.0,
        max_length: int = 100,
        do_sample: bool = True,
    ):
        self.ai_name = ai_name
        self.system_prompt = system_prompt
        self.model_name = model_name
        self.device = device
        self.use_flash_attention = use_flash_attention
        self.temperature = temperature
        self.max_length = max_length
        self.do_sample = do_sample

        # Check that the requested device is actually available
        if device == "cuda" and not torch.cuda.is_available():
            raise ValueError("CUDA is not available. Please choose a different device.")

        # Load the model and tokenizer
        self.model = None
        self.tokenizer = None
        self.load_model()

        self.history = []

    def load_model(self):
        try:
            # FlashAttention-2 needs the flash-attn package and a recent transformers release;
            # fall back to the default attention implementation otherwise.
            if self.use_flash_attention:
                self.model = AutoModelForCausalLM.from_pretrained(self.model_name, attn_implementation="flash_attention_2")
            else:
                self.model = AutoModelForCausalLM.from_pretrained(self.model_name)
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
            self.model.to(self.device)
        except Exception as e:
            raise ValueError(f"Error loading the Mistral model: {str(e)}") from e

    def run(self, task: str):
        """Run the model on a given task and return the generated text."""
        try:
            model_inputs = self.tokenizer([task], return_tensors="pt").to(self.device)
            generated_ids = self.model.generate(
                **model_inputs,
                do_sample=self.do_sample,
                temperature=self.temperature,
                max_new_tokens=self.max_length,
            )
            output_text = self.tokenizer.batch_decode(generated_ids)[0]
            return output_text
        except Exception as e:
            raise ValueError(f"Error running the model: {str(e)}") from e

    def chat(self, msg: str = None, streaming: bool = False):
        """
        Run a single chat turn against the model.

        Args:
            msg (str, optional): Message to send to the agent. Defaults to None.
            streaming (bool, optional): Whether to stream the response token by token. Defaults to False.

        Returns:
            str: Response from the agent.

        Usage:
        --------------
        agent = Mistral()
        agent.chat("Hello")
        """
        # Add the user's message to the history
        self.history.append(Message("User", msg))

        # Process the message and generate a response
        try:
            response = self.run(msg)

            # Add the agent's response to the history
            self.history.append(Message("Agent", response))

            # Stream the response token by token if requested
            if streaming:
                return self._stream_response(response)
            else:
                return response
        except Exception as error:
            error_message = f"Error processing message: {str(error)}"

            # Record the error in the history as well
            self.history.append(Message("Agent", error_message))
            return error_message

    def _stream_response(self, response: str = None):
        """
        Yield the response token by token (word by word).

        Usage:
        --------------
        for token in _stream_response(response):
            print(token)
        """
        for token in response.split():
            yield token
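

# A minimal usage sketch (illustrative only, not part of the original module).
# It assumes a CUDA-capable GPU is available and that the
# mistralai/Mistral-7B-v0.1 weights can be downloaded; pass device="cpu"
# to run without a GPU (much slower).
if __name__ == "__main__":
    model = Mistral(device="cuda", temperature=0.7, max_length=200)

    # One-shot generation
    print(model.run("My favourite condiment is"))

    # Chat with streaming: _stream_response yields the reply word by word
    for token in model.chat("Tell me about Mistral-7B.", streaming=True):
        print(token, end=" ")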