# Charles Azam
# feat: add logging mechanism
# 0159aaf
from __future__ import annotations
from pathlib import Path
from io import BytesIO
from time import sleep
import matplotlib
matplotlib.use("Agg") # headless backend
import matplotlib.pyplot as plt
from PIL import Image
from smolagents import CodeAgent, LiteLLMModel
from smolagents.agents import ActionStep
from deepengineer.webcrawler.crawl_database import DataBase
from deepengineer.logging_tools import LoggingTool
import queue
def _find_and_save_matplotlib_figure(image_path: Path = Path("figure.png")) -> str:
    """Save the currently active matplotlib figure to *image_path*.

    Args:
        image_path: Destination file for the rendered figure. Defaults to
            ``figure.png`` in the current working directory.

    Returns:
        A human-readable confirmation message containing the output path.

    Raises:
        RuntimeError: If no matplotlib figure is currently open.
    """
    if not plt.get_fignums():
        # Error message previously referenced a nonexistent ``save_fig()``;
        # name the actual helper so the failure is traceable.
        raise RuntimeError(
            "No active figure to save; create one before calling "
            "_find_and_save_matplotlib_figure()."
        )
    plt.savefig(image_path, bbox_inches="tight")
    return f"Figure saved to {image_path}."
class SaveMatplotlibFigTool(LoggingTool):
    """Agent tool that saves the active matplotlib figure into *output_dir*.

    Returns a markdown image reference on success so the agent can embed
    the figure in its final answer.
    """

    name = "save_matplotlib_fig"
    description = """Save the current matplotlib figure to the current directory. Then plt.close() is called to clear the figure. The image is returned as a markdown string, use this markdown inside the final answer to include the image.
    """
    inputs = {
        "image_name": {
            "type": "string",
            "description": "The name of the image to save.",
        },
    }
    output_type = "string"

    def __init__(self, output_dir: Path, log_queue: queue.Queue | None = None):
        """Store the target directory and forward *log_queue* to LoggingTool."""
        super().__init__(log_queue=log_queue)
        self.output_dir: Path = output_dir

    def forward(self, image_name: str) -> str:
        """Save the current figure as ``output_dir / image_name`` (PNG)."""
        self.push_log(f"🖼️ Saving matplotlib figure to {image_name}")
        if not image_name.endswith(".png"):
            image_name = image_name + ".png"
        output_path = self.output_dir / image_name
        # Remove any stale file so the existence check below reflects
        # this call, not a previous run.
        output_path.unlink(missing_ok=True)
        _find_and_save_matplotlib_figure(output_path)
        # The tool description promises the figure is cleared after saving;
        # close it so the next drawing starts from a blank canvas.
        plt.close()
        if output_path.exists():
            return f"![]({image_name})"
        else:
            return f"Error: The image {image_name} was not saved."
def _capture_snapshot(
    memory_step: ActionStep, agent: CodeAgent, image_path: Path = Path("figure.png")
) -> None:
    """Step callback: persist the current figure and attach it to *memory_step*.

    Saves the active matplotlib figure both to *image_path* on disk and,
    as a PIL image, to ``memory_step.observations_images`` so the agent can
    inspect its own drawing on the next step. Images attached to steps older
    than two iterations are dropped to keep the memory payload small.

    Args:
        memory_step: The step currently being recorded.
        agent: The running agent whose memory is pruned/updated.
        image_path: On-disk destination for the snapshot PNG.
    """
    # Guard FIRST: _find_and_save_matplotlib_figure raises RuntimeError when
    # no figure is open, so calling it before this check (as the old code
    # did) made the early-return unreachable and crashed figure-less steps.
    if not plt.get_fignums():
        return
    _find_and_save_matplotlib_figure(image_path)
    buf = BytesIO()
    plt.savefig(buf, format="png", bbox_inches="tight")
    buf.seek(0)
    img = Image.open(buf)
    # Drop images from steps at least two iterations old to bound memory.
    for prev in agent.memory.steps:
        if (
            isinstance(prev, ActionStep)
            and prev.step_number <= memory_step.step_number - 2
        ):
            prev.observations_images = None
    # Copy so the image survives closing the underlying buffer.
    memory_step.observations_images = [img.copy()]
    buf.close()
    hint = "[snapshot: matplotlib figure captured]"
    memory_step.observations = (
        hint
        if memory_step.observations is None
        else memory_step.observations + "\n" + hint
    )
# Prompt template for the iterative workflow: the agent looks at the
# snapshot captured between steps and keeps refining the figure.
# Formatted with {user_instructions} before being passed to agent.run().
matplotlib_instructions_multiple_steps = r"""
You may use the entire **matplotlib** and **numpy** and **pandas** and **seaborn** API. Do not worry about saving the image, it is done automatically and you can't access the os library.
Between each step, the image is provided in memory. From step 2, you can use it to pass additional instructions to the model to improve the image.
Workflow
--------
1. Construct your figure with ordinary matplotlib calls.
2. Wait another iteration, watch the image. If the image is correct call `final_answer() directly`. Otherwise, just do it again.
3. Do **not** call `plt.show()`; a callback captures a PNG automatically.
4. Keep code blocks concise and avoid GUI back‑end imports (TkAgg, Qt, etc.).
User instructions:
{user_instructions}
"""
# Prompt template for the fast path: the agent may finish immediately when
# confident the first render is correct, otherwise it falls back to
# inspecting the snapshot on the next iteration.
matplotlib_instructions_single_step = r"""
You may use the entire **matplotlib** and **numpy** and **pandas** and **seaborn** API. Do not worry about saving the image, it is done automatically and you can't access the os library.
Workflow
--------
1. Construct your figure with ordinary matplotlib calls.
2. If the task is easy and you are confident that the image is correct, call `final_answer() directly`. Otherwise, wait another iteration to watch the image.
3. Do **not** call `plt.show()`; a callback captures a PNG automatically.
4. Keep code blocks concise and avoid GUI back‑end imports (TkAgg, Qt, etc.).
User instructions:
{user_instructions}
"""
def draw_matplotlib_image_from_prompt(
    prompt: str,
    image_path: Path = Path("figure.png"),
    model_id: str = "mistral/mistral-medium-latest",
    multiple_steps: bool = False,
) -> Path:
    """Run a CodeAgent that draws a matplotlib figure described by *prompt*.

    A step callback (:func:`_capture_snapshot`) writes the current figure to
    *image_path* after each agent step, so the caller never has to save it.

    Args:
        prompt: Natural-language description of the desired figure.
        image_path: Where the snapshot callback writes the PNG.
        model_id: LiteLLM model identifier used to back the agent.
        multiple_steps: If True, use the iterative prompt template that asks
            the agent to review the rendered image before answering.

    Returns:
        The path the snapshot callback writes to (i.e. ``image_path``).
    """
    model = LiteLLMModel(model_id=model_id)
    agent = CodeAgent(
        tools=[],
        model=model,
        # The agent writes pure plotting code; persistence is handled by
        # the snapshot callback, so no file-system access is authorized.
        additional_authorized_imports=[
            "matplotlib.*",
            "numpy.*",
            "pandas.*",
            "seaborn.*",
        ],
        step_callbacks=[
            lambda memory_step, agent: _capture_snapshot(memory_step, agent, image_path)
        ],
        max_steps=20,
        verbosity_level=2,
    )
    if multiple_steps:
        agent.run(
            matplotlib_instructions_multiple_steps.format(user_instructions=prompt)
        )
    else:
        agent.run(matplotlib_instructions_single_step.format(user_instructions=prompt))
    return image_path
class DrawImageTool(LoggingTool):
    """Agent tool that generates an image from a prompt via a drawing agent.

    Delegates to :func:`draw_matplotlib_image_from_prompt` and returns a
    markdown image reference on success.
    """

    name = "draw_image"
    # NOTE: was an f-string with no placeholders; plain literal is identical.
    description = "Draw an image based on a prompt. The image is saved in the current directory. The image is returned as a markdown image, use this markdown inside the final answer to include the image. You must be very specific in your prompt."
    inputs = {
        "prompt": {
            "type": "string",
            "description": """
        Draw an image based on a prompt. The image is saved in the current directory. The image is returned as a markdown image, use this markdown inside the final answer to include the image.
        You must be very specific in your prompt. This tool has access to matplotlib, numpy, pandas, seaborn.
        """,
        },
        "image_name": {
            "type": "string",
            "description": "The name of the image to save.",
        },
    }
    output_type = "string"

    def __init__(self, output_dir: Path, log_queue: queue.Queue | None = None):
        """Store the target directory and forward *log_queue* to LoggingTool.

        *log_queue* was added (default ``None``) for consistency with
        SaveMatplotlibFigTool; existing callers are unaffected.
        """
        super().__init__(log_queue=log_queue)
        self.output_dir: Path = output_dir

    def forward(self, prompt: str, image_name: str) -> str:
        """Draw the prompted image and return it as a markdown reference."""
        self.push_log(f"🖊️ Drawing image from prompt: {prompt}")
        if not image_name.endswith(".png"):
            image_name = image_name + ".png"
        output_path = draw_matplotlib_image_from_prompt(
            prompt, self.output_dir / image_name
        )
        if output_path.exists():
            return f"![]({image_name})"
        else:
            return f"Error: The image {image_name} was not saved."
def multiple_steps_draw_image_agent(
    prompt: str,
    image_path: Path = Path("figure.png"),
    model_id: str = "mistral/mistral-medium-latest",
) -> Path:
    """
    Manually-stepped variant of the drawing agent.

    The idea behind this function is to give to a multimodal agent the code
    and the image of the previous step to adapt it. Instead of ``agent.run``,
    it drives ``agent.step`` by hand so the memory (and the captured figure)
    can be inspected or rewritten between iterations.

    Args:
        prompt: Natural-language description of the desired figure.
        image_path: Where the snapshot callback writes the PNG.
        model_id: LiteLLM model identifier used to back the agent.

    Returns:
        ``image_path`` (the snapshot destination).
    """
    from smolagents import CodeAgent, ActionStep, TaskStep, Timing
    import time
    model = LiteLLMModel(model_id=model_id)
    agent = CodeAgent(
        tools=[],
        model=model,
        additional_authorized_imports=["matplotlib.*", "numpy.*"],
        step_callbacks=[
            lambda memory_step, agent: _capture_snapshot(memory_step, agent, image_path)
        ],
        # NOTE(review): max_steps=20 here but the manual loop below stops at
        # 10 — confirm which limit is intended.
        max_steps=20,
        verbosity_level=2,
    )
    # Send the tools to the agent (no tools here)
    agent.python_executor.send_tools({**agent.tools})
    # Print the system prompt
    print(agent.memory.system_prompt)
    # Set the task
    task = prompt
    # You could modify the memory as needed here by inputting the memory of another agent.
    # agent.memory.steps = previous_agent.memory.steps
    # Let's start a new task!
    agent.memory.steps.append(TaskStep(task=task, task_images=[]))
    final_answer = None
    step_number = 1
    # Drive the agent one step at a time until it produces a final answer
    # or the 10-step cap is reached.
    while final_answer is None and step_number <= 10:
        memory_step = ActionStep(
            step_number=step_number,
            observations_images=[],
            timing=Timing(start_time=time.time(), end_time=time.time()),
        )
        # Run one step.
        final_answer = agent.step(memory_step)
        agent.memory.steps.append(memory_step)
        step_number += 1
        # Step callbacks are not invoked by agent.step, so capture manually.
        _capture_snapshot(memory_step, agent, image_path)
        pass
    # Change the memory as you please!
    # For instance to update the latest step:
    # agent.memory.steps[-1] = ...
    print("The final answer is:", final_answer)
    return image_path