gif maker
Browse files- src/proxy_lite/cli.py +7 -0
- src/proxy_lite/configs/default.yaml +2 -0
- src/proxy_lite/gif_marker.py +124 -0
- src/proxy_lite/recorder.py +5 -0
src/proxy_lite/cli.py
CHANGED
|
@@ -6,6 +6,7 @@ from pathlib import Path
|
|
| 6 |
from typing import Optional
|
| 7 |
|
| 8 |
from proxy_lite import Runner, RunnerConfig
|
|
|
|
| 9 |
from proxy_lite.logger import logger
|
| 10 |
|
| 11 |
|
|
@@ -46,6 +47,12 @@ def do_command(args):
|
|
| 46 |
f.write(base64.b64decode(final_screenshot))
|
| 47 |
logger.info(f"🤖 Screenshot saved to {path}")
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
def main():
|
| 51 |
parser = argparse.ArgumentParser(description="Proxy-Lite")
|
|
|
|
| 6 |
from typing import Optional
|
| 7 |
|
| 8 |
from proxy_lite import Runner, RunnerConfig
|
| 9 |
+
from proxy_lite.gif_marker import create_run_gif
|
| 10 |
from proxy_lite.logger import logger
|
| 11 |
|
| 12 |
|
|
|
|
| 47 |
f.write(base64.b64decode(final_screenshot))
|
| 48 |
logger.info(f"🤖 Screenshot saved to {path}")
|
| 49 |
|
| 50 |
+
gif_folder_path = Path(__file__).parent.parent.parent / "gifs"
|
| 51 |
+
gif_folder_path.mkdir(parents=True, exist_ok=True)
|
| 52 |
+
gif_path = gif_folder_path / f"{result.run_id}.gif"
|
| 53 |
+
create_run_gif(result, gif_path, duration=1500)
|
| 54 |
+
logger.info(f"🤖 GIF saved to {gif_path}")
|
| 55 |
+
|
| 56 |
|
| 57 |
def main():
|
| 58 |
parser = argparse.ArgumentParser(description="Proxy-Lite")
|
src/proxy_lite/configs/default.yaml
CHANGED
|
@@ -18,4 +18,6 @@ solver:
|
|
| 18 |
api_base: https://convergence-ai-demo-api.hf.space/v1
|
| 19 |
local_view: true
|
| 20 |
task_timeout: 1800
|
|
|
|
|
|
|
| 21 |
verbose: true
|
|
|
|
| 18 |
api_base: https://convergence-ai-demo-api.hf.space/v1
|
| 19 |
local_view: true
|
| 20 |
task_timeout: 1800
|
| 21 |
+
environment_timeout: 1800
|
| 22 |
+
action_timeout: 1800
|
| 23 |
verbose: true
|
src/proxy_lite/gif_marker.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import base64
|
| 2 |
+
import re
|
| 3 |
+
import textwrap
|
| 4 |
+
from io import BytesIO
|
| 5 |
+
|
| 6 |
+
from PIL import Image, ImageDraw, ImageFont
|
| 7 |
+
|
| 8 |
+
from proxy_lite.environments.environment_base import Action, Observation
|
| 9 |
+
from proxy_lite.recorder import Run
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# Tag patterns used to pull the model's observation/thinking out of an action's
# raw text. Compiled once instead of on every frame.
_OBSERVATION_TAG_RE = re.compile(r"<observation>(.*?)</observation>", re.DOTALL)
_THINKING_TAG_RE = re.compile(r"<thinking>(.*?)</thinking>", re.DOTALL)

# Maximum characters per wrapped line on the text panel.
_PANEL_CHARS_PER_LINE = 40


def _summarise_action_text(action_text: str) -> str:
    """Return an "Observation / Thinking" summary when both tags are present, else the raw text."""
    observation_match = _OBSERVATION_TAG_RE.search(action_text)
    thinking_match = _THINKING_TAG_RE.search(action_text)
    observation_content = observation_match.group(1).strip() if observation_match else None
    thinking_content = thinking_match.group(1).strip() if thinking_match else None

    if observation_content and thinking_content:
        return f"Observation: {observation_content}\n\nThinking: {thinking_content}"
    return action_text


def _wrap_preserving_paragraphs(text: str, width: int) -> str:
    """Wrap each line of ``text`` to ``width`` characters while keeping existing line breaks."""
    # textwrap.fill() on the whole string would turn every newline into a space,
    # which would merge the "Observation" and "Thinking" paragraphs together.
    return "\n".join(textwrap.fill(line, width=width) for line in text.splitlines())


def _render_text_panel(text: str, width: int, height: int, font) -> "Image.Image":
    """Draw ``text`` centred on a white ``width`` x ``height`` panel and return the panel."""
    panel = Image.new("RGB", (width, height), "white")
    draw = ImageDraw.Draw(panel)
    wrapped_text = _wrap_preserving_paragraphs(text, _PANEL_CHARS_PER_LINE)

    try:
        # multiline_textbbox returns (left, top, right, bottom).
        bbox = draw.multiline_textbbox((0, 0), wrapped_text, font=font)
        text_width, text_height = bbox[2] - bbox[0], bbox[3] - bbox[1]
    except AttributeError:
        # Fallback for older Pillow versions: measure line by line.
        lines = wrapped_text.splitlines() or [wrapped_text]
        line_sizes = [draw.textsize(line, font=font) for line in lines]
        text_width = max(line_width for line_width, _ in line_sizes)
        text_height = sum(line_height for _, line_height in line_sizes)

    # Clamp to 0 so text larger than the panel is clipped at its end,
    # not at its beginning.
    text_x = max(0, (width - text_width) // 2)
    text_y = max(0, (height - text_height) // 2)
    draw.multiline_text((text_x, text_y), wrapped_text, fill="black", font=font, align="center")
    return panel


def create_run_gif(
    run: Run, output_path: str, white_panel_width: int = 300, duration: int = 2000, resize_factor: int = 4
) -> None:
    """
    Generate a gif from the Run object's history.

    For each Observation record that carries an image, the image is decoded from
    its base64 encoded string and scaled down. If the next record is an Action,
    its text is drawn onto a white panel. The observation image and the white
    panel are then concatenated horizontally to produce a frame.

    Parameters:
        run (Run): A Run object with its history containing Observation and Action records.
        output_path (str): The path where the GIF will be saved. It is passed
            straight to ``Image.save``.
        white_panel_width (int): The width in pixels of the white panel for
            displaying text (default 300).
        duration (int): Duration of each frame in milliseconds (default 2000).
        resize_factor (int): The factor to resize the image down by (default 4).
            Must be at least 1.

    Raises:
        ValueError: If ``resize_factor`` is smaller than 1, or if the history
            yields no frames (no Observation with image data).
    """
    if resize_factor < 1:
        raise ValueError(f"resize_factor must be >= 1, got {resize_factor}.")

    history = run.history
    font = ImageFont.load_default()  # identical for every frame, so load it once
    frames = []

    for index, record in enumerate(history):
        # Action records are consumed together with the Observation before them.
        if not isinstance(record, Observation):
            continue

        image_data = record.state.image
        if not image_data:
            continue

        # Decode the base64 image and scale it down; never let a dimension
        # collapse to 0, which Image.resize rejects.
        obs_img = Image.open(BytesIO(base64.b64decode(image_data))).convert("RGB")
        scaled_size = (max(1, obs_img.width // resize_factor), max(1, obs_img.height // resize_factor))
        obs_img = obs_img.resize(scaled_size)

        # Use the text of the Action that immediately follows, if there is one.
        action_text = ""
        next_record = history[index + 1] if index + 1 < len(history) else None
        if isinstance(next_record, Action) and next_record.text:
            action_text = next_record.text

        panel = _render_text_panel(
            _summarise_action_text(action_text), white_panel_width, obs_img.height, font
        )

        # Concatenate the observation image (left) and the text panel (right).
        combined_frame = Image.new("RGB", (obs_img.width + white_panel_width, obs_img.height))
        combined_frame.paste(obs_img, (0, 0))
        combined_frame.paste(panel, (obs_img.width, 0))
        frames.append(combined_frame)

    if not frames:
        raise ValueError("No frames were generated from the Run object's history.")

    frames[0].save(output_path, save_all=True, append_images=frames[1:], duration=duration, loop=0)
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
# Example usage: render a previously recorded trajectory as a GIF.
if __name__ == "__main__":
    # Minimal demonstration only; substitute the id of one of your own
    # recorded runs for the hard-coded one below.
    from proxy_lite.recorder import Run

    example_run = Run.load("0abdb4cb-f289-48b0-ba13-35ed1210f7c1")

    # The step count is reported as half the number of history records.
    step_count = len(example_run.history) // 2
    print(f"Number of steps: {step_count}")

    gif_destination = "trajectory.gif"
    create_run_gif(example_run, gif_destination, duration=1000)
    print(f"Trajectory GIF saved to {gif_destination}")
|
src/proxy_lite/recorder.py
CHANGED
|
@@ -38,6 +38,11 @@ class Run(BaseModel):
|
|
| 38 |
created_at=str(datetime.datetime.now(datetime.UTC)),
|
| 39 |
)
|
| 40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
@property
|
| 42 |
def observations(self) -> list[Observation]:
|
| 43 |
return [h for h in self.history if isinstance(h, Observation)]
|
|
|
|
| 38 |
created_at=str(datetime.datetime.now(datetime.UTC)),
|
| 39 |
)
|
| 40 |
|
| 41 |
+
@classmethod
def load(cls, run_id: str) -> Self:
    """Build an instance from the JSON file named ``<run_id>.json``.

    The file is looked up in the ``local_trajectories`` directory three
    levels above this module, and its top-level keys are passed to the
    constructor as keyword arguments.
    """
    trajectories_dir = Path(__file__).parent.parent.parent / "local_trajectories"
    trajectory_file = trajectories_dir / f"{run_id}.json"
    with open(trajectory_file, "r") as handle:
        payload = json.load(handle)
    return cls(**payload)
|
| 45 |
+
|
| 46 |
@property
|
| 47 |
def observations(self) -> list[Observation]:
|
| 48 |
return [h for h in self.history if isinstance(h, Observation)]
|