Spaces:

pivot-prompt
/

pivot-prompt-demo

Runtime error

App Files Files Community

pivot-prompt-demo / vip_runner.py

pivot-prompt

Add application file

660daa9 10 months ago

raw

history blame

4.78 kB

	"""VIP."""

	import json
	import re

	import cv2
	from tqdm import trange
	import numpy as np
	import vip


	def make_prompt(description: str, top_n: int = 3) -> str:
	    """Build the text prompt that asks the model to pick numbered circles.

	    Args:
	        description: Text describing the object, region, or point to locate;
	            interpolated after ``DESCRIPTION:``.
	        top_n: How many circles the model is asked to choose.

	    Returns:
	        The prompt with leading and trailing whitespace stripped. It asks for
	        an answer as JSON of the form ``{"points": []}`` and ends with
	        ``IMAGE:``.
	    """
	    # Doubled braces below are f-string escapes for a literal "{...}".
	    return f"""
	INSTRUCTIONS:
	You are tasked to locate an object, region, or point in space in the given annotated image according to a description.
	The image is annotated with numbered circles.
	Choose the top {top_n} circles that have the most overlap with and/or is closest to what the description is describing in the image.
	You are a five-time world champion in this game.
	Give a one sentence analysis of why you chose those points.
	Provide your answer at the end in a valid JSON of this format:

	{{"points": []}}

	DESCRIPTION: {description}
	IMAGE:
	""".strip()


	def extract_json(response, key):
	    """Return the value stored under ``key`` in a JSON object found in ``response``.

	    The text is scanned for top-level JSON objects. Braces that do not start
	    a valid object (for example ``{1, 2}`` inside the prose) are skipped
	    rather than breaking the parse. When several objects contain ``key`` the
	    last one is used, since the answer is expected at the end of the reply.

	    Args:
	        response: Free-form text that should contain a JSON object.
	        key: Key to look up in the decoded object.

	    Returns:
	        The value associated with ``key``.

	    Raises:
	        KeyError: If no decodable JSON object containing ``key`` is found.
	    """
	    decoder = json.JSONDecoder()
	    values = []
	    decoded_any = False

	    start = response.find("{")
	    while start != -1:
	        try:
	            obj, end = decoder.raw_decode(response, start)
	        except json.JSONDecodeError:
	            # Not a JSON object at this brace; try the next one.
	            start = response.find("{", start + 1)
	            continue
	        decoded_any = True
	        if key in obj:
	            values.append(obj[key])
	        # Resume after the decoded object so nested objects are not
	        # mistaken for separate top-level answers.
	        start = response.find("{", end)

	    if not decoded_any:
	        print("No JSON data found ******\n", response)
	    if not values:
	        raise KeyError(key)
	    return values[-1]


	def vip_perform_selection(prompter, vlm, im, desc, arm_coord, samples, top_n):
	    """Annotate the image with ``samples``, query the model, and parse its pick.

	    Args:
	        prompter: Object providing ``add_arrow_overlay_plt``.
	        vlm: Object providing ``query``; it receives ``[prompt_text, png_buffer]``.
	        im: Image passed to the overlay call.
	        desc: Description inserted into the prompt.
	        arm_coord: Passed to the overlay call as ``arm_xy``.
	        samples: Candidate samples drawn onto the image.
	        top_n: Number of candidates the prompt asks the model to choose.

	    Returns:
	        A pair ``(ids, annotated_image)``. ``ids`` is the value under
	        ``"points"`` in the model's reply, or ``[]`` when it could not be
	        extracted; ``annotated_image`` is the overlay output.
	    """
	    annotated = prompter.add_arrow_overlay_plt(
	        image=im, samples=samples, arm_xy=arm_coord, log_image=False
	    )

	    # imencode returns (success_flag, buffer); only the buffer is sent on.
	    png_buffer = cv2.imencode(".png", annotated)[1]

	    reply = vlm.query([make_prompt(desc, top_n=top_n), png_buffer])

	    try:
	        picked = extract_json(reply, "points")
	    except Exception as err:
	        # Best effort: an unparseable reply is reported and counts as "no pick".
	        print(err)
	        return [], annotated
	    return picked, annotated


	def vip_runner(
	    vlm,
	    im,
	    desc,
	    style,
	    action_spec,
	    n_samples_init=25,
	    n_samples_opt=10,
	    n_iters=3,
	    n_parallel_trials=1,
	):
	    """Iteratively sample candidates, let the model pick, and refit.

	    This is a generator. After every selection step it yields
	    ``(output_ims, message)``, where ``output_ims`` is the same growing list
	    of annotated images and ``message`` is a progress string. The last item
	    yielded carries either the final selected coordinate or, when no image
	    was produced at all, ``([], "Unable to understand query")``.

	    Args:
	        vlm: Model wrapper forwarded to ``vip_perform_selection``.
	        im: Image array; ``im.shape[1] / 2`` and ``im.shape[0] / 2`` give the
	            reference coordinate ``arm_coord``.
	        desc: Description of what to locate.
	        style: Dict handed to the prompter. Its ``"num_samples"`` entry is
	            overwritten on every iteration.
	        action_spec: Dict providing the initial ``"loc"`` and ``"scale"``.
	        n_samples_init: ``num_samples`` for the first iteration of a trial.
	        n_samples_opt: ``num_samples`` for later iterations.
	        n_iters: Iterations per trial.
	        n_parallel_trials: Number of independent trials; when greater than
	            one, a final selection is made among the per-trial winners.

	    Yields:
	        Tuples ``(output_ims, message)`` as described above.
	    """
	    prompter = vip.VisualIterativePrompter(
	        style, action_spec, vip.SupportedEmbodiments.HF_DEMO
	    )

	    output_ims = []
	    arm_coord = (int(im.shape[1] / 2), int(im.shape[0] / 2))

	    new_samples = []
	    # Set here as well so the name exists even if the trial loop never runs.
	    center_mean = action_spec["loc"]
	    for i in range(n_parallel_trials):
	        # Every trial restarts from the initial distribution.
	        center_mean = action_spec["loc"]
	        center_std = action_spec["scale"]
	        for itr in trange(n_iters):
	            # The prompter was built with this same dict, so the sample count
	            # is communicated by mutating it in place.
	            style["num_samples"] = n_samples_init if itr == 0 else n_samples_opt
	            samples = prompter.sample_actions(im, arm_coord, center_mean, center_std)
	            arrow_ids, image_circles_np = vip_perform_selection(
	                prompter, vlm, im, desc, arm_coord, samples, top_n=3
	            )

	            # plot sampled circles as red
	            arrow_ids, selected_samples = _mark_selected(arrow_ids, samples)
	            image_circles_marked_np = prompter.add_arrow_overlay_plt(
	                image_circles_np, selected_samples, arm_coord
	            )
	            output_ims.append(image_circles_marked_np)
	            yield output_ims, f"Image generated for parallel sample {i+1}/{n_parallel_trials} iteration {itr+1}/{n_iters}. Still working..."

	            # if at last iteration, pick one answer out of the selected ones
	            if itr == n_iters - 1:
	                arrow_ids, _ = vip_perform_selection(
	                    prompter, vlm, im, desc, arm_coord, selected_samples, top_n=1
	                )

	                # Ids are looked up in the full sample list, as before.
	                arrow_ids, selected_samples = _mark_selected(arrow_ids, samples)
	                image_circles_marked_np = prompter.add_arrow_overlay_plt(
	                    im, selected_samples, arm_coord
	                )
	                output_ims.append(image_circles_marked_np)
	                new_samples += selected_samples
	                yield output_ims, f"Image generated for parallel sample {i+1}/{n_parallel_trials} last iteration. Still working..."
	            # NOTE(review): fit is also called when no id was selected;
	            # confirm prompter.fit tolerates an empty selection.
	            center_mean, center_std = prompter.fit(arrow_ids, samples)

	    if n_parallel_trials > 1:
	        # adjust sample label to avoid duplications
	        for sample_id, sample in enumerate(new_samples):
	            sample.label = str(sample_id)
	        arrow_ids, _ = vip_perform_selection(
	            prompter, vlm, im, desc, arm_coord, new_samples, top_n=1
	        )

	        arrow_ids, selected_samples = _mark_selected(arrow_ids, new_samples)
	        image_circles_marked_np = prompter.add_arrow_overlay_plt(
	            im, selected_samples, arm_coord
	        )
	        output_ims.append(image_circles_marked_np)
	        center_mean, _ = prompter.fit(arrow_ids, new_samples)

	    if output_ims:
	        final_xy = prompter.action_to_coord(center_mean, im, arm_coord).xy
	        yield (
	            output_ims,
	            f"Final selected coordinate: {np.round(final_xy, decimals=0)}",
	        )
	    else:
	        # A generator's ``return`` value never reaches a consumer that simply
	        # iterates, so the failure message is yielded instead.
	        yield [], "Unable to understand query"


	def _mark_selected(arrow_ids, samples):
	    """Return the usable ids from ``arrow_ids`` and their samples, coloured red.

	    ``arrow_ids`` comes from free-form model output, so entries that are not
	    integer indices into ``samples`` (wrong type, negative, out of range) are
	    dropped instead of raising or silently selecting the wrong sample.
	    Integer-looking strings such as ``"3"`` are accepted.
	    """
	    valid_ids = []
	    chosen = []
	    if not isinstance(arrow_ids, (list, tuple)):
	        return valid_ids, chosen
	    for raw_id in arrow_ids:
	        if isinstance(raw_id, str):
	            try:
	                raw_id = int(raw_id)
	            except ValueError:
	                continue
	        # bool is a subclass of int; JSON true/false is not an index.
	        if isinstance(raw_id, bool) or not isinstance(raw_id, int):
	            continue
	        if not 0 <= raw_id < len(samples):
	            continue
	        sample = samples[raw_id]
	        sample.coord.color = (255, 0, 0)
	        valid_ids.append(raw_id)
	        chosen.append(sample)
	    return valid_ids, chosen