Spaces:
Running
on
Zero
Running
on
Zero
import gradio as gr | |
from PIL import Image | |
import numpy as np | |
from copy import deepcopy | |
import cv2 | |
from objctrl_2_5d.utils.vis_camera import vis_camera_rescale | |
from objctrl_2_5d.utils.objmask_util import trajectory_to_camera_poses_v1 | |
from objctrl_2_5d.utils.customized_cam import rotation, clockwise, pan_and_zoom | |
CAMERA_MODE = ["None", "ZoomIn", "ZoomOut", "PanRight", "PanLeft", "TiltUp", "TiltDown", "ClockWise", "Anti-CW", "Rotate60"] | |
zc_threshold = 0.2 | |
depth_scale_ = 5.2 | |
center_margin = 10 | |
height, width = 320, 576 | |
num_frames = 14 | |
intrinsics = np.array([[float(width), float(width), float(width) / 2, float(height) / 2]]) | |
intrinsics = np.repeat(intrinsics, num_frames, axis=0) # [n_frame, 4] | |
fx = intrinsics[0, 0] / width | |
fy = intrinsics[0, 1] / height | |
cx = intrinsics[0, 2] / width | |
cy = intrinsics[0, 3] / height | |
def process_image(raw_image): | |
image, points = raw_image['image'], raw_image['points'] | |
try: | |
assert(len(points)) == 1, "Please select only one point" | |
[x1, y1, _, x2, y2, _] = points[0] | |
image = image.crop((x1, y1, x2, y2)) | |
image = image.resize((width, height)) | |
except: | |
image = image.resize((width, height)) | |
return image, gr.update(value={'image': image}) | |
# -------------- general UI functionality -------------- | |
def get_subject_points(canvas): | |
return canvas["image"], canvas["points"] | |
def mask_image(image, | |
mask, | |
color=[255,0,0], | |
alpha=0.5): | |
""" Overlay mask on image for visualization purpose. | |
Args: | |
image (H, W, 3) or (H, W): input image | |
mask (H, W): mask to be overlaid | |
color: the color of overlaid mask | |
alpha: the transparency of the mask | |
""" | |
out = deepcopy(image) | |
img = deepcopy(image) | |
img[mask == 1] = color | |
out = cv2.addWeighted(img, alpha, out, 1-alpha, 0, out) | |
return out | |
def get_points(img, | |
sel_pix, | |
evt: gr.SelectData): | |
# collect the selected point | |
img = np.array(img) | |
img = deepcopy(img) | |
sel_pix.append(evt.index) | |
# only draw the last two points | |
# if len(sel_pix) > 2: | |
# sel_pix = sel_pix[-2:] | |
points = [] | |
for idx, point in enumerate(sel_pix): | |
if idx % 2 == 0: | |
# draw a red circle at the handle point | |
cv2.circle(img, tuple(point), 10, (255, 0, 0), -1) | |
else: | |
# draw a blue circle at the handle point | |
cv2.circle(img, tuple(point), 10, (0, 0, 255), -1) | |
points.append(tuple(point)) | |
# draw an arrow from handle point to target point | |
# if len(points) == idx + 1: | |
if idx > 0: | |
line_length = np.sqrt((points[idx][0] - points[idx-1][0])**2 + (points[idx][1] - points[idx-1][1])**2) | |
arrow_head_length = 10 | |
tip_length = arrow_head_length / line_length | |
cv2.arrowedLine(img, points[idx-1], points[idx], (0, 255, 0), 4, tipLength=tip_length) | |
# points = [] | |
return img if isinstance(img, np.ndarray) else np.array(img), sel_pix | |
# clear all handle/target points | |
def undo_points(original_image): | |
return original_image, [] | |
def interpolate_points(points, num_points): | |
x = points[:, 0] | |
y = points[:, 1] | |
# Interpolating the points | |
t = np.linspace(0, 1, len(points)) | |
t_new = np.linspace(0, 1, num_points) | |
x_new = np.interp(t_new, t, x) | |
y_new = np.interp(t_new, t, y) | |
return np.vstack((x_new, y_new)).T # [] | |
def traj2cam(traj, depth, rescale): | |
if len(traj) == 0: | |
return None, None, 0.0, gr.update(value=CAMERA_MODE[0]) | |
traj = np.array(traj) | |
trajectory = interpolate_points(traj, num_frames) | |
center_h_margin, center_w_margin = center_margin, center_margin | |
depth_center = np.mean(depth[height//2-center_h_margin:height//2+center_h_margin, width//2-center_w_margin:width//2+center_w_margin]) | |
if rescale == 0: | |
rescale = 1 | |
depth_rescale = round(depth_scale_ * rescale / depth_center, 2) | |
r_depth = depth * depth_rescale | |
zc = [] | |
for i in range(num_frames): | |
zc.append(r_depth[int(trajectory[i][1]), int(trajectory[i][0])]) | |
# print(f'zc: {zc}') | |
## norm zc | |
zc_norm = np.array(zc) | |
zc_grad = zc_norm[1:] - zc_norm[:-1] | |
zc_grad = np.abs(zc_grad) | |
zc_grad = zc_grad[1:] - zc_grad[:-1] | |
zc_grad_std = np.std(zc_grad) | |
if zc_grad_std > zc_threshold: | |
zc = [zc[0]] * num_frames | |
# print(f'zc_grad_std: {zc_grad_std}, zc_threshold: {zc_threshold}') | |
# print(f'zc: {zc}') | |
traj_w2c = trajectory_to_camera_poses_v1(trajectory, intrinsics, num_frames, zc=zc) # numpy: [n_frame, 4, 4] | |
RTs = traj_w2c[:, :3] | |
fig = vis_camera_rescale(RTs) | |
return RTs, fig, rescale, gr.update(value=CAMERA_MODE[0]) | |
def get_rotate_cam(angle, depth): | |
# mean_depth = np.mean(depth * mask) | |
center_h_margin, center_w_margin = center_margin, center_margin | |
depth_center = np.mean(depth[height//2-center_h_margin:height//2+center_h_margin, width//2-center_w_margin:width//2+center_w_margin]) | |
# print(f'rotate depth_center: {depth_center}') | |
RTs = rotation(num_frames, angle, depth_center, depth_center) | |
fig = vis_camera_rescale(RTs) | |
return RTs, fig | |
def get_clockwise_cam(angle, depth, mask): | |
# mask = mask.astype(np.float32) # [0, 1] | |
# mean_depth = np.mean(depth * mask) | |
# center_h_margin, center_w_margin = center_margin, center_margin | |
# depth_center = np.mean(depth[height//2-center_h_margin:height//2+center_h_margin, width//2-center_w_margin:width//2+center_w_margin]) | |
RTs = clockwise(angle, num_frames) | |
# RTs[:, -1, -1] = mean_depth | |
fig = vis_camera_rescale(RTs) | |
return RTs, fig | |
def get_translate_cam(Tx, Ty, Tz, depth, mask, speed): | |
# mask = mask.astype(np.float32) # [0, 1] | |
# mean_depth = np.mean(depth * mask) | |
T = np.array([Tx, Ty, Tz]) | |
T = T.reshape(3, 1) | |
T = T[None, ...].repeat(num_frames, axis=0) | |
RTs = pan_and_zoom(T, speed, n=num_frames) | |
# RTs[:, -1, -1] += mean_depth | |
fig = vis_camera_rescale(RTs) | |
return RTs, fig | |
def get_camera_pose(camera_mode): | |
# camera_mode = ["None", "ZoomIn", "ZoomOut", "PanLeft", "PanRight", "TiltUp", "TiltDown", "ClockWise", "Anti-CW", "Rotate60"] | |
def trigger_camera_pose(camera_option, depth, mask, rescale, angle, speed): | |
if camera_option == camera_mode[0]: # None | |
RTs = None | |
fig = None | |
elif camera_option == camera_mode[1]: # ZoomIn | |
RTs, fig = get_translate_cam(0, 0, -1, depth, mask, speed) | |
elif camera_option == camera_mode[2]: # ZoomOut | |
RTs, fig = get_translate_cam(0, 0, 1, depth, mask, speed) | |
elif camera_option == camera_mode[3]: # PanLeft | |
RTs, fig = get_translate_cam(-1, 0, 0, depth, mask, speed) | |
elif camera_option == camera_mode[4]: # PanRight | |
RTs, fig = get_translate_cam(1, 0, 0, depth, mask, speed) | |
elif camera_option == camera_mode[5]: # TiltUp | |
RTs, fig = get_translate_cam(0, 1, 0, depth, mask, speed) | |
elif camera_option == camera_mode[6]: # TiltDown | |
RTs, fig = get_translate_cam(0, -1, 0, depth, mask, speed) | |
elif camera_option == camera_mode[7]: # ClockWise | |
RTs, fig = get_clockwise_cam(-angle, depth, mask) | |
elif camera_option == camera_mode[8]: # Anti-CW | |
RTs, fig = get_clockwise_cam(angle, depth, mask) | |
else: # Rotate60 | |
RTs, fig = get_rotate_cam(angle, depth) | |
rescale = 0.0 | |
return RTs, fig, rescale | |
return trigger_camera_pose | |
import os | |
from glob import glob | |
import json | |
def get_mid_params(raw_input, canvas, mask, selected_points, camera_option, bg_mode, shared_wapring_latents, generated_video): | |
output_dir = "./assets/examples" | |
os.makedirs(output_dir, exist_ok=True) | |
# folders = sorted(glob(output_dir + "/*")) | |
folders = os.listdir(output_dir) | |
folders = [int(folder) for folder in folders if os.path.isdir(os.path.join(output_dir, folder))] | |
num = sorted(folders)[-1] + 1 if folders else 0 | |
fout = open(os.path.join(output_dir, f'examples.json'), 'a+') | |
cur_folder = os.path.join(output_dir, f'{num:05d}') | |
os.makedirs(cur_folder, exist_ok=True) | |
raw_image = raw_input['image'] | |
raw_points = raw_input['points'] | |
seg_image = canvas['image'] | |
seg_points = canvas['points'] | |
mask = Image.fromarray(mask) | |
mask_path = os.path.join(cur_folder, 'mask.png') | |
mask.save(mask_path) | |
raw_image_path = os.path.join(cur_folder, 'raw_image.png') | |
seg_image_path = os.path.join(cur_folder, 'seg_image.png') | |
raw_image.save(os.path.join(cur_folder, 'raw_image.png')) | |
seg_image.save(os.path.join(cur_folder, 'seg_image.png')) | |
gen_path = os.path.join(cur_folder, 'generated_video.mp4') | |
cmd = f"cp {generated_video} {gen_path}" | |
os.system(cmd) | |
# data = [{'image': raw_image_path, 'points': raw_points}, | |
# {'image': seg_image_path, 'points': seg_points}, | |
# mask_path, | |
# str(selected_points), | |
# camera_option, | |
# bg_mode, | |
# gen_path] | |
data = {f'{num:05d}': [{'image': raw_image_path}, | |
str(raw_points), | |
{'image': seg_image_path}, | |
str(seg_points), | |
mask_path, | |
str(selected_points), | |
camera_option, | |
bg_mode, | |
shared_wapring_latents, | |
gen_path]} | |
fout.write(json.dumps(data) + '\n') | |
fout.close() | |