Epoching's picture
Update app.py
54fac6b
# Repo source: https://github.com/vt-vl-lab/3d-photo-inpainting
#import os
#os.environ['QT_DEBUG_PLUGINS'] = '1'
import subprocess
#subprocess.run('ldd /home/user/.local/lib/python3.8/site-packages/PyQt5/Qt/plugins/platforms/libqxcb.so', shell=True)
#subprocess.run('pip list', shell=True)
subprocess.run('nvidia-smi', shell=True)
from pyvirtualdisplay import Display
display = Display(visible=0, size=(1920, 1080)).start()
#subprocess.run('echo $DISPLAY', shell=True)
# 3d inpainting imports
import numpy as np
import argparse
import glob
import os
from functools import partial
import vispy
import scipy.misc as misc
from tqdm import tqdm
import yaml
import time
import sys
from mesh import write_ply, read_ply, output_3d_photo
from utils import get_MiDaS_samples, read_MiDaS_depth
import torch
import cv2
from skimage.transform import resize
import imageio
import copy
from networks import Inpaint_Color_Net, Inpaint_Depth_Net, Inpaint_Edge_Net
from MiDaS.run import run_depth
from boostmonodepth_utils import run_boostmonodepth
from MiDaS.monodepth_net import MonoDepthNet
import MiDaS.MiDaS_utils as MiDaS_utils
from bilateral_filtering import sparse_bilateral_filtering
import torch
# gradio imports
import gradio as gr
import uuid
from PIL import Image
from pathlib import Path
import shutil
from time import sleep
def inpaint(img_name, num_frames, fps):
config = yaml.load(open('argument.yml', 'r'))
config['num_frames'] = num_frames
config['fps'] = fps
if torch.cuda.is_available():
config['gpu_ids'] = 0
if config['offscreen_rendering'] is True:
vispy.use(app='egl')
os.makedirs(config['mesh_folder'], exist_ok=True)
os.makedirs(config['video_folder'], exist_ok=True)
os.makedirs(config['depth_folder'], exist_ok=True)
sample_list = get_MiDaS_samples(config['src_folder'], config['depth_folder'], config, config['specific'], img_name.stem)
normal_canvas, all_canvas = None, None
if isinstance(config["gpu_ids"], int) and (config["gpu_ids"] >= 0):
device = config["gpu_ids"]
else:
device = "cpu"
print(f"running on device {device}")
for idx in tqdm(range(len(sample_list))):
depth = None
sample = sample_list[idx]
print("Current Source ==> ", sample['src_pair_name'])
mesh_fi = os.path.join(config['mesh_folder'], sample['src_pair_name'] +'.ply')
image = imageio.imread(sample['ref_img_fi'])
print(f"Running depth extraction at {time.time()}")
if config['use_boostmonodepth'] is True:
run_boostmonodepth(sample['ref_img_fi'], config['src_folder'], config['depth_folder'])
elif config['require_midas'] is True:
run_depth([sample['ref_img_fi']], config['src_folder'], config['depth_folder'],
config['MiDaS_model_ckpt'], MonoDepthNet, MiDaS_utils, target_w=640)
if 'npy' in config['depth_format']:
config['output_h'], config['output_w'] = np.load(sample['depth_fi']).shape[:2]
else:
config['output_h'], config['output_w'] = imageio.imread(sample['depth_fi']).shape[:2]
frac = config['longer_side_len'] / max(config['output_h'], config['output_w'])
config['output_h'], config['output_w'] = int(config['output_h'] * frac), int(config['output_w'] * frac)
config['original_h'], config['original_w'] = config['output_h'], config['output_w']
if image.ndim == 2:
image = image[..., None].repeat(3, -1)
if np.sum(np.abs(image[..., 0] - image[..., 1])) == 0 and np.sum(np.abs(image[..., 1] - image[..., 2])) == 0:
config['gray_image'] = True
else:
config['gray_image'] = False
image = cv2.resize(image, (config['output_w'], config['output_h']), interpolation=cv2.INTER_AREA)
depth = read_MiDaS_depth(sample['depth_fi'], 3.0, config['output_h'], config['output_w'])
mean_loc_depth = depth[depth.shape[0]//2, depth.shape[1]//2]
if not(config['load_ply'] is True and os.path.exists(mesh_fi)):
vis_photos, vis_depths = sparse_bilateral_filtering(depth.copy(), image.copy(), config, num_iter=config['sparse_iter'], spdb=False)
depth = vis_depths[-1]
model = None
torch.cuda.empty_cache()
print("Start Running 3D_Photo ...")
print(f"Loading edge model at {time.time()}")
depth_edge_model = Inpaint_Edge_Net(init_weights=True)
depth_edge_weight = torch.load(config['depth_edge_model_ckpt'],
map_location=torch.device(device))
depth_edge_model.load_state_dict(depth_edge_weight)
depth_edge_model = depth_edge_model.to(device)
depth_edge_model.eval()
print(f"Loading depth model at {time.time()}")
depth_feat_model = Inpaint_Depth_Net()
depth_feat_weight = torch.load(config['depth_feat_model_ckpt'],
map_location=torch.device(device))
depth_feat_model.load_state_dict(depth_feat_weight, strict=True)
depth_feat_model = depth_feat_model.to(device)
depth_feat_model.eval()
depth_feat_model = depth_feat_model.to(device)
print(f"Loading rgb model at {time.time()}")
rgb_model = Inpaint_Color_Net()
rgb_feat_weight = torch.load(config['rgb_feat_model_ckpt'],
map_location=torch.device(device))
rgb_model.load_state_dict(rgb_feat_weight)
rgb_model.eval()
rgb_model = rgb_model.to(device)
graph = None
print(f"Writing depth ply (and basically doing everything) at {time.time()}")
rt_info = write_ply(image,
depth,
sample['int_mtx'],
mesh_fi,
config,
rgb_model,
depth_edge_model,
depth_edge_model,
depth_feat_model)
if rt_info is False:
continue
rgb_model = None
color_feat_model = None
depth_edge_model = None
depth_feat_model = None
torch.cuda.empty_cache()
if config['save_ply'] is True or config['load_ply'] is True:
verts, colors, faces, Height, Width, hFov, vFov = read_ply(mesh_fi)
else:
verts, colors, faces, Height, Width, hFov, vFov = rt_info
print(f"Making video at {time.time()}")
videos_poses, video_basename = copy.deepcopy(sample['tgts_poses']), sample['tgt_name']
top = (config.get('original_h') // 2 - sample['int_mtx'][1, 2] * config['output_h'])
left = (config.get('original_w') // 2 - sample['int_mtx'][0, 2] * config['output_w'])
down, right = top + config['output_h'], left + config['output_w']
border = [int(xx) for xx in [top, down, left, right]]
normal_canvas, all_canvas = output_3d_photo(verts.copy(), colors.copy(), faces.copy(), copy.deepcopy(Height), copy.deepcopy(Width), copy.deepcopy(hFov), copy.deepcopy(vFov),
copy.deepcopy(sample['tgt_pose']), sample['video_postfix'], copy.deepcopy(sample['ref_pose']), copy.deepcopy(config['video_folder']),
image.copy(), copy.deepcopy(sample['int_mtx']), config, image,
videos_poses, video_basename, config.get('original_h'), config.get('original_w'), border=border, depth=depth, normal_canvas=normal_canvas, all_canvas=all_canvas,
mean_loc_depth=mean_loc_depth)
def resizer(input_img, max_img_size=512):
width, height = input_img.size
long_edge = height if height >= width else width
if long_edge > max_img_size:
ratio = max_img_size / long_edge
resized_width = int(ratio * width)
resized_height = int(ratio * height)
resized_input_img = input_img.resize((resized_width, resized_height), resample=2)
return resized_input_img
else:
return input_img
def main_app(input_img, num_frames, fps):
# resize down
input_img = resizer(input_img)
# Save image in necessary folder for inpainting
#img_name = Path(str(uuid.uuid4()) + '.jpg')
img_name = Path('sample.jpg')
save_folder = Path('image')
input_img.save(save_folder/img_name)
inpaint(img_name, num_frames, fps)
#subprocess.run('ls -l', shell=True)
#subprocess.run('ls image -l', shell=True)
#subprocess.run('ls video/ -l', shell=True)
# Get output video path & return
input_img_path = str(save_folder/img_name)
out_vid_path = 'video/{0}_circle.mp4'.format(img_name.stem)
return out_vid_path
video_choices = ['dolly-zoom-in', 'zoom-in', 'circle', 'swing']
gradio_inputs = [gr.inputs.Image(type='pil', label='Input Image'),
gr.inputs.Slider(minimum=60, maximum=240, step=1, default=120, label="Number of Frames"),
gr.inputs.Slider(minimum=10, maximum=40, step=1, default=20, label="Frames per Second (FPS)")]
gradio_outputs = [gr.outputs.Video(label='Output Video')]
examples = [ ['moon.jpg'], ['dog.jpg'] ]
description="Convert an image into a trajectory-following video. Images are automatically resized down to a max edge of 512. | NOTE: The current runtime for a sample is around 400-700 seconds. Running on a lower number of frames could help! Do be patient as this is on CPU-only, BUT if this space maybe gets a GPU one day, it's already configured to run with GPU-support :) If you have a GPU, feel free to use the author's original repo (linked at the bottom of this path, they have a collab notebook!) You can also run this space/gradio app locally!"
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2004.04727' target='_blank'>3D Photography using Context-aware Layered Depth Inpainting</a> | <a href='https://shihmengli.github.io/3D-Photo-Inpainting/' target='_blank'>Github Project Page</a> | <a href='https://github.com/vt-vl-lab/3d-photo-inpainting' target='_blank'>Github Repo</a></p>"
iface = gr.Interface(fn=main_app, inputs=gradio_inputs , outputs=gradio_outputs, examples=examples,
title='3D Image Inpainting',
description=description,
article=article,
enable_queue=True)
iface.launch(debug=True)