# video-object-remover / FGT_codes / tool / video_inpainting.py
import os
import sys

# The repo-local imports below (RAFT, LAFC, FGT, and the tool utils) require
# these paths to be registered before the imports run.
sys.path.append(os.path.abspath(os.path.join(__file__, '..', '..')))
sys.path.append(os.path.abspath(os.path.join(__file__, '..', '..', 'tool')))
sys.path.append(os.path.abspath(os.path.join(
    __file__, '..', '..', 'tool', 'utils')))
sys.path.append(os.path.abspath(os.path.join(__file__, '..', '..', 'FGT')))
sys.path.append(os.path.abspath(os.path.join(__file__, '..', '..', 'LAFC')))
sys.path.append(os.path.abspath(
    os.path.join(os.path.dirname(__file__), '..')))

import argparse
import copy
import glob
import warnings
from importlib import import_module

import cv2
import cvbase
import imageio
import numpy as np
import scipy.ndimage
import torch
import torch.nn.functional as F2
import torchvision.transforms.functional as F
import yaml
from PIL import Image
from skimage.feature import canny
from torchvision.transforms import ToTensor

from RAFT import RAFT
from RAFT import utils
from get_flowNN_gradient import get_flowNN_gradient
from utils.Poisson_blend_img import Poisson_blend_img
from utils.region_fill import regionfill

warnings.filterwarnings("ignore")
def to_tensor(img):
img = Image.fromarray(img)
img_t = F.to_tensor(img).float()
return img_t
def diffusion(flows, masks):
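    """Fills masked flow values by diffusion (regionfill) from the hole
    boundary, frame by frame.

    flows: [N, H, W, 2]; masks: [N, H, W, 1]. Returns a list of filled
    [H, W, 2] flow fields, used as the initialization for LAFC.
    """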
flows_filled = []
for i in range(flows.shape[0]):
flow, mask = flows[i], masks[i]
flow_filled = np.zeros(flow.shape)
flow_filled[:, :, 0] = regionfill(flow[:, :, 0], mask[:, :, 0])
flow_filled[:, :, 1] = regionfill(flow[:, :, 1], mask[:, :, 0])
flows_filled.append(flow_filled)
return flows_filled
def np2tensor(array, near='c'):
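    """Converts a numpy array (or a list of [H, W, C] frames) to a 5D tensor.

    near='c' yields [1, C, T, H, W] (channels next to the batch dim);
    near='t' yields [1, T, C, H, W].
    """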
if isinstance(array, list):
array = np.stack(array, axis=0) # [t, h, w, c]
if near == 'c':
array = torch.from_numpy(np.transpose(array, (3, 0, 1, 2))).unsqueeze(
0).float() # [1, c, t, h, w]
elif near == 't':
array = torch.from_numpy(np.transpose(
array, (0, 3, 1, 2))).unsqueeze(0).float()
else:
raise ValueError(f'Unknown near type: {near}')
return array
def tensor2np(array):
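    """Stacks a list of [1, C, H, W] tensors into a [H, W, C, T] numpy array."""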
array = torch.stack(array, dim=-1).squeeze(0).permute(1,
2, 0, 3).cpu().numpy()
return array
def gradient_mask(mask):
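    """ORs the mask with its one-pixel row and column shifts so that
    forward-difference gradients touching the hole are masked as well.
    """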
    gradient_mask = np.logical_or.reduce((
        mask,
        np.concatenate((mask[1:, :], np.zeros((1, mask.shape[1]), dtype=bool)),
                       axis=0),
        np.concatenate((mask[:, 1:], np.zeros((mask.shape[0], 1), dtype=bool)),
                       axis=1)))
    return gradient_mask
def indicesGen(pivot, interval, frames, t):
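    """Generates `frames` temporal indices centered at `pivot` with spacing
    `interval`, reflecting indices that fall outside [0, t - 1] back into
    range (e.g. pivot=0, interval=3, frames=5, t=20 -> [6, 3, 0, 3, 6]).
    """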
singleSide = frames // 2
results = []
for i in range(-singleSide, singleSide + 1):
index = pivot + interval * i
if index < 0:
index = abs(index)
if index > t - 1:
index = 2 * (t - 1) - index
results.append(index)
return results
def get_ref_index(f, neighbor_ids, length, ref_length, num_ref):
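    """Selects non-local reference frame indices for FGT.

    With num_ref == -1, every `ref_length`-th frame outside the local
    neighborhood is used; otherwise roughly num_ref frames are sampled
    around the pivot frame f.
    """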
ref_index = []
if num_ref == -1:
for i in range(0, length, ref_length):
if i not in neighbor_ids:
ref_index.append(i)
else:
start_idx = max(0, f - ref_length * (num_ref // 2))
end_idx = min(length, f + ref_length * (num_ref // 2))
for i in range(start_idx, end_idx + 1, ref_length):
if i not in neighbor_ids:
if len(ref_index) > num_ref:
break
ref_index.append(i)
return ref_index
def save_flows(output, videoFlowF, videoFlowB):
create_dir(os.path.join(output, 'completed_flow', 'forward_flo'))
create_dir(os.path.join(output, 'completed_flow', 'backward_flo'))
create_dir(os.path.join(output, 'completed_flow', 'forward_png'))
create_dir(os.path.join(output, 'completed_flow', 'backward_png'))
N = videoFlowF.shape[-1]
for i in range(N):
forward_flow = videoFlowF[..., i]
backward_flow = videoFlowB[..., i]
forward_flow_vis = cvbase.flow2rgb(forward_flow)
backward_flow_vis = cvbase.flow2rgb(backward_flow)
cvbase.write_flow(forward_flow, os.path.join(
output, 'completed_flow', 'forward_flo', '{:05d}.flo'.format(i)))
cvbase.write_flow(backward_flow, os.path.join(
output, 'completed_flow', 'backward_flo', '{:05d}.flo'.format(i)))
imageio.imwrite(os.path.join(output, 'completed_flow',
'forward_png', '{:05d}.png'.format(i)), forward_flow_vis)
imageio.imwrite(os.path.join(output, 'completed_flow',
'backward_png', '{:05d}.png'.format(i)), backward_flow_vis)
def save_fgcp(output, frames, masks):
create_dir(os.path.join(output, 'prop_frames'))
create_dir(os.path.join(output, 'masks_left'))
create_dir(os.path.join(output, 'prop_frames_npy'))
create_dir(os.path.join(output, 'masks_left_npy'))
assert len(frames) == masks.shape[2]
for i in range(len(frames)):
cv2.imwrite(os.path.join(output, 'prop_frames',
'%05d.png' % i), frames[i] * 255.)
cv2.imwrite(os.path.join(output, 'masks_left', '%05d.png' %
i), masks[:, :, i] * 255.)
np.save(os.path.join(output, 'prop_frames_npy',
'%05d.npy' % i), frames[i] * 255.)
np.save(os.path.join(output, 'masks_left_npy',
'%05d.npy' % i), masks[:, :, i] * 255.)
def create_dir(dir):
"""Creates a directory if not exist.
"""
if not os.path.exists(dir):
os.makedirs(dir)
def initialize_RAFT(args, device):
"""Initializes the RAFT model.
"""
model = torch.nn.DataParallel(RAFT(args))
model.load_state_dict(torch.load(args.raft_model))
model = model.module
model.to(device)
model.eval()
return model
def initialize_LAFC(args, device):
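    """Initializes the LAFC flow-completion model from a directory expected
    to contain exactly one *.tar checkpoint and one *.yaml config; the model
    class is imported dynamically from LAFC.models.<name>.
    """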
print(args.lafc_ckpts)
assert len(os.listdir(args.lafc_ckpts)) == 2
checkpoint, config_file = glob.glob(os.path.join(args.lafc_ckpts, '*.tar'))[0], \
glob.glob(os.path.join(args.lafc_ckpts, '*.yaml'))[0]
with open(config_file, 'r') as f:
configs = yaml.full_load(f)
model = configs['model']
pkg = import_module('LAFC.models.{}'.format(model))
model = pkg.Model(configs)
state = torch.load(checkpoint, map_location=lambda storage,
loc: storage.cuda(device))
model.load_state_dict(state['model_state_dict'])
model = model.to(device)
return model, configs
def initialize_FGT(args, device):
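    """Initializes the FGT inpainting transformer, analogously to
    initialize_LAFC: one *.tar checkpoint plus one *.yaml config, with the
    model class imported dynamically from FGT.models.<name>.
    """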
assert len(os.listdir(args.fgt_ckpts)) == 2
checkpoint, config_file = glob.glob(os.path.join(args.fgt_ckpts, '*.tar'))[0], \
glob.glob(os.path.join(args.fgt_ckpts, '*.yaml'))[0]
with open(config_file, 'r') as f:
configs = yaml.full_load(f)
model = configs['model']
net = import_module('FGT.models.{}'.format(model))
model = net.Model(configs).to(device)
state = torch.load(checkpoint, map_location=lambda storage,
loc: storage.cuda(device))
model.load_state_dict(state['model_state_dict'])
return model, configs
def calculate_flow(args, model, video, mode):
"""Calculates optical flow.
"""
if mode not in ['forward', 'backward']:
raise NotImplementedError
imgH, imgW = args.imgH, args.imgW
Flow = np.empty(((imgH, imgW, 2, 0)), dtype=np.float32)
if args.vis_flows:
create_dir(os.path.join(args.outroot, 'flow', mode + '_flo'))
create_dir(os.path.join(args.outroot, 'flow', mode + '_png'))
with torch.no_grad():
for i in range(video.shape[0] - 1):
print(
"Calculating {0} flow {1:2d} <---> {2:2d}".format(mode, i, i + 1), '\r', end='')
if mode == 'forward':
# Flow i -> i + 1
image1 = video[i, None]
image2 = video[i + 1, None]
elif mode == 'backward':
# Flow i + 1 -> i
image1 = video[i + 1, None]
image2 = video[i, None]
else:
raise NotImplementedError
_, flow = model(image1, image2, iters=20, test_mode=True)
flow = flow[0].permute(1, 2, 0).cpu().numpy()
# resize optical flows
h, w = flow.shape[:2]
if h != imgH or w != imgW:
                flow = cv2.resize(flow, (imgW, imgH),
                                  interpolation=cv2.INTER_LINEAR)
flow[:, :, 0] *= (float(imgW) / float(w))
flow[:, :, 1] *= (float(imgH) / float(h))
Flow = np.concatenate((Flow, flow[..., None]), axis=-1)
if args.vis_flows:
# Flow visualization.
flow_img = utils.flow_viz.flow_to_image(flow)
flow_img = Image.fromarray(flow_img)
# Saves the flow and flow_img.
flow_img.save(os.path.join(args.outroot, 'flow',
mode + '_png', '%05d.png' % i))
utils.frame_utils.writeFlow(os.path.join(
args.outroot, 'flow', mode + '_flo', '%05d.flo' % i), flow)
return Flow
def extrapolation(args, video_ori, corrFlowF_ori, corrFlowB_ori):
"""Prepares the data for video extrapolation.
"""
imgH, imgW, _, nFrame = video_ori.shape
# Defines new FOV.
imgH_extr = int(args.H_scale * imgH)
imgW_extr = int(args.W_scale * imgW)
imgH_extr = imgH_extr - imgH_extr % 4
imgW_extr = imgW_extr - imgW_extr % 4
H_start = int((imgH_extr - imgH) / 2)
W_start = int((imgW_extr - imgW) / 2)
# Generates the mask for missing region.
    flow_mask = np.ones((imgH_extr, imgW_extr), dtype=bool)
flow_mask[H_start: H_start + imgH, W_start: W_start + imgW] = 0
mask_dilated = gradient_mask(flow_mask)
# Extrapolates the FOV for video.
video = np.zeros(((imgH_extr, imgW_extr, 3, nFrame)), dtype=np.float32)
video[H_start: H_start + imgH, W_start: W_start + imgW, :, :] = video_ori
for i in range(nFrame):
print("Preparing frame {0}".format(i), '\r', end='')
video[:, :, :, i] = cv2.inpaint((video[:, :, :, i] * 255).astype(np.uint8), flow_mask.astype(np.uint8), 3,
cv2.INPAINT_TELEA).astype(np.float32) / 255.
# Extrapolates the FOV for flow.
corrFlowF = np.zeros(
((imgH_extr, imgW_extr, 2, nFrame - 1)), dtype=np.float32)
corrFlowB = np.zeros(
((imgH_extr, imgW_extr, 2, nFrame - 1)), dtype=np.float32)
corrFlowF[H_start: H_start + imgH,
W_start: W_start + imgW, :] = corrFlowF_ori
corrFlowB[H_start: H_start + imgH,
W_start: W_start + imgW, :] = corrFlowB_ori
return video, corrFlowF, corrFlowB, flow_mask, mask_dilated, (W_start, H_start), (W_start + imgW, H_start + imgH)
def complete_flow(config, flow_model, flows, flow_masks, mode, device):
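    """Completes corrupted optical flow with LAFC.

    The masked flow is first initialized by diffusion(), then each frame is
    completed from a temporal window of candidate flows (see indicesGen);
    the network output is kept inside the mask and the original flow is
    kept outside it.
    """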
if mode not in ['forward', 'backward']:
raise NotImplementedError(f'Error flow mode {mode}')
flow_masks = np.moveaxis(flow_masks, -1, 0) # [N, H, W]
flows = np.moveaxis(flows, -1, 0) # [N, H, W, 2]
if len(flow_masks.shape) == 3:
flow_masks = flow_masks[:, :, :, np.newaxis]
if mode == 'forward':
flow_masks = flow_masks[0:-1]
else:
flow_masks = flow_masks[1:]
num_flows, flow_interval = config['num_flows'], config['flow_interval']
diffused_flows = diffusion(flows, flow_masks)
flows = np2tensor(flows)
flow_masks = np2tensor(flow_masks)
diffused_flows = np2tensor(diffused_flows)
flows = flows.to(device)
flow_masks = flow_masks.to(device)
diffused_flows = diffused_flows.to(device)
t = diffused_flows.shape[2]
filled_flows = [None] * t
pivot = num_flows // 2
for i in range(t):
indices = indicesGen(i, flow_interval, num_flows, t)
print('Indices: ', indices, '\r', end='')
cand_flows = flows[:, :, indices]
cand_masks = flow_masks[:, :, indices]
inputs = diffused_flows[:, :, indices]
pivot_mask = cand_masks[:, :, pivot]
pivot_flow = cand_flows[:, :, pivot]
with torch.no_grad():
output_flow = flow_model(inputs, cand_masks)
        if isinstance(output_flow, (tuple, list)):
output_flow = output_flow[0]
comp = output_flow * pivot_mask + pivot_flow * (1 - pivot_mask)
if filled_flows[i] is None:
filled_flows[i] = comp
assert None not in filled_flows
return filled_flows
def read_flow(flow_dir, video):
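    """Reads all *.flo files in flow_dir, resizes them to the video
    resolution, and rescales the flow magnitudes accordingly.
    Returns a [H, W, 2, N] array.
    """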
nFrame, _, imgH, imgW = video.shape
Flow = np.empty(((imgH, imgW, 2, 0)), dtype=np.float32)
flows = sorted(glob.glob(os.path.join(flow_dir, '*.flo')))
for flow in flows:
flow_data = cvbase.read_flow(flow)
h, w = flow_data.shape[:2]
        flow_data = cv2.resize(flow_data, (imgW, imgH),
                               interpolation=cv2.INTER_LINEAR)
flow_data[:, :, 0] *= (float(imgW) / float(w))
flow_data[:, :, 1] *= (float(imgH) / float(h))
Flow = np.concatenate((Flow, flow_data[..., None]), axis=-1)
return Flow
def norm_flows(flows):
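    """Normalizes flows ([B, T, C, H, W]) by their per-frame, per-channel
    spatial maximum.
    """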
    assert len(flows.shape) == 5, 'Flow shape: {}'.format(flows.shape)
flattened_flows = flows.flatten(3)
flow_max = torch.max(flattened_flows, dim=-1, keepdim=True)[0]
flows = flows / flow_max.unsqueeze(-1)
return flows
def save_results(outdir, comp_frames):
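    """Writes the composited RGB frames (converted to BGR for OpenCV) as
    PNGs under outdir/frames.
    """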
out_dir = os.path.join(outdir, 'frames')
if not os.path.exists(out_dir):
os.makedirs(out_dir)
for i in range(len(comp_frames)):
out_path = os.path.join(out_dir, '{:05d}.png'.format(i))
cv2.imwrite(out_path, comp_frames[i][:, :, ::-1])
def video_inpainting(args, imgArr, imgMaskArr):
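    """Runs the full FGT inpainting pipeline on in-memory frames and masks.

    Stages: (1) RAFT estimates forward/backward flow on the video;
    (2) LAFC completes the flow inside the masks; (3) image gradients are
    propagated along the completed flow and Poisson-blended (stage-I fill);
    (4) the FGT transformer fills whatever remains, using neighboring and
    non-local reference frames; the result is written to
    args.outroot/result.mp4.
    """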
device = torch.device('cuda:{}'.format(args.gpu))
print(args)
if args.opt is not None:
with open(args.opt, 'r') as f:
opts = yaml.full_load(f)
for k in opts.keys():
if k in args:
setattr(args, k, opts[k])
print(args)
# Flow model.
RAFT_model = initialize_RAFT(args, device)
# LAFC (flow completion)
LAFC_model, LAFC_config = initialize_LAFC(args, device)
# FGT
FGT_model, FGT_config = initialize_FGT(args, device)
# Loads frames.
# filename_list = glob.glob(os.path.join(args.path, '*.png')) + \
# glob.glob(os.path.join(args.path, '*.jpg'))
# Obtains imgH, imgW and nFrame.
imgH, imgW = args.imgH, args.imgW
# nFrame = len(filename_list)
nFrame = len(imgArr)
if imgH < 350:
flowH, flowW = imgH * 2, imgW * 2
else:
flowH, flowW = imgH, imgW
# Load video.
video, video_flow = [], []
    if args.mode == 'watermark_removal':
        filename_list = glob.glob(os.path.join(args.path, '*.png')) + \
            glob.glob(os.path.join(args.path, '*.jpg'))
        maskname_list = glob.glob(os.path.join(args.path_mask, '*.png')) + glob.glob(
            os.path.join(args.path_mask, '*.jpg'))
        assert len(filename_list) == len(maskname_list)
        for filename, maskname in zip(sorted(filename_list), sorted(maskname_list)):
            frame = torch.from_numpy(np.array(Image.open(filename)).astype(
                np.uint8)).permute(2, 0, 1).float().unsqueeze(0)
            mask = torch.from_numpy(np.array(Image.open(maskname)).astype(
                np.uint8)).permute(2, 0, 1).float().unsqueeze(0)
            mask[mask > 0] = 1
            frame = frame * (1 - mask)
            frame = F2.interpolate(frame, size=(imgH, imgW),
                                   mode='bilinear', align_corners=False)
            frame_flow = F2.interpolate(frame, size=(
                flowH, flowW), mode='bilinear', align_corners=False)
            video.append(frame)
            video_flow.append(frame_flow)
else:
'''for filename in sorted(filename_list):
frame = torch.from_numpy(np.array(Image.open(filename)).astype(np.uint8)).permute(2, 0, 1).float().unsqueeze(0)
frame = F2.upsample(frame, size=(imgH, imgW), mode='bilinear', align_corners=False)
frame_flow = F2.upsample(frame, size=(flowH, flowW), mode='bilinear', align_corners=False)
video.append(frame)
video_flow.append(frame_flow)'''
for im in imgArr:
frame = torch.from_numpy(np.array(im).astype(
np.uint8)).permute(2, 0, 1).float().unsqueeze(0)
            frame = F2.interpolate(frame, size=(imgH, imgW),
                                   mode='bilinear', align_corners=False)
            frame_flow = F2.interpolate(frame, size=(
                flowH, flowW), mode='bilinear', align_corners=False)
video.append(frame)
video_flow.append(frame_flow)
video = torch.cat(video, dim=0) # [n, c, h, w]
video_flow = torch.cat(video_flow, dim=0)
gts = video.clone()
video = video.to(device)
video_flow = video_flow.to(device)
    # Calculates the corrupted flow with RAFT.
    forward_flows = calculate_flow(
        args, RAFT_model, video_flow, 'forward')  # [H, W, 2, nFrame - 1]
    backward_flows = calculate_flow(args, RAFT_model, video_flow, 'backward')
# Makes sure video is in BGR (opencv) format.
video = video.permute(2, 3, 1, 0).cpu().numpy()[
:, :, ::-1, :] / 255. # np array -> [h, w, c, N] (0~1)
if args.mode == 'video_extrapolation':
# Creates video and flow where the extrapolated region are missing.
video, forward_flows, backward_flows, flow_mask, mask_dilated, start_point, end_point = extrapolation(args,
video,
forward_flows,
backward_flows)
imgH, imgW = video.shape[:2]
# mask indicating the missing region in the video.
mask = np.tile(flow_mask[..., None], (1, 1, nFrame))
flow_mask = np.tile(flow_mask[..., None], (1, 1, nFrame))
mask_dilated = np.tile(mask_dilated[..., None], (1, 1, nFrame))
else:
# Loads masks.
filename_list = glob.glob(os.path.join(args.path_mask, '*.png')) + \
glob.glob(os.path.join(args.path_mask, '*.jpg'))
mask = []
mask_dilated = []
flow_mask = []
'''for filename in sorted(filename_list):
mask_img = np.array(Image.open(filename).convert('L'))
mask_img = cv2.resize(mask_img, dsize=(imgW, imgH), interpolation=cv2.INTER_NEAREST)
if args.flow_mask_dilates > 0:
flow_mask_img = scipy.ndimage.binary_dilation(mask_img, iterations=args.flow_mask_dilates)
else:
flow_mask_img = mask_img
flow_mask.append(flow_mask_img)
if args.frame_dilates > 0:
mask_img = scipy.ndimage.binary_dilation(mask_img, iterations=args.frame_dilates)
mask.append(mask_img)
mask_dilated.append(gradient_mask(mask_img))'''
for f_mask in imgMaskArr:
mask_img = np.array(f_mask)
mask_img = cv2.resize(mask_img, dsize=(
imgW, imgH), interpolation=cv2.INTER_NEAREST)
if args.flow_mask_dilates > 0:
flow_mask_img = scipy.ndimage.binary_dilation(
mask_img, iterations=args.flow_mask_dilates)
else:
flow_mask_img = mask_img
flow_mask.append(flow_mask_img)
if args.frame_dilates > 0:
mask_img = scipy.ndimage.binary_dilation(
mask_img, iterations=args.frame_dilates)
mask.append(mask_img)
mask_dilated.append(gradient_mask(mask_img))
        # masks indicating the missing regions in the video.
        mask = np.stack(mask, -1).astype(bool)  # [H, W, N]
        mask_dilated = np.stack(mask_dilated, -1).astype(bool)
        flow_mask = np.stack(flow_mask, -1).astype(bool)
# Completes the flow.
videoFlowF = complete_flow(
LAFC_config, LAFC_model, forward_flows, flow_mask, 'forward', device)
videoFlowB = complete_flow(
LAFC_config, LAFC_model, backward_flows, flow_mask, 'backward', device)
videoFlowF = tensor2np(videoFlowF)
videoFlowB = tensor2np(videoFlowB)
print('\nFinish flow completion.')
if args.vis_completed_flows:
save_flows(args.outroot, videoFlowF, videoFlowB)
# Prepare gradients
gradient_x = np.empty(((imgH, imgW, 3, 0)), dtype=np.float32)
gradient_y = np.empty(((imgH, imgW, 3, 0)), dtype=np.float32)
for indFrame in range(nFrame):
img = video[:, :, :, indFrame]
img[mask[:, :, indFrame], :] = 0
img = cv2.inpaint((img * 255).astype(np.uint8), mask[:, :, indFrame].astype(np.uint8), 3,
cv2.INPAINT_TELEA).astype(np.float32) / 255.
gradient_x_ = np.concatenate((np.diff(img, axis=1), np.zeros((imgH, 1, 3), dtype=np.float32)),
axis=1)
gradient_y_ = np.concatenate(
(np.diff(img, axis=0), np.zeros((1, imgW, 3), dtype=np.float32)), axis=0)
gradient_x = np.concatenate(
(gradient_x, gradient_x_.reshape(imgH, imgW, 3, 1)), axis=-1)
gradient_y = np.concatenate(
(gradient_y, gradient_y_.reshape(imgH, imgW, 3, 1)), axis=-1)
gradient_x[mask_dilated[:, :, indFrame], :, indFrame] = 0
gradient_y[mask_dilated[:, :, indFrame], :, indFrame] = 0
gradient_x_filled = gradient_x
gradient_y_filled = gradient_y
mask_gradient = mask_dilated
video_comp = video
# Gradient propagation.
gradient_x_filled, gradient_y_filled, mask_gradient = \
get_flowNN_gradient(args,
gradient_x_filled,
gradient_y_filled,
mask,
mask_gradient,
videoFlowF,
videoFlowB,
None,
None)
    # If there are holes in the mask, Poisson blending will fail, so the holes
    # are filled here at the cost of some accuracy. Another solution would be
    # to modify the Poisson blending itself.
for indFrame in range(nFrame):
        mask_gradient[:, :, indFrame] = scipy.ndimage.binary_fill_holes(
            mask_gradient[:, :, indFrame]).astype(bool)
# After one gradient propagation iteration
# gradient --> RGB
frameBlends = []
for indFrame in range(nFrame):
print("Poisson blending frame {0:3d}".format(indFrame))
if mask[:, :, indFrame].sum() > 0:
try:
frameBlend, UnfilledMask = Poisson_blend_img(video_comp[:, :, :, indFrame],
gradient_x_filled[:,
0: imgW - 1, :, indFrame],
gradient_y_filled[0: imgH -
1, :, :, indFrame],
mask[:, :, indFrame], mask_gradient[:, :, indFrame])
            except Exception:
frameBlend, UnfilledMask = video_comp[:,
:, :, indFrame], mask[:, :, indFrame]
frameBlend = np.clip(frameBlend, 0, 1.0)
tmp = cv2.inpaint((frameBlend * 255).astype(np.uint8), UnfilledMask.astype(np.uint8), 3,
cv2.INPAINT_TELEA).astype(np.float32) / 255.
frameBlend[UnfilledMask, :] = tmp[UnfilledMask, :]
video_comp[:, :, :, indFrame] = frameBlend
mask[:, :, indFrame] = UnfilledMask
frameBlend_ = copy.deepcopy(frameBlend)
# Green indicates the regions that are not filled yet.
frameBlend_[mask[:, :, indFrame], :] = [0, 1., 0]
else:
frameBlend_ = video_comp[:, :, :, indFrame]
frameBlends.append(frameBlend_)
if args.vis_prop:
save_fgcp(args.outroot, frameBlends, mask)
video_length = len(frameBlends)
for i in range(len(frameBlends)):
frameBlends[i] = frameBlends[i][:, :, ::-1]
frames_first = np2tensor(frameBlends, near='t').to(device)
mask = np.moveaxis(mask, -1, 0)
mask = mask[:, :, :, np.newaxis]
masks = np2tensor(mask, near='t').to(device)
normed_frames = frames_first * 2 - 1
comp_frames = [None] * video_length
ref_length = args.step
num_ref = args.num_ref
neighbor_stride = args.neighbor_stride
videoFlowF = np.moveaxis(videoFlowF, -1, 0)
videoFlowF = np.concatenate([videoFlowF, videoFlowF[-1:, ...]], axis=0)
flows = np2tensor(videoFlowF, near='t')
flows = norm_flows(flows).to(device)
for f in range(0, video_length, neighbor_stride):
neighbor_ids = [i for i in range(
max(0, f - neighbor_stride), min(video_length, f + neighbor_stride + 1))]
ref_ids = get_ref_index(
f, neighbor_ids, video_length, ref_length, num_ref)
print(f, len(neighbor_ids), len(ref_ids))
selected_frames = normed_frames[:, neighbor_ids + ref_ids]
selected_masks = masks[:, neighbor_ids + ref_ids]
masked_frames = selected_frames * (1 - selected_masks)
selected_flows = flows[:, neighbor_ids + ref_ids]
with torch.no_grad():
filled_frames = FGT_model(
masked_frames, selected_flows, selected_masks)
filled_frames = (filled_frames + 1) / 2
filled_frames = filled_frames.cpu().permute(0, 2, 3, 1).numpy() * 255
for i in range(len(neighbor_ids)):
idx = neighbor_ids[i]
valid_frame = frames_first[0, idx].cpu().permute(
1, 2, 0).numpy() * 255.
valid_mask = masks[0, idx].cpu().permute(1, 2, 0).numpy()
comp = np.array(filled_frames[i]).astype(np.uint8) * valid_mask + \
np.array(valid_frame).astype(np.uint8) * (1 - valid_mask)
if comp_frames[idx] is None:
comp_frames[idx] = comp
else:
comp_frames[idx] = comp_frames[idx].astype(
np.float32) * 0.5 + comp.astype(np.float32) * 0.5
if args.vis_frame:
save_results(args.outroot, comp_frames)
create_dir(args.outroot)
for i in range(len(comp_frames)):
comp_frames[i] = comp_frames[i].astype(np.uint8)
imageio.mimwrite(os.path.join(args.outroot, 'result.mp4'),
comp_frames, fps=30, quality=8)
    print(f'Done, please check your result in {args.outroot}')
def main(args):
    assert args.mode in ('object_removal', 'video_extrapolation', 'watermark_removal'), (
        "Accepted modes: 'object_removal', 'video_extrapolation', and 'watermark_removal', but input is %s"
    ) % args.mode
    # video_inpainting() expects in-memory frames and masks; when run as a
    # script, load them from args.path and args.path_mask (assumed layout:
    # one *.png/*.jpg per frame, mirroring the commented-out loaders above).
    frames = [Image.open(p) for p in sorted(
        glob.glob(os.path.join(args.path, '*.png')) +
        glob.glob(os.path.join(args.path, '*.jpg')))]
    masks = [Image.open(p).convert('L') for p in sorted(
        glob.glob(os.path.join(args.path_mask, '*.png')) +
        glob.glob(os.path.join(args.path_mask, '*.jpg')))]
    video_inpainting(args, frames, masks)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--opt', default='configs/object_removal.yaml',
help='Please select your config file for inference')
# video completion
    parser.add_argument('--mode', default='object_removal', choices=[
        'object_removal', 'watermark_removal', 'video_extrapolation'],
        help="modes: object_removal / watermark_removal / video_extrapolation")
parser.add_argument(
'--path', default='/myData/davis_resized/walking', help="dataset for evaluation")
parser.add_argument(
'--path_mask', default='/myData/dilateAnnotations_4/walking', help="mask for object removal")
parser.add_argument(
'--outroot', default='quick_start/walking3', help="output directory")
parser.add_argument('--consistencyThres', dest='consistencyThres', default=5, type=float,
help='flow consistency error threshold')
parser.add_argument('--alpha', dest='alpha', default=0.1, type=float)
    parser.add_argument('--Nonlocal', dest='Nonlocal',
                        action='store_true')
# RAFT
parser.add_argument(
'--raft_model', default='../LAFC/flowCheckPoint/raft-things.pth', help="restore checkpoint")
parser.add_argument('--small', action='store_true', help='use small model')
parser.add_argument('--mixed_precision',
action='store_true', help='use mixed precision')
    parser.add_argument('--alternate_corr', action='store_true',
                        help='use efficient correlation implementation')
# LAFC
parser.add_argument('--lafc_ckpts', type=str, default='../LAFC/checkpoint')
# FGT
parser.add_argument('--fgt_ckpts', type=str, default='../FGT/checkpoint')
# extrapolation
parser.add_argument('--H_scale', dest='H_scale', default=2,
type=float, help='H extrapolation scale')
parser.add_argument('--W_scale', dest='W_scale', default=2,
type=float, help='W extrapolation scale')
# Image basic information
parser.add_argument('--imgH', type=int, default=256)
parser.add_argument('--imgW', type=int, default=432)
parser.add_argument('--flow_mask_dilates', type=int, default=8)
parser.add_argument('--frame_dilates', type=int, default=0)
parser.add_argument('--gpu', type=int, default=0)
# FGT inference parameters
parser.add_argument('--step', type=int, default=10)
parser.add_argument('--num_ref', type=int, default=-1)
parser.add_argument('--neighbor_stride', type=int, default=5)
# visualization
parser.add_argument('--vis_flows', action='store_true',
help='Visualize the initialized flows')
parser.add_argument('--vis_completed_flows',
action='store_true', help='Visualize the completed flows')
parser.add_argument('--vis_prop', action='store_true',
help='Visualize the frames after stage-I filling (flow guided content propagation)')
parser.add_argument('--vis_frame', action='store_true',
help='Visualize frames')
args = parser.parse_args()
main(args)