UDCAI committed on
Commit b67d96d · verified · 1 Parent(s): a1206b7

Create app.py

Files changed (1)
  1. app.py +1053 -0
app.py ADDED
@@ -0,0 +1,1053 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright (c) Alibaba, Inc. and its affiliates.
3
+ import shutil
4
+ import sys
5
+ import json
6
+ import os
7
+ import argparse
8
+ import datetime
9
+ import copy
10
+ import random
11
+
12
+ import cv2
13
+ import imageio
14
+ import numpy as np
15
+ import gradio as gr
16
+ import tempfile
17
+ from pycocotools import mask as mask_utils
18
+
19
+ sys.path.insert(0, os.path.sep.join(os.path.realpath(__file__).split(os.path.sep)[:-3]))
20
+ from vace.annotators.utils import single_rle_to_mask, read_video_frames, save_one_video, read_video_one_frame
21
+ from vace.configs import VACE_IMAGE_PREPROCCESS_CONFIGS, VACE_IMAGE_MASK_PREPROCCESS_CONFIGS, VACE_IMAGE_MASKAUG_PREPROCCESS_CONFIGS, VACE_VIDEO_PREPROCCESS_CONFIGS, VACE_VIDEO_MASK_PREPROCCESS_CONFIGS, VACE_VIDEO_MASKAUG_PREPROCCESS_CONFIGS, VACE_COMPOSITION_PREPROCCESS_CONFIGS
22
+ import vace.annotators as annotators
23
+
24
+
25
+ def tid_maker():
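+ # Timestamp-based id (YYYYMMDDHHMMSS plus microseconds) used to name saved samples.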
26
+ return '{0:%Y%m%d%H%M%S%f}'.format(datetime.datetime.now())
27
+
28
+ def dict_to_markdown_table(d):
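+ # Render a flat dict as a two-column Markdown table for the save log.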
29
+ markdown = "| Key | Value |\n"
30
+ markdown += "| --- | ----- |\n"
31
+ for key, value in d.items():
32
+ markdown += f"| {key} | {value} |\n"
33
+ return markdown
34
+
35
+
36
+ class VACEImageTag():
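+ # Image tab: instantiates the image, mask, and mask-augmentation annotators from the VACE image configs and builds the Gradio UI around them.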
37
+ def __init__(self, cfg):
38
+ self.save_dir = os.path.join(cfg.save_dir, 'image')
39
+ if not os.path.exists(self.save_dir):
40
+ os.makedirs(self.save_dir)
41
+
42
+ self.image_anno_processor = {}
43
+ self.load_image_anno_list = ["image_plain", "image_depth", "image_gray", "image_pose", "image_scribble", "image_outpainting"]
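+ # Only the annotators listed above are instantiated; each config entry supplies NAME (class), INPUTS, and OUTPUTS.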
44
+ for anno_name, anno_cfg in copy.deepcopy(VACE_IMAGE_PREPROCCESS_CONFIGS).items():
45
+ if anno_name not in self.load_image_anno_list: continue
46
+ class_name = anno_cfg.pop("NAME")
47
+ input_params = anno_cfg.pop("INPUTS")
48
+ output_params = anno_cfg.pop("OUTPUTS")
49
+ anno_ins = getattr(annotators, class_name)(cfg=anno_cfg)
50
+ self.image_anno_processor[anno_name] = {"inputs": input_params, "outputs": output_params,
51
+ "anno_ins": anno_ins}
52
+
53
+ self.mask_anno_processor = {}
54
+ self.load_mask_anno_list = ["image_mask_plain", "image_mask_seg", "image_mask_draw", "image_mask_face"]
55
+ for anno_name, anno_cfg in copy.deepcopy(VACE_IMAGE_MASK_PREPROCCESS_CONFIGS).items():
56
+ if anno_name not in self.load_mask_anno_list: continue
57
+ class_name = anno_cfg.pop("NAME")
58
+ input_params = anno_cfg.pop("INPUTS")
59
+ output_params = anno_cfg.pop("OUTPUTS")
60
+ anno_ins = getattr(annotators, class_name)(cfg=anno_cfg)
61
+ self.mask_anno_processor[anno_name] = {"inputs": input_params, "outputs": output_params,
62
+ "anno_ins": anno_ins}
63
+
64
+ self.maskaug_anno_processor = {}
65
+ self.load_maskaug_anno_list = ["image_maskaug_plain", "image_maskaug_invert", "image_maskaug", "image_maskaug_region_random", "image_maskaug_region_crop"]
66
+ for anno_name, anno_cfg in copy.deepcopy(VACE_IMAGE_MASKAUG_PREPROCCESS_CONFIGS).items():
67
+ if anno_name not in self.load_maskaug_anno_list: continue
68
+ class_name = anno_cfg.pop("NAME")
69
+ input_params = anno_cfg.pop("INPUTS")
70
+ output_params = anno_cfg.pop("OUTPUTS")
71
+ anno_ins = getattr(annotators, class_name)(cfg=anno_cfg)
72
+ self.maskaug_anno_processor[anno_name] = {"inputs": input_params, "outputs": output_params,
73
+ "anno_ins": anno_ins}
74
+
75
+ self.seg_type = ['maskpointtrack', 'maskbboxtrack', 'masktrack', 'salientmasktrack', 'salientbboxtrack', 'label', 'caption']
76
+ self.seg_draw_type = ['maskpoint', 'maskbbox', 'mask']
77
+
78
+ def create_ui_image(self, *args, **kwargs):
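+ # Layout: an input image with a drawable mask, three outputs (processed image, masked image, mask), annotator dropdowns with optional settings, and process/save buttons.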
79
+ with gr.Row(variant="panel"):
80
+ with gr.Column(scale=1):
81
+ with gr.Row():
82
+ self.input_process_image = gr.ImageMask(
83
+ label="input_process_image",
84
+ layers=False,
85
+ type='pil',
86
+ format='png',
87
+ interactive=True)
88
+ with gr.Row(variant="panel"):
89
+ with gr.Column(scale=1):
90
+ with gr.Row():
91
+ self.output_process_image = gr.Image(
92
+ label="output_process_image",
93
+ value=None,
94
+ type='pil',
95
+ image_mode='RGB',
96
+ format='png',
97
+ interactive=False)
98
+ with gr.Column(scale=1):
99
+ with gr.Row():
100
+ self.output_process_masked_image = gr.Image(
101
+ label="output_process_masked_image",
102
+ value=None,
103
+ type='pil',
104
+ image_mode='RGB',
105
+ format='png',
106
+ interactive=False)
107
+ with gr.Column(scale=1):
108
+ with gr.Row():
109
+ self.output_process_mask = gr.Image(
110
+ label="output_process_mask",
111
+ value=None,
112
+ type='pil',
113
+ image_mode='L',
114
+ format='png',
115
+ interactive=False)
116
+ with gr.Row(variant="panel"):
117
+ with gr.Column(scale=1):
118
+ with gr.Row():
119
+ self.image_process_type = gr.Dropdown(
120
+ label='Image Annotator',
121
+ choices=list(self.image_anno_processor.keys()),
122
+ value=list(self.image_anno_processor.keys())[0],
123
+ interactive=True)
124
+ with gr.Row(visible=False) as self.outpainting_setting:
125
+ self.outpainting_direction = gr.Dropdown(
126
+ multiselect=True,
127
+ label='Outpainting Direction',
128
+ choices=['left', 'right', 'up', 'down'],
129
+ value=['left', 'right', 'up', 'down'],
130
+ interactive=True)
131
+ self.outpainting_ratio = gr.Slider(
132
+ label='Outpainting Ratio',
133
+ minimum=0.0,
134
+ maximum=2.0,
135
+ step=0.1,
136
+ value=0.3,
137
+ interactive=True)
138
+ with gr.Column(scale=1):
139
+ with gr.Row():
140
+ self.mask_process_type = gr.Dropdown(
141
+ label='Mask Annotator',
142
+ choices=list(self.mask_anno_processor.keys()),
143
+ value=list(self.mask_anno_processor.keys())[0],
144
+ interactive=True)
145
+ with gr.Row():
146
+ self.mask_opacity = gr.Slider(
147
+ label='Mask Opacity',
148
+ minimum=0.0,
149
+ maximum=1.0,
150
+ step=0.1,
151
+ value=1.0,
152
+ interactive=True)
153
+ self.mask_gray = gr.Checkbox(
154
+ label='Mask Gray',
155
+ value=True,
156
+ interactive=True)
157
+ with gr.Row(visible=False) as self.segment_setting:
158
+ self.mask_type = gr.Dropdown(
159
+ label='Segment Type',
160
+ choices=self.seg_type,
161
+ value='maskpointtrack',
162
+ interactive=True)
163
+ self.mask_segtag = gr.Textbox(
164
+ label='Mask Seg Tag',
165
+ value='',
166
+ interactive=True)
167
+ with gr.Column(scale=1):
168
+ with gr.Row():
169
+ self.mask_aug_process_type = gr.Dropdown(
170
+ label='Mask Aug Annotator',
171
+ choices=list(self.maskaug_anno_processor.keys()),
172
+ value=list(self.maskaug_anno_processor.keys())[0],
173
+ interactive=True)
174
+ with gr.Row(visible=False) as self.maskaug_setting:
175
+ self.mask_aug_type = gr.Dropdown(
176
+ label='Mask Aug Type',
177
+ choices=['random', 'original', 'original_expand', 'hull', 'hull_expand', 'bbox', 'bbox_expand'],
178
+ value='original',
179
+ interactive=True)
180
+ self.mask_expand_ratio = gr.Slider(
181
+ label='Mask Expand Ratio',
182
+ minimum=0.0,
183
+ maximum=1.0,
184
+ step=0.1,
185
+ value=0.3,
186
+ interactive=True)
187
+ self.mask_expand_iters = gr.Slider(
188
+ label='Mask Expand Iters',
189
+ minimum=1,
190
+ maximum=10,
191
+ step=1,
192
+ value=5,
193
+ interactive=True)
194
+ with gr.Row(variant="panel"):
195
+ with gr.Column(scale=1):
196
+ with gr.Row():
197
+ self.process_button = gr.Button(
198
+ value='[1]Sample Process',
199
+ elem_classes='type_row',
200
+ elem_id='process_button',
201
+ visible=True)
202
+ with gr.Row():
203
+ self.save_button = gr.Button(
204
+ value='[2]Sample Save',
205
+ elem_classes='type_row',
206
+ elem_id='save_button',
207
+ visible=True)
208
+ with gr.Row():
209
+ self.save_log = gr.Markdown()
210
+
211
+
212
+ def change_process_type(self, image_process_type, mask_process_type, mask_aug_process_type):
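+ # Show or hide the outpainting, segment, and mask-augmentation settings to match the selected annotators, and switch the segment-type choices for the draw annotator.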
213
+ outpainting_setting_visible = False
214
+ segment_setting = False
215
+ maskaug_setting = False
216
+ segment_choices = self.seg_type
217
+ if image_process_type == "image_outpainting":
218
+ outpainting_setting_visible = True
219
+ if mask_process_type in ["image_mask_seg", "image_mask_draw"]:
220
+ segment_setting = True
221
+ if mask_process_type in ["image_mask_draw"]:
222
+ segment_choices = self.seg_draw_type
223
+ if mask_aug_process_type in ["image_maskaug", "image_maskaug_region_random", "image_maskaug_region_crop"]:
224
+ maskaug_setting = True
225
+ return gr.update(visible=outpainting_setting_visible), gr.update(visible=segment_setting), gr.update(choices=segment_choices, value=segment_choices[0]), gr.update(visible=maskaug_setting)
226
+
227
+ def process_image_data(self, input_process_image, image_process_type, outpainting_direction, outpainting_ratio, mask_process_type, mask_type, mask_segtag, mask_opacity, mask_gray, mask_aug_process_type, mask_aug_type, mask_expand_ratio, mask_expand_iters):
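+ # Apply the selected image annotator, derive a mask with the selected mask annotator, optionally augment it, and return (image, masked_image, mask).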
228
+ image = np.array(input_process_image['background'].convert('RGB'))
229
+ mask = np.array(input_process_image['layers'][0].split()[-1].convert('L'))
230
+ image_shape = image.shape
231
+
232
+ if image_process_type in ['image_outpainting']:
233
+ ret_data = self.image_anno_processor[image_process_type]['anno_ins'].forward(image, direction=outpainting_direction, expand_ratio=outpainting_ratio)
234
+ image, mask = ret_data['image'], ret_data['mask']
235
+ else:
236
+ image = self.image_anno_processor[image_process_type]['anno_ins'].forward(image)
237
+ if image.shape != image_shape:
238
+ image = cv2.resize(image, image_shape[:2][::-1], interpolation=cv2.INTER_LINEAR)
239
+
240
+ if mask_process_type in ["image_mask_seg"]:
241
+ mask = mask[..., None]
242
+ mask = self.mask_anno_processor[mask_process_type]['anno_ins'].forward(image, mask=mask, label=mask_segtag, caption=mask_segtag, mode=mask_type)['mask']
243
+ elif mask_process_type in ['image_mask_draw']:
244
+ ret_data = self.mask_anno_processor[mask_process_type]['anno_ins'].forward(mask=mask, mode=mask_type)
245
+ mask = ret_data['mask'] if isinstance(ret_data, dict) and 'mask' in ret_data else ret_data
246
+ elif mask_process_type in ['image_mask_face']:
247
+ ret_data = self.mask_anno_processor[mask_process_type]['anno_ins'].forward(image=image)
248
+ mask = ret_data['mask'] if isinstance(ret_data, dict) and 'mask' in ret_data else ret_data
249
+ else:
250
+ ret_data = self.mask_anno_processor[mask_process_type]['anno_ins'].forward(mask=mask)
251
+ mask = ret_data['mask'] if isinstance(ret_data, dict) and 'mask' in ret_data else ret_data
252
+
253
+ mask_cfg = {
254
+ 'mode': mask_aug_type,
255
+ 'kwargs': {
256
+ 'expand_ratio': mask_expand_ratio,
257
+ 'expand_iters': mask_expand_iters
258
+ }
259
+ }
260
+ if mask_aug_process_type == 'image_maskaug':
261
+ mask = self.maskaug_anno_processor[mask_aug_process_type]['anno_ins'].forward(np.array(mask), mask_cfg)
262
+ elif mask_aug_process_type in ["image_maskaug_region_random", "image_maskaug_region_crop"]:
263
+ image = self.maskaug_anno_processor[mask_aug_process_type]['anno_ins'].forward(np.array(image), np.array(mask), mask_cfg=mask_cfg)
264
+ else:
265
+ ret_data = self.maskaug_anno_processor[mask_aug_process_type]['anno_ins'].forward(mask=mask)
266
+ mask = ret_data['mask'] if isinstance(ret_data, dict) and 'mask' in ret_data else ret_data
267
+
268
+ if mask_opacity > 0:
269
+ if mask.shape[:2] != image.shape[:2]:
270
+ raise gr.Error(f"Mask shape {mask.shape[:2]} should be the same as image shape {image.shape[:2]} or set mask_opacity to 0.")
271
+ if mask_gray:
272
+ masked_image = image.copy()
273
+ masked_image[mask == 255] = 127
274
+ else:
275
+ mask_weight = mask / 255 * mask_opacity
276
+ masked_image = np.clip(image * (1 - mask_weight[:, :, None]), 0, 255).astype(np.uint8)
277
+ else:
278
+ masked_image = image
279
+ return image, masked_image, mask
280
+
281
+ def save_image_data(self, input_image, image, masked_image, mask):
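+ # Write the input/output images and mask as PNGs under save_dir/image/<YYYYMMDD>/, record the paths in a .txt file, and return them as a Markdown table.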
282
+ save_data = {
283
+ "input_image": input_image['background'].convert('RGB') if isinstance(input_image, dict) else input_image,
284
+ "input_image_mask": input_image['layers'][0].split()[-1].convert('L') if isinstance(input_image, dict) else None,
285
+ "output_image": image,
286
+ "output_masked_image": masked_image,
287
+ "output_image_mask": mask
288
+ }
289
+ save_info = {}
290
+ tid = tid_maker()
291
+ for name, image in save_data.items():
292
+ if image is None: continue
293
+ save_image_dir = os.path.join(self.save_dir, tid[:8])
294
+ if not os.path.exists(save_image_dir): os.makedirs(save_image_dir)
295
+ save_image_path = os.path.join(save_image_dir, tid + '-' + name + '.png')
296
+ save_info[name] = save_image_path
297
+ image.save(save_image_path)
298
+ gr.Info(f'Save {name} to {save_image_path}', duration=15)
299
+ save_txt_path = os.path.join(self.save_dir, tid[:8], tid + '.txt')
300
+ save_info['save_info'] = save_txt_path
301
+ with open(save_txt_path, 'w') as f:
302
+ f.write(json.dumps(save_info, ensure_ascii=False))
303
+ return dict_to_markdown_table(save_info)
304
+
305
+
306
+ def set_callbacks_image(self, **kwargs):
307
+ inputs = [self.input_process_image, self.image_process_type, self.outpainting_direction, self.outpainting_ratio, self.mask_process_type, self.mask_type, self.mask_segtag, self.mask_opacity, self.mask_gray, self.mask_aug_process_type, self.mask_aug_type, self.mask_expand_ratio, self.mask_expand_iters]
308
+ outputs = [self.output_process_image, self.output_process_masked_image, self.output_process_mask]
309
+ self.process_button.click(self.process_image_data,
310
+ inputs=inputs,
311
+ outputs=outputs)
312
+ self.save_button.click(self.save_image_data,
313
+ inputs=[self.input_process_image, self.output_process_image, self.output_process_masked_image, self.output_process_mask],
314
+ outputs=[self.save_log])
315
+ process_inputs = [self.image_process_type, self.mask_process_type, self.mask_aug_process_type]
316
+ process_outputs = [self.outpainting_setting, self.segment_setting, self.mask_type, self.maskaug_setting]
317
+ self.image_process_type.change(self.change_process_type, inputs=process_inputs, outputs=process_outputs)
318
+ self.mask_process_type.change(self.change_process_type, inputs=process_inputs, outputs=process_outputs)
319
+ self.mask_aug_process_type.change(self.change_process_type, inputs=process_inputs, outputs=process_outputs)
320
+
321
+
322
+ class VACEVideoTag():
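+ # Video tab: mirrors the image tab but loads the VACE video configs and operates on frame sequences.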
323
+ def __init__(self, cfg):
324
+ self.save_dir = os.path.join(cfg.save_dir, 'video')
325
+ if not os.path.exists(self.save_dir):
326
+ os.makedirs(self.save_dir)
327
+
328
+ self.video_anno_processor = {}
329
+ self.load_video_anno_list = ["plain", "depth", "flow", "gray", "pose", "scribble", "outpainting", "outpainting_inner", "framerefext"]
330
+ for anno_name, anno_cfg in copy.deepcopy(VACE_VIDEO_PREPROCCESS_CONFIGS).items():
331
+ if anno_name not in self.load_video_anno_list: continue
332
+ class_name = anno_cfg.pop("NAME")
333
+ input_params = anno_cfg.pop("INPUTS")
334
+ output_params = anno_cfg.pop("OUTPUTS")
335
+ anno_ins = getattr(annotators, class_name)(cfg=anno_cfg)
336
+ self.video_anno_processor[anno_name] = {"inputs": input_params, "outputs": output_params,
337
+ "anno_ins": anno_ins}
338
+
339
+ self.mask_anno_processor = {}
340
+ self.load_mask_anno_list = ["mask_expand", "mask_seg"]
341
+ for anno_name, anno_cfg in copy.deepcopy(VACE_VIDEO_MASK_PREPROCCESS_CONFIGS).items():
342
+ if anno_name not in self.load_mask_anno_list: continue
343
+ class_name = anno_cfg.pop("NAME")
344
+ input_params = anno_cfg.pop("INPUTS")
345
+ output_params = anno_cfg.pop("OUTPUTS")
346
+ anno_ins = getattr(annotators, class_name)(cfg=anno_cfg)
347
+ self.mask_anno_processor[anno_name] = {"inputs": input_params, "outputs": output_params,
348
+ "anno_ins": anno_ins}
349
+
350
+ self.maskaug_anno_processor = {}
351
+ self.load_maskaug_anno_list = ["maskaug_plain", "maskaug_invert", "maskaug", "maskaug_layout"]
352
+ for anno_name, anno_cfg in copy.deepcopy(VACE_VIDEO_MASKAUG_PREPROCCESS_CONFIGS).items():
353
+ if anno_name not in self.load_maskaug_anno_list: continue
354
+ class_name = anno_cfg.pop("NAME")
355
+ input_params = anno_cfg.pop("INPUTS")
356
+ output_params = anno_cfg.pop("OUTPUTS")
357
+ anno_ins = getattr(annotators, class_name)(cfg=anno_cfg)
358
+ self.maskaug_anno_processor[anno_name] = {"inputs": input_params, "outputs": output_params,
359
+ "anno_ins": anno_ins}
360
+
361
+
362
+ def create_ui_video(self, *args, **kwargs):
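+ # Layout: video upload, a preview of its first frame, an editable mask canvas, three output videos, and annotator settings.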
363
+ with gr.Row(variant="panel"):
364
+ with gr.Column(scale=1):
365
+ self.input_process_video = gr.Video(
366
+ label="input_process_video",
367
+ sources=['upload'],
368
+ interactive=True)
369
+ self.input_process_image_show = gr.Image(
370
+ label="input_process_image_show",
371
+ format='png',
372
+ interactive=False)
373
+ with gr.Column(scale=2):
374
+ self.input_process_image = gr.ImageMask(
375
+ label="input_process_image",
376
+ layers=False,
377
+ type='pil',
378
+ format='png',
379
+ interactive=True)
380
+ with gr.Row(variant="panel"):
381
+ with gr.Column(scale=1):
382
+ with gr.Row():
383
+ self.output_process_video = gr.Video(
384
+ label="output_process_video",
385
+ value=None,
386
+ interactive=False)
387
+ with gr.Column(scale=1):
388
+ with gr.Row():
389
+ self.output_process_masked_video = gr.Video(
390
+ label="output_process_masked_video",
391
+ value=None,
392
+ interactive=False)
393
+ with gr.Column(scale=1):
394
+ with gr.Row():
395
+ self.output_process_video_mask = gr.Video(
396
+ label="output_process_video_mask",
397
+ value=None,
398
+ interactive=False)
399
+ with gr.Row(variant="panel"):
400
+ with gr.Column(scale=1):
401
+ with gr.Row():
402
+ self.video_process_type = gr.Dropdown(
403
+ label='Video Annotator',
404
+ choices=list(self.video_anno_processor.keys()),
405
+ value=list(self.video_anno_processor.keys())[0],
406
+ interactive=True)
407
+ with gr.Row(visible=False) as self.outpainting_setting:
408
+ self.outpainting_direction = gr.Dropdown(
409
+ multiselect=True,
410
+ label='Outpainting Direction',
411
+ choices=['left', 'right', 'up', 'down'],
412
+ value=['left', 'right', 'up', 'down'],
413
+ interactive=True)
414
+ self.outpainting_ratio = gr.Slider(
415
+ label='Outpainting Ratio',
416
+ minimum=0.0,
417
+ maximum=2.0,
418
+ step=0.1,
419
+ value=0.3,
420
+ interactive=True)
421
+ with gr.Row(visible=False) as self.frame_reference_setting:
422
+ self.frame_reference_mode = gr.Dropdown(
423
+ label='Frame Reference Mode',
424
+ choices=['first', 'last', 'firstlast', 'random'],
425
+ value='first',
426
+ interactive=True)
427
+ self.frame_reference_num = gr.Textbox(
428
+ label='Frame Reference Num',
429
+ value='1',
430
+ interactive=True)
431
+ with gr.Column(scale=1):
432
+ with gr.Row():
433
+ self.mask_process_type = gr.Dropdown(
434
+ label='Mask Annotator',
435
+ choices=list(self.mask_anno_processor.keys()),
436
+ value=list(self.mask_anno_processor.keys())[0],
437
+ interactive=True)
438
+ with gr.Row():
439
+ self.mask_opacity = gr.Slider(
440
+ label='Mask Opacity',
441
+ minimum=0.0,
442
+ maximum=1.0,
443
+ step=0.1,
444
+ value=1.0,
445
+ interactive=True)
446
+ self.mask_gray = gr.Checkbox(
447
+ label='Mask Gray',
448
+ value=True,
449
+ interactive=True)
450
+ with gr.Row(visible=False) as self.segment_setting:
451
+ self.mask_type = gr.Dropdown(
452
+ label='Segment Type',
453
+ choices=['maskpointtrack', 'maskbboxtrack', 'masktrack', 'salientmasktrack', 'salientbboxtrack',
454
+ 'label', 'caption'],
455
+ value='maskpointtrack',
456
+ interactive=True)
457
+ self.mask_segtag = gr.Textbox(
458
+ label='Mask Seg Tag',
459
+ value='',
460
+ interactive=True)
461
+ with gr.Column(scale=1):
462
+ with gr.Row():
463
+ self.mask_aug_process_type = gr.Dropdown(
464
+ label='Mask Aug Annotator',
465
+ choices=list(self.maskaug_anno_processor.keys()),
466
+ value=list(self.maskaug_anno_processor.keys())[0],
467
+ interactive=True)
468
+ with gr.Row(visible=False) as self.maskaug_setting:
469
+ self.mask_aug_type = gr.Dropdown(
470
+ label='Mask Aug Type',
471
+ choices=['random', 'original', 'original_expand', 'hull', 'hull_expand', 'bbox', 'bbox_expand'],
472
+ value='original',
473
+ interactive=True)
474
+ self.mask_expand_ratio = gr.Slider(
475
+ label='Mask Expand Ratio',
476
+ minimum=0.0,
477
+ maximum=1.0,
478
+ step=0.1,
479
+ value=0.3,
480
+ interactive=True)
481
+ self.mask_expand_iters = gr.Slider(
482
+ label='Mask Expand Iters',
483
+ minimum=1,
484
+ maximum=10,
485
+ step=1,
486
+ value=5,
487
+ interactive=True)
488
+ self.mask_layout_label = gr.Textbox(
489
+ label='Mask Layout Label',
490
+ value='',
491
+ interactive=True)
492
+ with gr.Row(variant="panel"):
493
+ with gr.Column(scale=1):
494
+ with gr.Row():
495
+ self.process_button = gr.Button(
496
+ value='[1]Sample Process',
497
+ elem_classes='type_row',
498
+ elem_id='process_button',
499
+ visible=True)
500
+ with gr.Row():
501
+ self.save_button = gr.Button(
502
+ value='[2]Sample Save',
503
+ elem_classes='type_row',
504
+ elem_id='save_button',
505
+ visible=True)
506
+ with gr.Row():
507
+ self.save_log = gr.Markdown()
508
+
509
+ def process_video_data(self, input_process_video, input_process_image, video_process_type, outpainting_direction, outpainting_ratio, frame_reference_mode, frame_reference_num, mask_process_type, mask_type, mask_segtag, mask_opacity, mask_gray, mask_aug_process_type, mask_aug_type, mask_expand_ratio, mask_expand_iters, mask_layout_label):
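+ # Decode the input video, build per-frame masks (segmentation tracking or expansion of the drawn mask), run the selected video and mask-augmentation annotators, and write three temporary mp4s.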
510
+ video_frames, fps, width, height, total_frames = read_video_frames(input_process_video, use_type='cv2', info=True)
511
+
512
+ # image = np.array(input_process_image['background'].convert('RGB'))
513
+ mask = input_process_image['layers'][0].split()[-1].convert('L')
514
+ if mask.height != height or mask.width != width:
515
+ mask = mask.resize((width, height))
516
+
517
+ if mask_process_type in ['mask_seg']:
518
+ mask_data = self.mask_anno_processor[mask_process_type]['anno_ins'].forward(video=input_process_video, mask=mask, label=mask_segtag, caption=mask_segtag, mode=mask_type, return_frame=False)
519
+ mask_frames = mask_data['masks']
520
+ elif mask_process_type in ['mask_expand']:
521
+ mask_frames = self.mask_anno_processor[mask_process_type]['anno_ins'].forward(mask=np.array(mask), expand_num=total_frames)
522
+ else:
523
+ raise NotImplementedError
524
+
525
+ output_video = []
526
+ if video_process_type in ['framerefext']:
527
+ output_data = self.video_anno_processor[video_process_type]['anno_ins'].forward(video_frames, ref_cfg={'mode': frame_reference_mode}, ref_num=frame_reference_num)
528
+ output_video, mask_frames = output_data['frames'], output_data['masks']
529
+ elif video_process_type in ['outpainting', 'outpainting_inner']:
530
+ # ratio = ((16 / 9 * height) / width - 1) / 2
531
+ output_data = self.video_anno_processor[video_process_type]['anno_ins'].forward(video_frames, direction=outpainting_direction, expand_ratio=outpainting_ratio)
532
+ output_video, mask_frames = output_data['frames'], output_data['masks']
533
+ else:
534
+ output_video = self.video_anno_processor[video_process_type]['anno_ins'].forward(video_frames)
535
+
536
+
537
+ mask_cfg = {
538
+ 'mode': mask_aug_type,
539
+ 'kwargs': {
540
+ 'expand_ratio': mask_expand_ratio,
541
+ 'expand_iters': mask_expand_iters
542
+ }
543
+ }
544
+ # print(mask_cfg)
545
+ if mask_aug_process_type == 'maskaug_layout':
546
+ output_video = self.maskaug_anno_processor[mask_aug_process_type]['anno_ins'].forward(mask_frames, mask_cfg=mask_cfg, label=mask_layout_label)
547
+ mask_aug_frames = [ np.ones_like(submask) * 255 for submask in mask_frames ]
548
+ else:
549
+ mask_aug_frames = self.maskaug_anno_processor[mask_aug_process_type]['anno_ins'].forward(mask_frames)
550
+
551
+ with (tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as output_video_path, \
552
+ tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as masked_video_path, \
553
+ tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as mask_video_path):
554
+ output_video_writer = imageio.get_writer(output_video_path.name, codec='libx264', fps=fps, quality=8, macro_block_size=None)
555
+ masked_video_writer = imageio.get_writer(masked_video_path.name, codec='libx264', fps=fps, quality=8, macro_block_size=None)
556
+ mask_video_writer = imageio.get_writer(mask_video_path.name, codec='libx264', fps=fps, quality=8, macro_block_size=None)
557
+ for i in range(total_frames):
558
+ output_frame = output_video[i] if len(output_video) > 0 else video_frames[i]
559
+ frame = output_frame
560
+ mask = mask_aug_frames[i]
561
+ if mask_gray:
562
+ masked_image = frame.copy()
563
+ masked_image[mask == 255] = 127
564
+ else:
565
+ mask_weight = mask / 255 * mask_opacity
566
+ masked_image = np.clip(frame * (1 - mask_weight[:, :, None]), 0, 255).astype(np.uint8)
567
+ output_video_writer.append_data(output_frame)
568
+ masked_video_writer.append_data(masked_image)
569
+ mask_video_writer.append_data(mask)
570
+ output_video_writer.close()
571
+ masked_video_writer.close()
572
+ mask_video_writer.close()
573
+
574
+ return output_video_path.name, masked_video_path.name, mask_video_path.name
575
+
576
+ def save_video_data(self, input_video_path, input_image, video_path, masked_video_path, mask_path):
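+ # Save the drawn image/mask as PNGs and copy the temporary videos into save_dir/video/<YYYYMMDD>/, then return the saved paths as a Markdown table.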
577
+
578
+ save_image_data = {
579
+ "input_image": input_image['background'].convert('RGB') if isinstance(input_image, dict) else input_image,
580
+ "input_image_mask": input_image['layers'][0].split()[-1].convert('L') if isinstance(input_image, dict) else None
581
+ }
582
+ save_video_data = {
583
+ "input_video": input_video_path,
584
+ "output_video": video_path,
585
+ "output_masked_video": masked_video_path,
586
+ "output_video_mask": mask_path
587
+ }
588
+ save_info = {}
589
+ tid = tid_maker()
590
+ for name, image in save_image_data.items():
591
+ if image is None: continue
592
+ save_image_dir = os.path.join(self.save_dir, tid[:8])
593
+ if not os.path.exists(save_image_dir): os.makedirs(save_image_dir)
594
+ save_image_path = os.path.join(save_image_dir, tid + '-' + name + '.png')
595
+ save_info[name] = save_image_path
596
+ image.save(save_image_path)
597
+ gr.Info(f'Save {name} to {save_image_path}', duration=15)
598
+ for name, ori_video_path in save_video_data.items():
599
+ if ori_video_path is None: continue
600
+ save_video_dir = os.path.join(self.save_dir, tid[:8])
601
+ if not os.path.exists(save_video_dir): os.makedirs(save_video_dir)
602
+ save_video_path = os.path.join(save_video_dir, tid + '-' + name + os.path.splitext(ori_video_path)[-1])
603
+ save_info[name] = save_video_path
604
+ shutil.copy(ori_video_path, save_video_path)
605
+ gr.Info(f'Save {name} to {save_video_path}', duration=15)
606
+
607
+ save_txt_path = os.path.join(self.save_dir, tid[:8], tid + '.txt')
608
+ save_info['save_info'] = save_txt_path
609
+ with open(save_txt_path, 'w') as f:
610
+ f.write(json.dumps(save_info, ensure_ascii=False))
611
+ return dict_to_markdown_table(save_info)
612
+
613
+
614
+ def change_process_type(self, video_process_type, mask_process_type, mask_aug_process_type):
615
+ frame_reference_setting_visible = False
616
+ outpainting_setting_visible = False
617
+ segment_setting = False
618
+ maskaug_setting = False
619
+ if video_process_type in ["framerefext"]:
620
+ frame_reference_setting_visible = True
621
+ elif video_process_type in ["outpainting", "outpainting_inner"]:
622
+ outpainting_setting_visible = True
623
+ if mask_process_type in ["mask_seg"]:
624
+ segment_setting = True
625
+ if mask_aug_process_type in ["maskaug", "maskaug_layout"]:
626
+ maskaug_setting = True
627
+ return gr.update(visible=frame_reference_setting_visible), gr.update(visible=outpainting_setting_visible), gr.update(visible=segment_setting), gr.update(visible=maskaug_setting)
628
+
629
+
630
+ def set_callbacks_video(self, **kwargs):
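+ # Wire the process/save buttons and the setting-visibility dropdowns; uploading a video also shows its first frame in input_process_image_show.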
631
+ inputs = [self.input_process_video, self.input_process_image, self.video_process_type, self.outpainting_direction, self.outpainting_ratio, self.frame_reference_mode, self.frame_reference_num, self.mask_process_type, self.mask_type, self.mask_segtag, self.mask_opacity, self.mask_gray, self.mask_aug_process_type, self.mask_aug_type, self.mask_expand_ratio, self.mask_expand_iters, self.mask_layout_label]
632
+ outputs = [self.output_process_video, self.output_process_masked_video, self.output_process_video_mask]
633
+ self.process_button.click(self.process_video_data, inputs=inputs, outputs=outputs)
634
+ self.input_process_video.change(read_video_one_frame, inputs=[self.input_process_video], outputs=[self.input_process_image_show])
635
+ self.save_button.click(self.save_video_data,
636
+ inputs=[self.input_process_video, self.input_process_image, self.output_process_video, self.output_process_masked_video, self.output_process_video_mask],
637
+ outputs=[self.save_log])
638
+ process_inputs = [self.video_process_type, self.mask_process_type, self.mask_aug_process_type]
639
+ process_outputs = [self.frame_reference_setting, self.outpainting_setting, self.segment_setting, self.maskaug_setting]
640
+ self.video_process_type.change(self.change_process_type, inputs=process_inputs, outputs=process_outputs)
641
+ self.mask_process_type.change(self.change_process_type, inputs=process_inputs, outputs=process_outputs)
642
+ self.mask_aug_process_type.change(self.change_process_type, inputs=process_inputs, outputs=process_outputs)
643
+
644
+
645
+
646
+ class VACETagComposition():
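+ # Composition tab: merges two video/mask pairs with the 'composition' annotator according to the selected task types (repaint / extension / control).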
647
+ def __init__(self, cfg):
648
+ self.save_dir = os.path.join(cfg.save_dir, 'composition')
649
+ if not os.path.exists(self.save_dir):
650
+ os.makedirs(self.save_dir)
651
+
652
+ anno_name = 'composition'
653
+ anno_cfg = copy.deepcopy(VACE_COMPOSITION_PREPROCCESS_CONFIGS[anno_name])
654
+ class_name = anno_cfg.pop("NAME")
655
+ input_params = anno_cfg.pop("INPUTS")
656
+ output_params = anno_cfg.pop("OUTPUTS")
657
+ anno_ins = getattr(annotators, class_name)(cfg=anno_cfg)
658
+ self.comp_anno_processor = {"inputs": input_params, "outputs": output_params,
659
+ "anno_ins": anno_ins}
660
+ self.process_types = ["repaint", "extension", "control"]
661
+
662
+ def create_ui_composition(self, *args, **kwargs):
663
+ with gr.Row(variant="panel"):
664
+ with gr.Column(scale=1):
665
+ self.input_process_video_1 = gr.Video(
666
+ label="input_process_video_1",
667
+ sources=['upload'],
668
+ interactive=True)
669
+ with gr.Column(scale=1):
670
+ self.input_process_video_2 = gr.Video(
671
+ label="input_process_video_1",
672
+ sources=['upload'],
673
+ interactive=True)
674
+ with gr.Row(variant="panel"):
675
+ with gr.Column(scale=1):
676
+ with gr.Row():
677
+ self.input_process_video_mask_1 = gr.Video(
678
+ label="input_process_video_mask_1",
679
+ sources=['upload'],
680
+ interactive=True)
681
+ with gr.Column(scale=1):
682
+ with gr.Row():
683
+ self.input_process_video_mask_2 = gr.Video(
684
+ label="input_process_video_mask_2",
685
+ sources=['upload'],
686
+ interactive=True)
687
+ with gr.Row(variant="panel"):
688
+ with gr.Column(scale=1):
689
+ with gr.Row():
690
+ self.input_process_type_1 = gr.Dropdown(
691
+ label='input_process_type_1',
692
+ choices=list(self.process_types),
693
+ value=list(self.process_types)[0],
694
+ interactive=True)
695
+ with gr.Column(scale=1):
696
+ with gr.Row():
697
+ self.input_process_type_2 = gr.Dropdown(
698
+ label='input_process_type_2',
699
+ choices=list(self.process_types),
700
+ value=list(self.process_types)[0],
701
+ interactive=True)
702
+ with gr.Row(variant="panel"):
703
+ with gr.Column(scale=1):
704
+ with gr.Row():
705
+ self.process_button = gr.Button(
706
+ value='[1]Sample Process',
707
+ elem_classes='type_row',
708
+ elem_id='process_button',
709
+ visible=True)
710
+ with gr.Row(variant="panel"):
711
+ with gr.Column(scale=1):
712
+ self.output_process_video = gr.Video(
713
+ label="output_process_video",
714
+ sources=['upload'],
715
+ interactive=False)
716
+ with gr.Column(scale=1):
717
+ self.output_process_mask = gr.Video(
718
+ label="output_process_mask",
719
+ sources=['upload'],
720
+ interactive=False)
721
+ with gr.Row(variant="panel"):
722
+ with gr.Column(scale=1):
723
+ with gr.Row():
724
+ self.save_button = gr.Button(
725
+ value='[2]Sample Save',
726
+ elem_classes='type_row',
727
+ elem_id='save_button',
728
+ visible=True)
729
+ with gr.Row():
730
+ self.save_log = gr.Markdown()
731
+
732
+ def process_composition_data(self, input_process_video_1, input_process_video_2, input_process_video_mask_1, input_process_video_mask_2, input_process_type_1, input_process_type_2):
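+ # Decode both video/mask pairs, binarize the masks, check that resolution and fps match, then compose them and write the results to temporary mp4s.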
733
+ # "repaint", "extension", "control"
734
+ # ('repaint', 'repaint') / ('repaint', 'extension') / ('repaint', 'control')
735
+ # ('extension', 'extension') / ('extension', 'repaint') / ('extension', 'control')
736
+ # ('control', 'control') / ('control', 'repaint') / ('control', 'extension')
737
+
738
+ video_frames_1, video_fps_1, video_width_1, video_height_1, video_total_frames_1 = read_video_frames(input_process_video_1, use_type='cv2', info=True)
739
+ video_frames_2, video_fps_2, video_width_2, video_height_2, video_total_frames_2 = read_video_frames(input_process_video_2, use_type='cv2', info=True)
740
+ mask_frames_1, mask_fps_1, mask_width_1, mask_height_1, mask_total_frames_1 = read_video_frames(input_process_video_mask_1, use_type='cv2', info=True)
741
+ mask_frames_2, mask_fps_2, mask_width_2, mask_height_2, mask_total_frames_2 = read_video_frames(input_process_video_mask_2, use_type='cv2', info=True)
742
+ mask_frames_1 = [np.where(mask > 127, 1, 0).astype(np.uint8) for mask in mask_frames_1]
743
+ mask_frames_2 = [np.where(mask > 127, 1, 0).astype(np.uint8) for mask in mask_frames_2]
744
+
745
+ assert video_width_1 == video_width_2 == mask_width_1 == mask_width_2
746
+ assert video_height_1 == video_height_2 == mask_height_1 == mask_height_2
747
+ assert video_fps_1 == video_fps_2
748
+
749
+ output_video, output_mask = self.comp_anno_processor['anno_ins'].forward(input_process_type_1, input_process_type_2, video_frames_1, video_frames_2, mask_frames_1, mask_frames_2)
750
+
751
+ fps = video_fps_1
752
+ total_frames = len(output_video)
753
+ if output_video is not None and output_mask is not None:
754
+ with (tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as output_video_path, \
755
+ tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as mask_video_path):
756
+ output_video_writer = imageio.get_writer(output_video_path.name, codec='libx264', fps=fps, quality=8, macro_block_size=None)
757
+ mask_video_writer = imageio.get_writer(mask_video_path.name, codec='libx264', fps=fps, quality=8, macro_block_size=None)
758
+ for i in range(total_frames):
759
+ output_video_writer.append_data(output_video[i])
760
+ mask_video_writer.append_data(output_mask[i])
761
+ output_video_writer.close()
762
+ mask_video_writer.close()
763
+
764
+ return output_video_path.name, mask_video_path.name
765
+ else:
766
+ return None, None
767
+
768
+ def save_composition_data(self, video_path, mask_path):
769
+ save_video_data = {
770
+ "output_video": video_path,
771
+ "output_video_mask": mask_path
772
+ }
773
+ save_info = {}
774
+ tid = tid_maker()
775
+ for name, ori_video_path in save_video_data.items():
776
+ if ori_video_path is None: continue
777
+ save_video_dir = os.path.join(self.save_dir, tid[:8])
778
+ if not os.path.exists(save_video_dir): os.makedirs(save_video_dir)
779
+ save_video_path = os.path.join(save_video_dir, tid + '-' + name + os.path.splitext(ori_video_path)[-1])
780
+ save_info[name] = save_video_path
781
+ shutil.copy(ori_video_path, save_video_path)
782
+ gr.Info(f'Save {name} to {save_video_path}', duration=15)
783
+ save_txt_path = os.path.join(self.save_dir, tid[:8], tid + '.txt')
784
+ save_info['save_info'] = save_txt_path
785
+ with open(save_txt_path, 'w') as f:
786
+ f.write(json.dumps(save_info, ensure_ascii=False))
787
+ return dict_to_markdown_table(save_info)
788
+
789
+ def set_callbacks_composition(self, **kwargs):
790
+ inputs = [self.input_process_video_1, self.input_process_video_2, self.input_process_video_mask_1, self.input_process_video_mask_2, self.input_process_type_1, self.input_process_type_2]
791
+ outputs = [self.output_process_video, self.output_process_mask]
792
+ self.process_button.click(self.process_composition_data,
793
+ inputs=inputs,
794
+ outputs=outputs)
795
+ self.save_button.click(self.save_composition_data,
796
+ inputs=[self.output_process_video, self.output_process_mask],
797
+ outputs=[self.save_log])
798
+
799
+
800
+ class VACEVideoTool():
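+ # Helper tab for building blank clips, frame expansions, and concatenations with matching masks; its instantiation is commented out in VACETag below.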
801
+ def __init__(self, cfg):
802
+ self.save_dir = os.path.join(cfg.save_dir, 'video_tool')
803
+ if not os.path.exists(self.save_dir):
804
+ os.makedirs(self.save_dir)
805
+ self.process_types = ["expand_frame", "expand_clipframe", "concat_clip", "blank_mask"]
806
+
807
+ def create_ui_video_tool(self, *args, **kwargs):
808
+ with gr.Row(variant="panel"):
809
+ with gr.Column(scale=1):
810
+ with gr.Row():
811
+ self.input_process_image_1 = gr.Image(
812
+ label="input_process_image_1",
813
+ type='pil',
814
+ format='png',
815
+ interactive=True)
816
+ with gr.Column(scale=1):
817
+ with gr.Row():
818
+ self.input_process_image_2 = gr.Image(
819
+ label="input_process_image_2",
820
+ type='pil',
821
+ format='png',
822
+ interactive=True)
823
+ with gr.Row(variant="panel"):
824
+ with gr.Column(scale=1):
825
+ self.input_process_video_1 = gr.Video(
826
+ label="input_process_video_1",
827
+ sources=['upload'],
828
+ interactive=True)
829
+ with gr.Column(scale=1):
830
+ self.input_process_video_2 = gr.Video(
831
+ label="input_process_video_1",
832
+ sources=['upload'],
833
+ interactive=True)
834
+ with gr.Row(variant="panel"):
835
+ with gr.Column(scale=1):
836
+ with gr.Row():
837
+ self.input_process_video_mask_1 = gr.Video(
838
+ label="input_process_video_mask_1",
839
+ sources=['upload'],
840
+ interactive=True)
841
+ with gr.Column(scale=1):
842
+ with gr.Row():
843
+ self.input_process_video_mask_2 = gr.Video(
844
+ label="input_process_video_mask_2",
845
+ sources=['upload'],
846
+ interactive=True)
847
+ with gr.Row(variant="panel"):
848
+ with gr.Column(scale=1):
849
+ with gr.Row():
850
+ self.input_process_type = gr.Dropdown(
851
+ label='input_process_type',
852
+ choices=list(self.process_types),
853
+ value=list(self.process_types)[0],
854
+ interactive=True)
855
+ with gr.Row(variant="panel"):
856
+ with gr.Column(scale=1):
857
+ with gr.Row():
858
+ self.output_height = gr.Textbox(
859
+ label='resolutions_height',
860
+ value=720,
861
+ interactive=True)
862
+ self.output_width = gr.Textbox(
863
+ label='resolutions_width',
864
+ value=1280,
865
+ interactive=True)
866
+ self.frame_rate = gr.Textbox(
867
+ label='frame_rate',
868
+ value=16,
869
+ interactive=True)
870
+ self.num_frames = gr.Textbox(
871
+ label='num_frames',
872
+ value=81,
873
+ interactive=True)
874
+ self.mask_gray = gr.Checkbox(
875
+ label='Mask Gray',
876
+ value=False,
877
+ interactive=True)
878
+ with gr.Row(variant="panel"):
879
+ with gr.Column(scale=1):
880
+ with gr.Row():
881
+ self.process_button = gr.Button(
882
+ value='[1]Sample Process',
883
+ elem_classes='type_row',
884
+ elem_id='process_button',
885
+ visible=True)
886
+ with gr.Row(variant="panel"):
887
+ with gr.Column(scale=1):
888
+ with gr.Row():
889
+ self.output_process_image = gr.Image(
890
+ label="output_process_image",
891
+ value=None,
892
+ type='pil',
893
+ image_mode='RGB',
894
+ format='png',
895
+ interactive=False)
896
+ with gr.Column(scale=1):
897
+ self.output_process_video = gr.Video(
898
+ label="output_process_video",
899
+ sources=['upload'],
900
+ interactive=False)
901
+ with gr.Column(scale=1):
902
+ self.output_process_mask = gr.Video(
903
+ label="output_process_mask",
904
+ sources=['upload'],
905
+ interactive=False)
906
+ with gr.Row(variant="panel"):
907
+ with gr.Column(scale=1):
908
+ with gr.Row():
909
+ self.save_button = gr.Button(
910
+ value='[2]Sample Save',
911
+ elem_classes='type_row',
912
+ elem_id='save_button',
913
+ visible=True)
914
+ with gr.Row():
915
+ self.save_log = gr.Markdown()
916
+
917
+ def process_tool_data(self, input_process_image_1, input_process_image_2, input_process_video_1, input_process_video_2, input_process_video_mask_1, input_process_video_mask_2, input_process_type, output_height, output_width, frame_rate, num_frames):
918
+ output_height, output_width, frame_rate, num_frames = int(output_height), int(output_width), int(frame_rate), int(num_frames)
919
+ output_video, output_mask = None, None
920
+ if input_process_type == 'expand_frame':
921
+ assert input_process_image_1 or input_process_image_2
922
+ output_video = [np.ones((output_height, output_width, 3), dtype=np.uint8) * 127] * num_frames
923
+ output_mask = [np.ones((output_height, output_width), dtype=np.uint8) * 255] * num_frames
924
+ if input_process_image_1 is not None:
925
+ output_video[0] = np.array(input_process_image_1.resize((output_width, output_height)))
926
+ output_mask[0] = np.zeros((output_height, output_width), dtype=np.uint8)
927
+ if input_process_image_2 is not None:
928
+ output_video[-1] = np.array(input_process_image_2.resize((output_width, output_height)))
929
+ output_mask[-1] = np.zeros((output_height, output_width), dtype=np.uint8)
930
+ elif input_process_type == 'expand_clipframe':
931
+ video_frames, fps, width, height, total_frames = read_video_frames(input_process_video_1, use_type='cv2', info=True)
932
+ frame_rate = fps
933
+ output_video = video_frames + [np.ones((height, width, 3), dtype=np.uint8) * 127] * num_frames
934
+ output_mask = [np.zeros((height, width), dtype=np.uint8)] * total_frames + [np.ones((height, width), dtype=np.uint8) * 255] * num_frames
935
+ output_video[-1] = np.array(input_process_image_2.resize((width, height)))
936
+ output_mask[-1] = np.zeros((height, width), dtype=np.uint8)
937
+ elif input_process_type == 'concat_clip':
938
+ video_frames_1, fps_1, width_1, height_1, total_frames_1 = read_video_frames(input_process_video_1, use_type='cv2', info=True)
939
+ video_frames_2, fps_2, width_2, height_2, total_frames_2 = read_video_frames(input_process_video_2, use_type='cv2', info=True)
940
+ if width_1 != width_2 or height_1 != height_2:
941
+ video_frames_2 = [np.array(frame.resize((width_1, height_1))) for frame in video_frames_2]
942
+ frame_rate = fps_1
943
+ output_video = video_frames_1 + video_frames_2
944
+ output_mask = [np.ones((height_1, width_1), dtype=np.uint8) * 255] * len(output_video)
945
+ elif input_process_type == 'blank_mask':
946
+ output_mask = [np.ones((output_height, output_width), dtype=np.uint8) * 255] * num_frames
947
+ else:
948
+ raise NotImplementedError
949
+ output_image_path = None
950
+
951
+ if output_video is not None:
952
+ with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as output_path:
953
+ flag = save_one_video(videos=output_video, file_path=output_path.name, fps=frame_rate)
954
+ output_video_path = output_path.name if flag else None
955
+ else:
956
+ output_video_path = None
957
+
958
+ if output_mask is not None:
959
+ with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as output_path:
960
+ flag = save_one_video(videos=output_mask, file_path=output_path.name, fps=frame_rate)
961
+ output_mask_path = output_path.name if flag else None
962
+ else:
963
+ output_mask_path = None
964
+ return output_image_path, output_video_path, output_mask_path
965
+
966
+
967
+ def save_tool_data(self, image_path, video_path, mask_path):
968
+ save_video_data = {
969
+ "output_video": video_path,
970
+ "output_video_mask": mask_path
971
+ }
972
+ save_info = {}
973
+ tid = tid_maker()
974
+ for name, ori_video_path in save_video_data.items():
975
+ if ori_video_path is None: continue
976
+ save_video_dir = os.path.join(self.save_dir, tid[:8])
+ if not os.path.exists(save_video_dir): os.makedirs(save_video_dir)
+ save_video_path = os.path.join(save_video_dir, tid + '-' + name + os.path.splitext(ori_video_path)[-1])
977
+ save_info[name] = save_video_path
978
+ shutil.copy(ori_video_path, save_video_path)
979
+ gr.Info(f'Save {name} to {save_video_path}', duration=15)
980
+ save_txt_path = os.path.join(self.save_dir, tid[:8], tid + '.txt')
981
+ save_info['save_info'] = save_txt_path
982
+ with open(save_txt_path, 'w') as f:
983
+ f.write(json.dumps(save_info, ensure_ascii=False))
984
+ return dict_to_markdown_table(save_info)
985
+
986
+ def set_callbacks_video_tool(self, **kwargs):
987
+ inputs = [self.input_process_image_1, self.input_process_image_2, self.input_process_video_1, self.input_process_video_2, self.input_process_video_mask_1, self.input_process_video_mask_2, self.input_process_type, self.output_height, self.output_width, self.frame_rate, self.num_frames]
988
+ outputs = [self.output_process_image, self.output_process_video, self.output_process_mask]
989
+ self.process_button.click(self.process_tool_data,
990
+ inputs=inputs,
991
+ outputs=outputs)
992
+ self.save_button.click(self.save_tool_data,
993
+ inputs=[self.output_process_image, self.output_process_video, self.output_process_mask],
994
+ outputs=[self.save_log])
995
+
996
+
997
+ class VACETag():
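+ # Top-level app: assembles the video, image, and composition tabs and registers their callbacks.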
998
+
999
+ def __init__(self, cfg):
1000
+ self.cfg = cfg
1001
+ self.save_dir = cfg.save_dir
1002
+ self.current_index = 0
1003
+ self.loaded_data = {}
1004
+
1005
+ self.vace_video_tag = VACEVideoTag(cfg)
1006
+ self.vace_image_tag = VACEImageTag(cfg)
1007
+ self.vace_tag_composition = VACETagComposition(cfg)
1008
+ # self.vace_video_tool = VACEVideoTool(cfg)
1009
+
1010
+
1011
+ def create_ui(self, *args, **kwargs):
1012
+ gr.Markdown("""
1013
+ <div style="text-align: center; font-size: 24px; font-weight: bold; margin-bottom: 15px;">
1014
+ <a href="https://ali-vilab.github.io/VACE-Page/" style="text-decoration: none; color: inherit;">VACE Preprocessor</a>
1015
+ </div>
1016
+ """)
1017
+ with gr.Tabs(elem_id='VACE Tag') as vace_tab:
1018
+ with gr.TabItem('VACE Video Tag', id=1, elem_id='video_tab'):
1019
+ self.vace_video_tag.create_ui_video(*args, **kwargs)
1020
+ with gr.TabItem('VACE Image Tag', id=2, elem_id='image_tab'):
1021
+ self.vace_image_tag.create_ui_image(*args, **kwargs)
1022
+ with gr.TabItem('VACE Composition Tag', id=3, elem_id='composition_tab'):
1023
+ self.vace_tag_composition.create_ui_composition(*args, **kwargs)
1024
+ # with gr.TabItem('VACE Video Tool', id=4, elem_id='video_tool_tab'):
1025
+ # self.vace_video_tool.create_ui_video_tool(*args, **kwargs)
1026
+
1027
+
1028
+ def set_callbacks(self, **kwargs):
1029
+ self.vace_video_tag.set_callbacks_video(**kwargs)
1030
+ self.vace_image_tag.set_callbacks_image(**kwargs)
1031
+ self.vace_tag_composition.set_callbacks_composition(**kwargs)
1032
+ # self.vace_video_tool.set_callbacks_video_tool(**kwargs)
1033
+
1034
+
1035
+ if __name__ == '__main__':
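+ # CLI entry point: parse the server options, build the Gradio Blocks UI, and launch the demo.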
1036
+ parser = argparse.ArgumentParser(description='Argparser for VACE-Preprocessor:\n')
1037
+ parser.add_argument('--server_port', dest='server_port', help='', default=7860)
1038
+ parser.add_argument('--server_name', dest='server_name', help='', default='0.0.0.0')
1039
+ parser.add_argument('--root_path', dest='root_path', help='', default=None)
1040
+ parser.add_argument('--save_dir', dest='save_dir', help='', default='cache')
1041
+ args = parser.parse_args()
1042
+
1043
+ if not os.path.exists(args.save_dir):
1044
+ os.makedirs(args.save_dir, exist_ok=True)
1045
+
1046
+ vace_tag = VACETag(args)
1047
+ with gr.Blocks() as demo:
1048
+ vace_tag.create_ui()
1049
+ vace_tag.set_callbacks()
1050
+ demo.queue(status_update_rate=1).launch(server_name=args.server_name,
1051
+ server_port=int(args.server_port),
1052
+ show_api=False, show_error=True,
1053
+ debug=True)
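+
+ # Usage sketch (assumes the vace package, its preprocess configs, and the annotator weights are available):
+ #   python app.py --server_name 0.0.0.0 --server_port 7860 --save_dir cache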