toto10 committed
Commit 5bfcfd2
1 Parent(s): a0611b0

SD-CN-Animation/scripts/core/__pycache__/txt2vid.cpython-310.pyc ADDED
Binary file (5.74 kB)
 
SD-CN-Animation/scripts/core/__pycache__/utils.cpython-310.pyc ADDED
Binary file (10.6 kB)
 
SD-CN-Animation/scripts/core/__pycache__/vid2vid.cpython-310.pyc ADDED
Binary file (6.03 kB)
 
SD-CN-Animation/scripts/core/flow_utils.py ADDED
@@ -0,0 +1,156 @@
+ import sys, os
+
+ import numpy as np
+ import cv2
+
+ from collections import namedtuple
+ import torch
+ import argparse
+ from RAFT.raft import RAFT
+ from RAFT.utils.utils import InputPadder
+
+ import modules.paths as ph
+ import gc
+
+ RAFT_model = None
+ fgbg = cv2.createBackgroundSubtractorMOG2(history=500, varThreshold=16, detectShadows=True)
+
+ def background_subtractor(frame, fgbg):
+   fgmask = fgbg.apply(frame)
+   return cv2.bitwise_and(frame, frame, mask=fgmask)
+
+ def RAFT_clear_memory():
+   global RAFT_model
+   del RAFT_model
+   gc.collect()
+   torch.cuda.empty_cache()
+   RAFT_model = None
+
+ def RAFT_estimate_flow(frame1, frame2, device='cuda'):
+   global RAFT_model
+
+   org_size = frame1.shape[1], frame1.shape[0]
+   size = frame1.shape[1] // 16 * 16, frame1.shape[0] // 16 * 16
+   frame1 = cv2.resize(frame1, size)
+   frame2 = cv2.resize(frame2, size)
+
+   model_path = ph.models_path + '/RAFT/raft-things.pth'
+   remote_model_path = 'https://drive.google.com/uc?id=1MqDajR89k-xLV0HIrmJ0k-n8ZpG6_suM'
+
+   if not os.path.isfile(model_path):
+     from basicsr.utils.download_util import load_file_from_url
+     os.makedirs(os.path.dirname(model_path), exist_ok=True)
+     load_file_from_url(remote_model_path, file_name=model_path)
+
+   if RAFT_model is None:
+     args = argparse.Namespace(**{
+       'model': ph.models_path + '/RAFT/raft-things.pth',
+       'mixed_precision': True,
+       'small': False,
+       'alternate_corr': False,
+       'path': ""
+     })
+
+     RAFT_model = torch.nn.DataParallel(RAFT(args))
+     RAFT_model.load_state_dict(torch.load(args.model))
+
+     RAFT_model = RAFT_model.module
+     RAFT_model.to(device)
+     RAFT_model.eval()
+
+   with torch.no_grad():
+     frame1_torch = torch.from_numpy(frame1).permute(2, 0, 1).float()[None].to(device)
+     frame2_torch = torch.from_numpy(frame2).permute(2, 0, 1).float()[None].to(device)
+
+     padder = InputPadder(frame1_torch.shape)
+     image1, image2 = padder.pad(frame1_torch, frame2_torch)
+
+     # estimate optical flow
+     _, next_flow = RAFT_model(image1, image2, iters=20, test_mode=True)
+     _, prev_flow = RAFT_model(image2, image1, iters=20, test_mode=True)
+
+     next_flow = next_flow[0].permute(1, 2, 0).cpu().numpy()
+     prev_flow = prev_flow[0].permute(1, 2, 0).cpu().numpy()
+
+     fb_flow = next_flow + prev_flow
+     fb_norm = np.linalg.norm(fb_flow, axis=2)
+
+     occlusion_mask = fb_norm[..., None].repeat(3, axis=-1)
+
+   next_flow = cv2.resize(next_flow, org_size)
+   prev_flow = cv2.resize(prev_flow, org_size)
+
+   return next_flow, prev_flow, occlusion_mask
+
+ def compute_diff_map(next_flow, prev_flow, prev_frame, cur_frame, prev_frame_styled, args_dict):
+   h, w = cur_frame.shape[:2]
+   fl_w, fl_h = next_flow.shape[:2]
+
+   # normalize flow
+   next_flow = next_flow / np.array([fl_h, fl_w])
+   prev_flow = prev_flow / np.array([fl_h, fl_w])
+
+   # compute occlusion mask
+   fb_flow = next_flow + prev_flow
+   fb_norm = np.linalg.norm(fb_flow, axis=2)
+
+   zero_flow_mask = np.clip(1 - np.linalg.norm(prev_flow, axis=-1)[..., None] * 20, 0, 1)
+   diff_mask_flow = fb_norm[..., None] * zero_flow_mask
+
+   # resize flow
+   next_flow = cv2.resize(next_flow, (w, h))
+   next_flow = (next_flow * np.array([h, w])).astype(np.float32)
+   prev_flow = cv2.resize(prev_flow, (w, h))
+   prev_flow = (prev_flow * np.array([h, w])).astype(np.float32)
+
+   # Generate sampling grids
+   grid_y, grid_x = torch.meshgrid(torch.arange(0, h), torch.arange(0, w))
+   flow_grid = torch.stack((grid_x, grid_y), dim=0).float()
+   flow_grid += torch.from_numpy(prev_flow).permute(2, 0, 1)
+   flow_grid = flow_grid.unsqueeze(0)
+   flow_grid[:, 0, :, :] = 2 * flow_grid[:, 0, :, :] / (w - 1) - 1
+   flow_grid[:, 1, :, :] = 2 * flow_grid[:, 1, :, :] / (h - 1) - 1
+   flow_grid = flow_grid.permute(0, 2, 3, 1)
+
+   prev_frame_torch = torch.from_numpy(prev_frame).float().unsqueeze(0).permute(0, 3, 1, 2) # N, C, H, W
+   prev_frame_styled_torch = torch.from_numpy(prev_frame_styled).float().unsqueeze(0).permute(0, 3, 1, 2) # N, C, H, W
+
+   warped_frame = torch.nn.functional.grid_sample(prev_frame_torch, flow_grid, mode="nearest", padding_mode="reflection", align_corners=True).permute(0, 2, 3, 1)[0].numpy()
+   warped_frame_styled = torch.nn.functional.grid_sample(prev_frame_styled_torch, flow_grid, mode="nearest", padding_mode="reflection", align_corners=True).permute(0, 2, 3, 1)[0].numpy()
+
+   #warped_frame = cv2.remap(prev_frame, flow_map, None, cv2.INTER_NEAREST, borderMode=cv2.BORDER_REFLECT)
+   #warped_frame_styled = cv2.remap(prev_frame_styled, flow_map, None, cv2.INTER_NEAREST, borderMode=cv2.BORDER_REFLECT)
+
+   diff_mask_org = np.abs(warped_frame.astype(np.float32) - cur_frame.astype(np.float32)) / 255
+   diff_mask_org = diff_mask_org.max(axis=-1, keepdims=True)
+
+   diff_mask_stl = np.abs(warped_frame_styled.astype(np.float32) - cur_frame.astype(np.float32)) / 255
+   diff_mask_stl = diff_mask_stl.max(axis=-1, keepdims=True)
+
+   alpha_mask = np.maximum.reduce([diff_mask_flow * args_dict['occlusion_mask_flow_multiplier'] * 10,
+                                   diff_mask_org * args_dict['occlusion_mask_difo_multiplier'],
+                                   diff_mask_stl * args_dict['occlusion_mask_difs_multiplier']])
+   alpha_mask = alpha_mask.repeat(3, axis=-1)
+
+   #alpha_mask_blured = cv2.dilate(alpha_mask, np.ones((5, 5), np.float32))
+   if args_dict['occlusion_mask_blur'] > 0:
+     blur_filter_size = min(w, h) // 15 | 1
+     alpha_mask = cv2.GaussianBlur(alpha_mask, (blur_filter_size, blur_filter_size), args_dict['occlusion_mask_blur'], cv2.BORDER_REFLECT)
+
+   alpha_mask = np.clip(alpha_mask, 0, 1)
+
+   return alpha_mask, warped_frame_styled
+
+ def frames_norm(frame): return frame / 127.5 - 1
+
+ def flow_norm(flow): return flow / 255
+
+ def occl_norm(occl): return occl / 127.5 - 1
+
+ def frames_renorm(frame): return (frame + 1) * 127.5
+
+ def flow_renorm(flow): return flow * 255
+
+ def occl_renorm(occl): return (occl + 1) * 127.5
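
The added flow_utils.py exposes two building blocks: RAFT_estimate_flow() returns forward flow, backward flow, and a raw occlusion estimate for a frame pair, and compute_diff_map() turns those flows plus the previous (styled) frame into an alpha mask for blending. A minimal usage sketch follows; it assumes the RAFT package and model weights the module expects are installed, the frame file names are placeholders, and the multiplier values are arbitrary examples, not recommended settings.

  # Illustrative sketch only; paths and multiplier values are made up.
  import cv2
  from scripts.core import flow_utils

  prev_frame = cv2.cvtColor(cv2.imread('frame_0001.png'), cv2.COLOR_BGR2RGB)
  cur_frame  = cv2.cvtColor(cv2.imread('frame_0002.png'), cv2.COLOR_BGR2RGB)

  # Forward/backward optical flow plus a raw occlusion estimate.
  next_flow, prev_flow, occl = flow_utils.RAFT_estimate_flow(prev_frame, cur_frame, device='cuda')

  args_dict = {
    'occlusion_mask_flow_multiplier': 5,   # example values, not defaults
    'occlusion_mask_difo_multiplier': 2,
    'occlusion_mask_difs_multiplier': 0,
    'occlusion_mask_blur': 3,
  }
  # prev_frame_styled would normally be the previous stylized frame; the raw frame is reused here.
  alpha_mask, warped_styled = flow_utils.compute_diff_map(
    next_flow, prev_flow, prev_frame, cur_frame, prev_frame, args_dict)

In vid2vid.py below, alpha_mask is then used to decide which pixels are re-generated by img2img and which are carried over from the warped previous styled frame.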
SD-CN-Animation/scripts/core/txt2vid.py ADDED
@@ -0,0 +1,240 @@
+ import sys, os
+
+ import torch
+ import gc
+ import numpy as np
+ from PIL import Image
+
+ import modules.paths as ph
+ from modules.shared import devices
+
+ from scripts.core import utils, flow_utils
+ from FloweR.model import FloweR
+
+ import skimage
+ import datetime
+ import cv2
+ import gradio as gr
+ import time
+
+ FloweR_model = None
+ DEVICE = 'cpu'
+
+ def FloweR_clear_memory():
+   global FloweR_model
+   del FloweR_model
+   gc.collect()
+   torch.cuda.empty_cache()
+   FloweR_model = None
+
+ def FloweR_load_model(w, h):
+   global DEVICE, FloweR_model
+   DEVICE = devices.get_optimal_device()
+
+   model_path = ph.models_path + '/FloweR/FloweR_0.1.2.pth'
+   #remote_model_path = 'https://drive.google.com/uc?id=1K7gXUosgxU729_l-osl1HBU5xqyLsALv' # FloweR_0.1.1.pth
+   remote_model_path = 'https://drive.google.com/uc?id=1-UYsTXkdUkHLgtPK1Y5_7kKzCgzL_Z6o' # FloweR_0.1.2.pth
+
+   if not os.path.isfile(model_path):
+     from basicsr.utils.download_util import load_file_from_url
+     os.makedirs(os.path.dirname(model_path), exist_ok=True)
+     load_file_from_url(remote_model_path, file_name=model_path)
+
+   FloweR_model = FloweR(input_size=(h, w))
+   FloweR_model.load_state_dict(torch.load(model_path, map_location=DEVICE))
+   # Move the model to the device
+   FloweR_model = FloweR_model.to(DEVICE)
+   FloweR_model.eval()
+
+ def read_frame_from_video(input_video):
+   if input_video is None: return None
+
+   # Reading video file
+   if input_video.isOpened():
+     ret, cur_frame = input_video.read()
+     if cur_frame is not None:
+       cur_frame = cv2.cvtColor(cur_frame, cv2.COLOR_BGR2RGB)
+   else:
+     cur_frame = None
+     input_video.release()
+     input_video = None
+
+   return cur_frame
+
+ def start_process(*args):
+   processing_start_time = time.time()
+   args_dict = utils.args_to_dict(*args)
+   args_dict = utils.get_mode_args('t2v', args_dict)
+
+   # Open the input video file
+   input_video = None
+   if args_dict['file'] is not None:
+     input_video = cv2.VideoCapture(args_dict['file'].name)
+
+   # Create an output video file with the same fps, width, and height as the input video
+   output_video_name = f'outputs/sd-cn-animation/txt2vid/{datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}.mp4'
+   output_video_folder = os.path.splitext(output_video_name)[0]
+   os.makedirs(os.path.dirname(output_video_name), exist_ok=True)
+
+   #if args_dict['save_frames_check']:
+   os.makedirs(output_video_folder, exist_ok=True)
+
+   # Writing the current params to params.json
+   setts_json = utils.export_settings(*args)
+   with open(os.path.join(output_video_folder, "params.json"), "w") as outfile:
+     outfile.write(setts_json)
+
+   curr_frame = None
+   prev_frame = None
+
+   def save_result_to_image(image, ind):
+     if args_dict['save_frames_check']:
+       cv2.imwrite(os.path.join(output_video_folder, f'{ind:05d}.png'), cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
+
+   def set_cn_frame_input():
+     if args_dict['cn_frame_send'] == 0: # Nothing is sent to ControlNet
+       pass
+     elif args_dict['cn_frame_send'] == 1: # Current generated frame
+       if curr_frame is not None:
+         utils.set_CNs_input_image(args_dict, Image.fromarray(curr_frame), set_references=True)
+     elif args_dict['cn_frame_send'] == 2: # Previous generated frame
+       if prev_frame is not None:
+         utils.set_CNs_input_image(args_dict, Image.fromarray(prev_frame), set_references=True)
+     elif args_dict['cn_frame_send'] == 3: # Current reference video frame
+       if input_video is not None:
+         curr_video_frame = read_frame_from_video(input_video)
+         curr_video_frame = cv2.resize(curr_video_frame, (args_dict['width'], args_dict['height']))
+         utils.set_CNs_input_image(args_dict, Image.fromarray(curr_video_frame), set_references=True)
+       else:
+         raise Exception('There is no input video! Set it up first.')
+     else:
+       raise Exception('Incorrect cn_frame_send mode!')
+
+   set_cn_frame_input()
+
+   if args_dict['init_image'] is not None:
+     # resize the array to args_dict['width'], args_dict['height']
+     image_array = args_dict['init_image'] # this is a numpy array
+     init_frame = np.array(Image.fromarray(image_array).resize((args_dict['width'], args_dict['height'])).convert('RGB'))
+     processed_frame = init_frame.copy()
+   else:
+     processed_frames, _, _, _ = utils.txt2img(args_dict)
+     processed_frame = np.array(processed_frames[0])[...,:3]
+     #if input_video is not None:
+     #  processed_frame = skimage.exposure.match_histograms(processed_frame, curr_video_frame, channel_axis=-1)
+     processed_frame = np.clip(processed_frame, 0, 255).astype(np.uint8)
+     init_frame = processed_frame.copy()
+
+   output_video = cv2.VideoWriter(output_video_name, cv2.VideoWriter_fourcc(*'mp4v'), args_dict['fps'], (args_dict['width'], args_dict['height']))
+   output_video.write(cv2.cvtColor(processed_frame, cv2.COLOR_RGB2BGR))
+
+   stat = f"Frame: 1 / {args_dict['length']}; " + utils.get_time_left(1, args_dict['length'], processing_start_time)
+   utils.shared.is_interrupted = False
+
+   save_result_to_image(processed_frame, 1)
+   yield stat, init_frame, None, None, processed_frame, None, gr.Button.update(interactive=False), gr.Button.update(interactive=True)
+
+   org_size = args_dict['width'], args_dict['height']
+   size = args_dict['width'] // 128 * 128, args_dict['height'] // 128 * 128
+   FloweR_load_model(size[0], size[1])
+
+   clip_frames = np.zeros((4, size[1], size[0], 3), dtype=np.uint8)
+
+   prev_frame = init_frame
+
+   for ind in range(args_dict['length'] - 1):
+     if utils.shared.is_interrupted: break
+
+     args_dict = utils.args_to_dict(*args)
+     args_dict = utils.get_mode_args('t2v', args_dict)
+
+     clip_frames = np.roll(clip_frames, -1, axis=0)
+     clip_frames[-1] = cv2.resize(prev_frame[...,:3], size)
+     clip_frames_torch = flow_utils.frames_norm(torch.from_numpy(clip_frames).to(DEVICE, dtype=torch.float32))
+
+     with torch.no_grad():
+       pred_data = FloweR_model(clip_frames_torch.unsqueeze(0))[0]
+
+     pred_flow = flow_utils.flow_renorm(pred_data[...,:2]).cpu().numpy()
+     pred_occl = flow_utils.occl_renorm(pred_data[...,2:3]).cpu().numpy().repeat(3, axis=-1)
+     pred_next = flow_utils.frames_renorm(pred_data[...,3:6]).cpu().numpy()
+
+     pred_occl = np.clip(pred_occl * 10, 0, 255).astype(np.uint8)
+     pred_next = np.clip(pred_next, 0, 255).astype(np.uint8)
+
+     pred_flow = cv2.resize(pred_flow, org_size)
+     pred_occl = cv2.resize(pred_occl, org_size)
+     pred_next = cv2.resize(pred_next, org_size)
+
+     curr_frame = pred_next.copy()
+
+     '''
+     pred_flow = pred_flow / (1 + np.linalg.norm(pred_flow, axis=-1, keepdims=True) * 0.05)
+     pred_flow = cv2.GaussianBlur(pred_flow, (31,31), 1, cv2.BORDER_REFLECT_101)
+
+     pred_occl = cv2.GaussianBlur(pred_occl, (21,21), 2, cv2.BORDER_REFLECT_101)
+     pred_occl = (np.abs(pred_occl / 255) ** 1.5) * 255
+     pred_occl = np.clip(pred_occl * 25, 0, 255).astype(np.uint8)
+
+     flow_map = pred_flow.copy()
+     flow_map[:,:,0] += np.arange(args_dict['width'])
+     flow_map[:,:,1] += np.arange(args_dict['height'])[:,np.newaxis]
+
+     warped_frame = cv2.remap(prev_frame, flow_map, None, cv2.INTER_NEAREST, borderMode=cv2.BORDER_REFLECT_101)
+     alpha_mask = pred_occl / 255.
+     #alpha_mask = np.clip(alpha_mask + np.random.normal(0, 0.4, size=alpha_mask.shape), 0, 1)
+     curr_frame = pred_next.astype(float) * alpha_mask + warped_frame.astype(float) * (1 - alpha_mask)
+     curr_frame = np.clip(curr_frame, 0, 255).astype(np.uint8)
+     #curr_frame = warped_frame.copy()
+     '''
+
+     set_cn_frame_input()
+
+     args_dict['mode'] = 4
+     args_dict['init_img'] = Image.fromarray(pred_next)
+     args_dict['mask_img'] = Image.fromarray(pred_occl)
+     args_dict['seed'] = -1
+     args_dict['denoising_strength'] = args_dict['processing_strength']
+
+     processed_frames, _, _, _ = utils.img2img(args_dict)
+     processed_frame = np.array(processed_frames[0])[...,:3]
+     #if input_video is not None:
+     #  processed_frame = skimage.exposure.match_histograms(processed_frame, curr_video_frame, channel_axis=-1)
+     #else:
+     processed_frame = skimage.exposure.match_histograms(processed_frame, init_frame, channel_axis=-1)
+     processed_frame = np.clip(processed_frame, 0, 255).astype(np.uint8)
+
+     args_dict['mode'] = 0
+     args_dict['init_img'] = Image.fromarray(processed_frame)
+     args_dict['mask_img'] = None
+     args_dict['seed'] = -1
+     args_dict['denoising_strength'] = args_dict['fix_frame_strength']
+
+     #utils.set_CNs_input_image(args_dict, Image.fromarray(curr_frame))
+     processed_frames, _, _, _ = utils.img2img(args_dict)
+     processed_frame = np.array(processed_frames[0])[...,:3]
+     #if input_video is not None:
+     #  processed_frame = skimage.exposure.match_histograms(processed_frame, curr_video_frame, channel_axis=-1)
+     #else:
+     processed_frame = skimage.exposure.match_histograms(processed_frame, init_frame, channel_axis=-1)
+     processed_frame = np.clip(processed_frame, 0, 255).astype(np.uint8)
+
+     output_video.write(cv2.cvtColor(processed_frame, cv2.COLOR_RGB2BGR))
+     prev_frame = processed_frame.copy()
+
+     save_result_to_image(processed_frame, ind + 2)
+     stat = f"Frame: {ind + 2} / {args_dict['length']}; " + utils.get_time_left(ind + 2, args_dict['length'], processing_start_time)
+     yield stat, curr_frame, pred_occl, pred_next, processed_frame, None, gr.Button.update(interactive=False), gr.Button.update(interactive=True)
+
+   if input_video is not None: input_video.release()
+   output_video.release()
+   FloweR_clear_memory()
+
+   curr_frame = gr.Image.update()
+   occlusion_mask = gr.Image.update()
+   warped_styled_frame_ = gr.Image.update()
+   processed_frame = gr.Image.update()
+
+   # print('TOTAL TIME:', int(time.time() - processing_start_time))
+
+   yield 'done', curr_frame, occlusion_mask, warped_styled_frame_, processed_frame, output_video_name, gr.Button.update(interactive=True), gr.Button.update(interactive=False)
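
The core of the txt2vid loop is the FloweR prediction: the model takes the last four generated frames and, judging by the slicing above, returns a per-pixel tensor whose first two channels are motion, the third an occlusion estimate, and channels 3:6 a rough next frame, each mapped back to pixel units with the renorm helpers from flow_utils.py. The sketch below mirrors that split in isolation; it is a hedged illustration, assuming the output layout implied by the slicing, and it does not load the pretrained FloweR_0.1.2.pth weights (FloweR_load_model() above does that), so the outputs would be meaningless in practice.

  # Sketch of how a single FloweR prediction is split; weights are not loaded here.
  import numpy as np
  import torch
  from scripts.core import flow_utils
  from FloweR.model import FloweR

  w, h = 512, 512                                  # must be multiples of 128, as in start_process()
  model = FloweR(input_size=(h, w))                # real use loads FloweR_0.1.2.pth first
  model.eval()

  clip = np.zeros((4, h, w, 3), dtype=np.uint8)    # rolling buffer of the last 4 generated frames
  clip_torch = flow_utils.frames_norm(torch.from_numpy(clip).float())

  with torch.no_grad():
    pred = model(clip_torch.unsqueeze(0))[0]       # per-pixel predictions for the next frame

  flow = flow_utils.flow_renorm(pred[..., :2]).numpy()           # predicted motion
  occl = flow_utils.occl_renorm(pred[..., 2:3]).numpy()          # occlusion estimate
  next_frame = flow_utils.frames_renorm(pred[..., 3:6]).numpy()  # rough next frame

The loop then feeds next_frame and the occlusion mask into img2img inpainting (mode 4) at processing_strength, followed by a light full-frame pass at fix_frame_strength.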
SD-CN-Animation/scripts/core/utils.py ADDED
@@ -0,0 +1,432 @@
+ class shared:
+   is_interrupted = False
+   v2v_custom_inputs_size = 0
+   t2v_custom_inputs_size = 0
+
+ def get_component_names():
+   components_list = [
+     'glo_sdcn_process_mode',
+     'v2v_file', 'v2v_width', 'v2v_height', 'v2v_prompt', 'v2v_n_prompt', 'v2v_cfg_scale', 'v2v_seed', 'v2v_processing_strength', 'v2v_fix_frame_strength',
+     'v2v_sampler_index', 'v2v_steps', 'v2v_override_settings',
+     'v2v_occlusion_mask_blur', 'v2v_occlusion_mask_trailing', 'v2v_occlusion_mask_flow_multiplier', 'v2v_occlusion_mask_difo_multiplier', 'v2v_occlusion_mask_difs_multiplier',
+     'v2v_step_1_processing_mode', 'v2v_step_1_blend_alpha', 'v2v_step_1_seed', 'v2v_step_2_seed',
+     't2v_file', 't2v_init_image', 't2v_width', 't2v_height', 't2v_prompt', 't2v_n_prompt', 't2v_cfg_scale', 't2v_seed', 't2v_processing_strength', 't2v_fix_frame_strength',
+     't2v_sampler_index', 't2v_steps', 't2v_length', 't2v_fps', 't2v_cn_frame_send',
+     'glo_save_frames_check'
+   ]
+
+   return components_list
+
+ def args_to_dict(*args): # converts the list of arguments into a dictionary for easier handling
+   args_list = get_component_names()
+
+   # set default values for params that were not specified
+   args_dict = {
+     # video to video params
+     'v2v_mode': 0,
+     'v2v_prompt': '',
+     'v2v_n_prompt': '',
+     'v2v_prompt_styles': [],
+     'v2v_init_video': None, # Always required
+
+     'v2v_steps': 15,
+     'v2v_sampler_index': 0, # 'Euler a'
+     'v2v_mask_blur': 0,
+
+     'v2v_inpainting_fill': 1, # original
+     'v2v_restore_faces': False,
+     'v2v_tiling': False,
+     'v2v_n_iter': 1,
+     'v2v_batch_size': 1,
+     'v2v_cfg_scale': 5.5,
+     'v2v_image_cfg_scale': 1.5,
+     'v2v_denoising_strength': 0.75,
+     'v2v_processing_strength': 0.85,
+     'v2v_fix_frame_strength': 0.15,
+     'v2v_seed': -1,
+     'v2v_subseed': -1,
+     'v2v_subseed_strength': 0,
+     'v2v_seed_resize_from_h': 512,
+     'v2v_seed_resize_from_w': 512,
+     'v2v_seed_enable_extras': False,
+     'v2v_height': 512,
+     'v2v_width': 512,
+     'v2v_resize_mode': 1,
+     'v2v_inpaint_full_res': True,
+     'v2v_inpaint_full_res_padding': 0,
+     'v2v_inpainting_mask_invert': False,
+
+     # text to video params
+     't2v_mode': 4,
+     't2v_prompt': '',
+     't2v_n_prompt': '',
+     't2v_prompt_styles': [],
+     't2v_init_img': None,
+     't2v_mask_img': None,
+
+     't2v_steps': 15,
+     't2v_sampler_index': 0, # 'Euler a'
+     't2v_mask_blur': 0,
+
+     't2v_inpainting_fill': 1, # original
+     't2v_restore_faces': False,
+     't2v_tiling': False,
+     't2v_n_iter': 1,
+     't2v_batch_size': 1,
+     't2v_cfg_scale': 5.5,
+     't2v_image_cfg_scale': 1.5,
+     't2v_denoising_strength': 0.75,
+     't2v_processing_strength': 0.85,
+     't2v_fix_frame_strength': 0.15,
+     't2v_seed': -1,
+     't2v_subseed': -1,
+     't2v_subseed_strength': 0,
+     't2v_seed_resize_from_h': 512,
+     't2v_seed_resize_from_w': 512,
+     't2v_seed_enable_extras': False,
+     't2v_height': 512,
+     't2v_width': 512,
+     't2v_resize_mode': 1,
+     't2v_inpaint_full_res': True,
+     't2v_inpaint_full_res_padding': 0,
+     't2v_inpainting_mask_invert': False,
+
+     't2v_override_settings': [],
+     #'t2v_script_inputs': [0],
+
+     't2v_fps': 12,
+   }
+
+   args = list(args)
+
+   for i in range(len(args_list)):
+     if (args[i] is None) and (args_list[i] in args_dict):
+       #args[i] = args_dict[args_list[i]]
+       pass
+     else:
+       args_dict[args_list[i]] = args[i]
+
+   args_dict['v2v_script_inputs'] = args[len(args_list):len(args_list)+shared.v2v_custom_inputs_size]
+   #print('v2v_script_inputs', args_dict['v2v_script_inputs'])
+   args_dict['t2v_script_inputs'] = args[len(args_list)+shared.v2v_custom_inputs_size:]
+   #print('t2v_script_inputs', args_dict['t2v_script_inputs'])
+   return args_dict
+
+ def get_mode_args(mode, args_dict):
+   mode_args_dict = {}
+   for key, value in args_dict.items():
+     if key[:3] in [mode, 'glo']:
+       mode_args_dict[key[4:]] = value
+
+   return mode_args_dict
+
+ def set_CNs_input_image(args_dict, image, set_references=False):
+   for script_input in args_dict['script_inputs']:
+     if type(script_input).__name__ == 'UiControlNetUnit':
+       if script_input.module not in ["reference_only", "reference_adain", "reference_adain+attn"] or set_references:
+         script_input.image = np.array(image)
+         script_input.batch_images = [np.array(image)]
+
+ import time
+ import datetime
+
+ def get_time_left(ind, length, processing_start_time):
+   s_passed = int(time.time() - processing_start_time)
+   time_passed = datetime.timedelta(seconds=s_passed)
+   s_left = int(s_passed / ind * (length - ind))
+   time_left = datetime.timedelta(seconds=s_left)
+   return f"Time elapsed: {time_passed}; Time left: {time_left};"
+
+ import numpy as np
+ from PIL import Image, ImageOps, ImageFilter, ImageEnhance, ImageChops
+ from types import SimpleNamespace
+
+ from modules.generation_parameters_copypaste import create_override_settings_dict
+ from modules.processing import Processed, StableDiffusionProcessingImg2Img, StableDiffusionProcessingTxt2Img, process_images
+ import modules.processing as processing
+ from modules.ui import plaintext_to_html
+ import modules.images as images
+ import modules.scripts
+ from modules.shared import opts, devices, state
+ from modules import devices, sd_samplers, img2img
+ from modules import shared, sd_hijack, lowvram
+
+ # TODO: Refactor all the code below
+
+ def process_img(p, input_img, output_dir, inpaint_mask_dir, args):
+   processing.fix_seed(p)
+
+   #images = shared.listfiles(input_dir)
+   images = [input_img]
+
+   is_inpaint_batch = False
+   #if inpaint_mask_dir:
+   #  inpaint_masks = shared.listfiles(inpaint_mask_dir)
+   #  is_inpaint_batch = len(inpaint_masks) > 0
+   #if is_inpaint_batch:
+   #  print(f"\nInpaint batch is enabled. {len(inpaint_masks)} masks found.")
+
+   #print(f"Will process {len(images)} images, creating {p.n_iter * p.batch_size} new images for each.")
+
+   save_normally = output_dir == ''
+
+   p.do_not_save_grid = True
+   p.do_not_save_samples = not save_normally
+
+   state.job_count = len(images) * p.n_iter
+
+   generated_images = []
+   for i, image in enumerate(images):
+     state.job = f"{i+1} out of {len(images)}"
+     if state.skipped:
+       state.skipped = False
+
+     if state.interrupted:
+       break
+
+     img = image #Image.open(image)
+     # Use the EXIF orientation of photos taken by smartphones.
+     img = ImageOps.exif_transpose(img)
+     p.init_images = [img] * p.batch_size
+
+     #if is_inpaint_batch:
+     #  # try to find a corresponding mask for the image using simple filename matching
+     #  mask_image_path = os.path.join(inpaint_mask_dir, os.path.basename(image))
+     #  # if not found, use the first one ("same mask for all images" use-case)
+     #  if not mask_image_path in inpaint_masks:
+     #    mask_image_path = inpaint_masks[0]
+     #  mask_image = Image.open(mask_image_path)
+     #  p.image_mask = mask_image
+
+     proc = modules.scripts.scripts_img2img.run(p, *args)
+     if proc is None:
+       proc = process_images(p)
+     generated_images.append(proc.images[0])
+
+     #for n, processed_image in enumerate(proc.images):
+     #  filename = os.path.basename(image)
+     #
+     #  if n > 0:
+     #    left, right = os.path.splitext(filename)
+     #    filename = f"{left}-{n}{right}"
+     #
+     #  if not save_normally:
+     #    os.makedirs(output_dir, exist_ok=True)
+     #    if processed_image.mode == 'RGBA':
+     #      processed_image = processed_image.convert("RGB")
+     #    processed_image.save(os.path.join(output_dir, filename))
+
+   return generated_images
+
+ def img2img(args_dict):
+   args = SimpleNamespace(**args_dict)
+   override_settings = create_override_settings_dict(args.override_settings)
+
+   is_batch = args.mode == 5
+
+   if args.mode == 0: # img2img
+     image = args.init_img.convert("RGB")
+     mask = None
+   elif args.mode == 1: # img2img sketch
+     image = args.sketch.convert("RGB")
+     mask = None
+   elif args.mode == 2: # inpaint
+     image, mask = args.init_img_with_mask["image"], args.init_img_with_mask["mask"]
+     alpha_mask = ImageOps.invert(image.split()[-1]).convert('L').point(lambda x: 255 if x > 0 else 0, mode='1')
+     mask = ImageChops.lighter(alpha_mask, mask.convert('L')).convert('L')
+     image = image.convert("RGB")
+   elif args.mode == 3: # inpaint sketch
+     image = args.inpaint_color_sketch
+     orig = args.inpaint_color_sketch_orig or args.inpaint_color_sketch
+     pred = np.any(np.array(image) != np.array(orig), axis=-1)
+     mask = Image.fromarray(pred.astype(np.uint8) * 255, "L")
+     mask = ImageEnhance.Brightness(mask).enhance(1 - args.mask_alpha / 100)
+     blur = ImageFilter.GaussianBlur(args.mask_blur)
+     image = Image.composite(image.filter(blur), orig, mask.filter(blur))
+     image = image.convert("RGB")
+   elif args.mode == 4: # inpaint upload mask
+     #image = args.init_img_inpaint
+     #mask = args.init_mask_inpaint
+
+     image = args.init_img.convert("RGB")
+     mask = args.mask_img.convert("L")
+   else:
+     image = None
+     mask = None
+
+   # Use the EXIF orientation of photos taken by smartphones.
+   if image is not None:
+     image = ImageOps.exif_transpose(image)
+
+   assert 0. <= args.denoising_strength <= 1., 'can only work with strength in [0.0, 1.0]'
+
+   p = StableDiffusionProcessingImg2Img(
+     sd_model=shared.sd_model,
+     outpath_samples=opts.outdir_samples or opts.outdir_img2img_samples,
+     outpath_grids=opts.outdir_grids or opts.outdir_img2img_grids,
+     prompt=args.prompt,
+     negative_prompt=args.n_prompt,
+     styles=args.prompt_styles,
+     seed=args.seed,
+     subseed=args.subseed,
+     subseed_strength=args.subseed_strength,
+     seed_resize_from_h=args.seed_resize_from_h,
+     seed_resize_from_w=args.seed_resize_from_w,
+     seed_enable_extras=args.seed_enable_extras,
+     sampler_name=sd_samplers.samplers_for_img2img[args.sampler_index].name,
+     batch_size=args.batch_size,
+     n_iter=args.n_iter,
+     steps=args.steps,
+     cfg_scale=args.cfg_scale,
+     width=args.width,
+     height=args.height,
+     restore_faces=args.restore_faces,
+     tiling=args.tiling,
+     init_images=[image],
+     mask=mask,
+     mask_blur=args.mask_blur,
+     inpainting_fill=args.inpainting_fill,
+     resize_mode=args.resize_mode,
+     denoising_strength=args.denoising_strength,
+     image_cfg_scale=args.image_cfg_scale,
+     inpaint_full_res=args.inpaint_full_res,
+     inpaint_full_res_padding=args.inpaint_full_res_padding,
+     inpainting_mask_invert=args.inpainting_mask_invert,
+     override_settings=override_settings,
+   )
+
+   p.scripts = modules.scripts.scripts_img2img
+   p.script_args = args.script_inputs
+
+   #if shared.cmd_opts.enable_console_prompts:
+   #  print(f"\nimg2img: {args.prompt}", file=shared.progress_print_out)
+
+   if mask:
+     p.extra_generation_params["Mask blur"] = args.mask_blur
+
+   '''
+   if is_batch:
+     ...
+     # assert not shared.cmd_opts.hide_ui_dir_config, "Launched with --hide-ui-dir-config, batch img2img disabled"
+     # process_batch(p, img2img_batch_input_dir, img2img_batch_output_dir, img2img_batch_inpaint_mask_dir, args.script_inputs)
+     # processed = Processed(p, [], p.seed, "")
+   else:
+     processed = modules.scripts.scripts_img2img.run(p, *args.script_inputs)
+     if processed is None:
+       processed = process_images(p)
+   '''
+
+   generated_images = process_img(p, image, None, '', args.script_inputs)
+   processed = Processed(p, [], p.seed, "")
+   p.close()
+
+   shared.total_tqdm.clear()
+
+   generation_info_js = processed.js()
+   #if opts.samples_log_stdout:
+   #  print(generation_info_js)
+
+   #if opts.do_not_show_images:
+   #  processed.images = []
+
+   #print(generation_info_js, plaintext_to_html(processed.info), plaintext_to_html(processed.comments))
+   return generated_images, generation_info_js, plaintext_to_html(processed.info), plaintext_to_html(processed.comments)
+
+ def txt2img(args_dict):
+   args = SimpleNamespace(**args_dict)
+   override_settings = create_override_settings_dict(args.override_settings)
+
+   p = StableDiffusionProcessingTxt2Img(
+     sd_model=shared.sd_model,
+     outpath_samples=opts.outdir_samples or opts.outdir_txt2img_samples,
+     outpath_grids=opts.outdir_grids or opts.outdir_txt2img_grids,
+     prompt=args.prompt,
+     styles=args.prompt_styles,
+     negative_prompt=args.n_prompt,
+     seed=args.seed,
+     subseed=args.subseed,
+     subseed_strength=args.subseed_strength,
+     seed_resize_from_h=args.seed_resize_from_h,
+     seed_resize_from_w=args.seed_resize_from_w,
+     seed_enable_extras=args.seed_enable_extras,
+     sampler_name=sd_samplers.samplers[args.sampler_index].name,
+     batch_size=args.batch_size,
+     n_iter=args.n_iter,
+     steps=args.steps,
+     cfg_scale=args.cfg_scale,
+     width=args.width,
+     height=args.height,
+     restore_faces=args.restore_faces,
+     tiling=args.tiling,
+     #enable_hr=args.enable_hr,
+     #denoising_strength=args.denoising_strength if enable_hr else None,
+     #hr_scale=hr_scale,
+     #hr_upscaler=hr_upscaler,
+     #hr_second_pass_steps=hr_second_pass_steps,
+     #hr_resize_x=hr_resize_x,
+     #hr_resize_y=hr_resize_y,
+     override_settings=override_settings,
+   )
+
+   p.scripts = modules.scripts.scripts_txt2img
+   p.script_args = args.script_inputs
+
+   #if cmd_opts.enable_console_prompts:
+   #  print(f"\ntxt2img: {prompt}", file=shared.progress_print_out)
+
+   processed = modules.scripts.scripts_txt2img.run(p, *args.script_inputs)
+
+   if processed is None:
+     processed = process_images(p)
+
+   p.close()
+
+   shared.total_tqdm.clear()
+
+   generation_info_js = processed.js()
+   #if opts.samples_log_stdout:
+   #  print(generation_info_js)
+
+   #if opts.do_not_show_images:
+   #  processed.images = []
+
+   return processed.images, generation_info_js, plaintext_to_html(processed.info), plaintext_to_html(processed.comments)
+
+
+ import json
+ def get_json(obj):
+   return json.loads(
+     json.dumps(obj, default=lambda o: getattr(o, '__dict__', str(o)))
+   )
+
+ def export_settings(*args):
+   args_dict = args_to_dict(*args)
+   if args[0] == 'vid2vid':
+     args_dict = get_mode_args('v2v', args_dict)
+   elif args[0] == 'txt2vid':
+     args_dict = get_mode_args('t2v', args_dict)
+   else:
+     msg = f"Unsupported processing mode: '{args[0]}'"
+     raise Exception(msg)
+
+   # convert CN params into a readable dict
+   cn_remove_list = ['low_vram', 'is_ui', 'input_mode', 'batch_images', 'output_dir', 'loopback', 'image']
+
+   args_dict['ControlNets'] = []
+   for script_input in args_dict['script_inputs']:
+     if type(script_input).__name__ == 'UiControlNetUnit':
+       cn_values_dict = get_json(script_input)
+       if cn_values_dict['enabled']:
+         for key in cn_remove_list:
+           if key in cn_values_dict: del cn_values_dict[key]
+         args_dict['ControlNets'].append(cn_values_dict)
+
+   # remove unimportant values
+   remove_list = ['save_frames_check', 'restore_faces', 'prompt_styles', 'mask_blur', 'inpainting_fill', 'tiling', 'n_iter', 'batch_size', 'subseed', 'subseed_strength', 'seed_resize_from_h',
+                  'seed_resize_from_w', 'seed_enable_extras', 'resize_mode', 'inpaint_full_res', 'inpaint_full_res_padding', 'inpainting_mask_invert', 'file', 'denoising_strength',
+                  'override_settings', 'script_inputs', 'init_img', 'mask_img', 'mode', 'init_video']
+
+   for key in remove_list:
+     if key in args_dict: del args_dict[key]
+
+   return json.dumps(args_dict, indent=2, default=lambda o: getattr(o, '__dict__', str(o)))
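
The UI passes one flat tuple of component values into utils.py; args_to_dict() maps it back onto the names from get_component_names(), and get_mode_args() then keeps only the keys for the selected mode ('v2v' or 't2v') plus the global 'glo' keys, stripping the prefix. A small self-contained illustration of that prefix handling, using a hand-made dictionary rather than real UI components:

  # Keys with the selected mode prefix (or 'glo_') are kept; the prefix is stripped.
  from scripts.core import utils

  args_dict = {
    'glo_sdcn_process_mode': 0,
    'v2v_width': 768,
    'v2v_prompt': 'a watercolor city',   # example values only
    't2v_width': 512,                    # dropped when asking for 'v2v'
  }

  v2v_args = utils.get_mode_args('v2v', args_dict)
  print(v2v_args)
  # {'sdcn_process_mode': 0, 'width': 768, 'prompt': 'a watercolor city'}

This is why the rest of the pipeline can refer to plain keys such as args_dict['width'] or args_dict['processing_strength'] regardless of which tab the values came from.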
SD-CN-Animation/scripts/core/vid2vid.py ADDED
@@ -0,0 +1,270 @@
+ import sys, os
+
+ import math
+ import os
+ import sys
+ import traceback
+
+ import numpy as np
+ from PIL import Image
+
+ from modules import devices, sd_samplers
+ from modules import shared, sd_hijack, lowvram
+
+ from modules.shared import devices
+ import modules.shared as shared
+
+ import gc
+ import cv2
+ import gradio as gr
+
+ import time
+ import skimage
+ import datetime
+
+ from scripts.core.flow_utils import RAFT_estimate_flow, RAFT_clear_memory, compute_diff_map
+ from scripts.core import utils
+
+ class sdcn_anim_tmp:
+   prepear_counter = 0
+   process_counter = 0
+   input_video = None
+   output_video = None
+   curr_frame = None
+   prev_frame = None
+   prev_frame_styled = None
+   prev_frame_alpha_mask = None
+   fps = None
+   total_frames = None
+   prepared_frames = None
+   prepared_next_flows = None
+   prepared_prev_flows = None
+   frames_prepared = False
+
+ def read_frame_from_video():
+   # Reading video file
+   if sdcn_anim_tmp.input_video.isOpened():
+     ret, cur_frame = sdcn_anim_tmp.input_video.read()
+     if cur_frame is not None:
+       cur_frame = cv2.cvtColor(cur_frame, cv2.COLOR_BGR2RGB)
+   else:
+     cur_frame = None
+     sdcn_anim_tmp.input_video.release()
+
+   return cur_frame
+
+ def get_cur_stat():
+   stat = f'Frames prepared: {sdcn_anim_tmp.prepear_counter + 1} / {sdcn_anim_tmp.total_frames}; '
+   stat += f'Frames processed: {sdcn_anim_tmp.process_counter + 1} / {sdcn_anim_tmp.total_frames}; '
+   return stat
+
+ def clear_memory_from_sd():
+   if shared.sd_model is not None:
+     sd_hijack.model_hijack.undo_hijack(shared.sd_model)
+     try:
+       lowvram.send_everything_to_cpu()
+     except Exception as e:
+       ...
+     del shared.sd_model
+     shared.sd_model = None
+   gc.collect()
+   devices.torch_gc()
+
+ def start_process(*args):
+   processing_start_time = time.time()
+   args_dict = utils.args_to_dict(*args)
+   args_dict = utils.get_mode_args('v2v', args_dict)
+
+   sdcn_anim_tmp.process_counter = 0
+   sdcn_anim_tmp.prepear_counter = 0
+
+   # Open the input video file
+   sdcn_anim_tmp.input_video = cv2.VideoCapture(args_dict['file'].name)
+
+   # Get useful info from the source video
+   sdcn_anim_tmp.fps = int(sdcn_anim_tmp.input_video.get(cv2.CAP_PROP_FPS))
+   sdcn_anim_tmp.total_frames = int(sdcn_anim_tmp.input_video.get(cv2.CAP_PROP_FRAME_COUNT))
+   loop_iterations = (sdcn_anim_tmp.total_frames - 1) * 2
+
+   # Create an output video file with the same fps, width, and height as the input video
+   output_video_name = f'outputs/sd-cn-animation/vid2vid/{datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}.mp4'
+   output_video_folder = os.path.splitext(output_video_name)[0]
+   os.makedirs(os.path.dirname(output_video_name), exist_ok=True)
+
+   if args_dict['save_frames_check']:
+     os.makedirs(output_video_folder, exist_ok=True)
+
+   def save_result_to_image(image, ind):
+     if args_dict['save_frames_check']:
+       cv2.imwrite(os.path.join(output_video_folder, f'{ind:05d}.png'), cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
+
+   sdcn_anim_tmp.output_video = cv2.VideoWriter(output_video_name, cv2.VideoWriter_fourcc(*'mp4v'), sdcn_anim_tmp.fps, (args_dict['width'], args_dict['height']))
+
+   curr_frame = read_frame_from_video()
+   curr_frame = cv2.resize(curr_frame, (args_dict['width'], args_dict['height']))
+   sdcn_anim_tmp.prepared_frames = np.zeros((11, args_dict['height'], args_dict['width'], 3), dtype=np.uint8)
+   sdcn_anim_tmp.prepared_next_flows = np.zeros((10, args_dict['height'], args_dict['width'], 2))
+   sdcn_anim_tmp.prepared_prev_flows = np.zeros((10, args_dict['height'], args_dict['width'], 2))
+   sdcn_anim_tmp.prepared_frames[0] = curr_frame
+
+   args_dict['init_img'] = Image.fromarray(curr_frame)
+   utils.set_CNs_input_image(args_dict, Image.fromarray(curr_frame))
+   processed_frames, _, _, _ = utils.img2img(args_dict)
+   processed_frame = np.array(processed_frames[0])[...,:3]
+   processed_frame = skimage.exposure.match_histograms(processed_frame, curr_frame, channel_axis=None)
+   processed_frame = np.clip(processed_frame, 0, 255).astype(np.uint8)
+   #print('Processed frame ', 0)
+
+   sdcn_anim_tmp.curr_frame = curr_frame
+   sdcn_anim_tmp.prev_frame = curr_frame.copy()
+   sdcn_anim_tmp.prev_frame_styled = processed_frame.copy()
+   utils.shared.is_interrupted = False
+
+   save_result_to_image(processed_frame, 1)
+   stat = get_cur_stat() + utils.get_time_left(1, loop_iterations, processing_start_time)
+   yield stat, sdcn_anim_tmp.curr_frame, None, None, processed_frame, None, gr.Button.update(interactive=False), gr.Button.update(interactive=True)
+
+   for step in range(loop_iterations):
+     if utils.shared.is_interrupted: break
+
+     args_dict = utils.args_to_dict(*args)
+     args_dict = utils.get_mode_args('v2v', args_dict)
+
+     occlusion_mask = None
+     prev_frame = None
+     curr_frame = sdcn_anim_tmp.curr_frame
+     warped_styled_frame_ = gr.Image.update()
+     processed_frame = gr.Image.update()
+
+     prepare_steps = 10
+     if sdcn_anim_tmp.process_counter % prepare_steps == 0 and not sdcn_anim_tmp.frames_prepared: # prepare the next 10 frames for processing
+       #clear_memory_from_sd()
+       device = devices.get_optimal_device()
+
+       curr_frame = read_frame_from_video()
+       if curr_frame is not None:
+         curr_frame = cv2.resize(curr_frame, (args_dict['width'], args_dict['height']))
+         prev_frame = sdcn_anim_tmp.prev_frame.copy()
+
+         next_flow, prev_flow, occlusion_mask = RAFT_estimate_flow(prev_frame, curr_frame, device=device)
+         occlusion_mask = np.clip(occlusion_mask * 0.1 * 255, 0, 255).astype(np.uint8)
+
+         cn = sdcn_anim_tmp.prepear_counter % 10
+         if sdcn_anim_tmp.prepear_counter % 10 == 0:
+           sdcn_anim_tmp.prepared_frames[cn] = sdcn_anim_tmp.prev_frame
+         sdcn_anim_tmp.prepared_frames[cn + 1] = curr_frame.copy()
+         sdcn_anim_tmp.prepared_next_flows[cn] = next_flow.copy()
+         sdcn_anim_tmp.prepared_prev_flows[cn] = prev_flow.copy()
+         #print('Prepared frame ', cn + 1)
+
+         sdcn_anim_tmp.prev_frame = curr_frame.copy()
+
+       sdcn_anim_tmp.prepear_counter += 1
+       if sdcn_anim_tmp.prepear_counter % prepare_steps == 0 or \
+           sdcn_anim_tmp.prepear_counter >= sdcn_anim_tmp.total_frames - 1 or \
+           curr_frame is None:
+         # Remove RAFT from memory
+         RAFT_clear_memory()
+         sdcn_anim_tmp.frames_prepared = True
+     else:
+       # process frame
+       sdcn_anim_tmp.frames_prepared = False
+
+       cn = sdcn_anim_tmp.process_counter % 10
+       curr_frame = sdcn_anim_tmp.prepared_frames[cn + 1][...,:3]
+       prev_frame = sdcn_anim_tmp.prepared_frames[cn][...,:3]
+       next_flow = sdcn_anim_tmp.prepared_next_flows[cn]
+       prev_flow = sdcn_anim_tmp.prepared_prev_flows[cn]
+
+       ### STEP 1
+       alpha_mask, warped_styled_frame = compute_diff_map(next_flow, prev_flow, prev_frame, curr_frame, sdcn_anim_tmp.prev_frame_styled, args_dict)
+       warped_styled_frame_ = warped_styled_frame.copy()
+
+       #fl_w, fl_h = prev_flow.shape[:2]
+       #prev_flow_n = prev_flow / np.array([fl_h, fl_w])
+       #flow_mask = np.clip(1 - np.linalg.norm(prev_flow_n, axis=-1)[...,None] * 20, 0, 1)
+       #alpha_mask = alpha_mask * flow_mask
+
+       if sdcn_anim_tmp.process_counter > 0 and args_dict['occlusion_mask_trailing']:
+         alpha_mask = alpha_mask + sdcn_anim_tmp.prev_frame_alpha_mask * 0.5
+       sdcn_anim_tmp.prev_frame_alpha_mask = alpha_mask
+
+       # alpha_mask = np.round(alpha_mask * 8) / 8 #> 0.3
+       alpha_mask = np.clip(alpha_mask, 0, 1)
+       occlusion_mask = np.clip(alpha_mask * 255, 0, 255).astype(np.uint8)
+
+       # fix the warped styled frame from the duplicates that occur where the flow is zero, but only because there is no place to get the color from
+       warped_styled_frame = curr_frame.astype(float) * alpha_mask + warped_styled_frame.astype(float) * (1 - alpha_mask)
+
+       # process current frame
+       # TODO: convert args_dict into a separate dict that stores only the params necessary for img2img processing
+       img2img_args_dict = args_dict #copy.deepcopy(args_dict)
+       img2img_args_dict['denoising_strength'] = args_dict['processing_strength']
+       if args_dict['step_1_processing_mode'] == 0: # Process the full image, then blend in occlusions
+         img2img_args_dict['mode'] = 0
+         img2img_args_dict['mask_img'] = None #Image.fromarray(occlusion_mask)
+       elif args_dict['step_1_processing_mode'] == 1: # Inpaint occlusions
+         img2img_args_dict['mode'] = 4
+         img2img_args_dict['mask_img'] = Image.fromarray(occlusion_mask)
+       else:
+         raise Exception('Incorrect step 1 processing mode!')
+
+       blend_alpha = args_dict['step_1_blend_alpha']
+       init_img = warped_styled_frame * (1 - blend_alpha) + curr_frame * blend_alpha
+       img2img_args_dict['init_img'] = Image.fromarray(np.clip(init_img, 0, 255).astype(np.uint8))
+       img2img_args_dict['seed'] = args_dict['step_1_seed']
+       utils.set_CNs_input_image(img2img_args_dict, Image.fromarray(curr_frame))
+       processed_frames, _, _, _ = utils.img2img(img2img_args_dict)
+       processed_frame = np.array(processed_frames[0])[...,:3]
+
+       # normalizing the colors
+       processed_frame = skimage.exposure.match_histograms(processed_frame, curr_frame, channel_axis=None)
+       processed_frame = processed_frame.astype(float) * alpha_mask + warped_styled_frame.astype(float) * (1 - alpha_mask)
+
+       #processed_frame = processed_frame * 0.94 + curr_frame * 0.06
+       processed_frame = np.clip(processed_frame, 0, 255).astype(np.uint8)
+       sdcn_anim_tmp.prev_frame_styled = processed_frame.copy()
+
+       ### STEP 2
+       if args_dict['fix_frame_strength'] > 0:
+         img2img_args_dict = args_dict #copy.deepcopy(args_dict)
+         img2img_args_dict['mode'] = 0
+         img2img_args_dict['init_img'] = Image.fromarray(processed_frame)
+         img2img_args_dict['mask_img'] = None
+         img2img_args_dict['denoising_strength'] = args_dict['fix_frame_strength']
+         img2img_args_dict['seed'] = args_dict['step_2_seed']
+         utils.set_CNs_input_image(img2img_args_dict, Image.fromarray(curr_frame))
+         processed_frames, _, _, _ = utils.img2img(img2img_args_dict)
+         processed_frame = np.array(processed_frames[0])
+         processed_frame = skimage.exposure.match_histograms(processed_frame, curr_frame, channel_axis=None)
+
+       processed_frame = np.clip(processed_frame, 0, 255).astype(np.uint8)
+       warped_styled_frame_ = np.clip(warped_styled_frame_, 0, 255).astype(np.uint8)
+
+       # Write the frame to the output video
+       frame_out = np.clip(processed_frame, 0, 255).astype(np.uint8)
+       frame_out = cv2.cvtColor(frame_out, cv2.COLOR_RGB2BGR)
+       sdcn_anim_tmp.output_video.write(frame_out)
+
+       sdcn_anim_tmp.process_counter += 1
+       #if sdcn_anim_tmp.process_counter >= sdcn_anim_tmp.total_frames - 1:
+       #  sdcn_anim_tmp.input_video.release()
+       #  sdcn_anim_tmp.output_video.release()
+       #  sdcn_anim_tmp.prev_frame = None
+
+       save_result_to_image(processed_frame, sdcn_anim_tmp.process_counter + 1)
+
+     stat = get_cur_stat() + utils.get_time_left(step + 2, loop_iterations + 1, processing_start_time)
+     yield stat, curr_frame, occlusion_mask, warped_styled_frame_, processed_frame, None, gr.Button.update(interactive=False), gr.Button.update(interactive=True)
+
+   RAFT_clear_memory()
+
+   sdcn_anim_tmp.input_video.release()
+   sdcn_anim_tmp.output_video.release()
+
+   curr_frame = gr.Image.update()
+   occlusion_mask = gr.Image.update()
+   warped_styled_frame_ = gr.Image.update()
+   processed_frame = gr.Image.update()
+
+   yield get_cur_stat(), curr_frame, occlusion_mask, warped_styled_frame_, processed_frame, output_video_name, gr.Button.update(interactive=True), gr.Button.update(interactive=False)
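
The key operation in STEP 1 above is the occlusion-guided blend: pixels flagged as occluded or newly revealed (alpha near 1) take the freshly generated img2img result, while well-tracked pixels (alpha near 0) keep the flow-warped previous styled frame, which is what keeps the animation temporally stable. A minimal numpy sketch of that blend on toy data; the array sizes and values are placeholders, not actual frame data.

  # Toy illustration of: processed * alpha + warped_styled * (1 - alpha)
  import numpy as np

  h, w = 4, 4  # toy size; real frames are args_dict['height'] x args_dict['width']
  processed_frame = np.full((h, w, 3), 200, dtype=np.float32)      # freshly generated pixels
  warped_styled_frame = np.full((h, w, 3), 50, dtype=np.float32)   # warped previous styled frame
  alpha_mask = np.zeros((h, w, 3), dtype=np.float32)
  alpha_mask[:, :2] = 1.0  # pretend the left half is occluded / newly revealed

  blended = processed_frame * alpha_mask + warped_styled_frame * (1 - alpha_mask)
  blended = np.clip(blended, 0, 255).astype(np.uint8)
  print(blended[0, 0], blended[0, -1])  # [200 200 200] on the occluded side, [50 50 50] elsewhere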
Stable-Diffusion-Webui-Civitai-Helper/scripts/__pycache__/civitai_helper.cpython-310.pyc CHANGED
Binary files a/Stable-Diffusion-Webui-Civitai-Helper/scripts/__pycache__/civitai_helper.cpython-310.pyc and b/Stable-Diffusion-Webui-Civitai-Helper/scripts/__pycache__/civitai_helper.cpython-310.pyc differ
 
Stable-Diffusion-Webui-Civitai-Helper/scripts/ch_lib/__pycache__/__init__.cpython-310.pyc CHANGED
Binary files a/Stable-Diffusion-Webui-Civitai-Helper/scripts/ch_lib/__pycache__/__init__.cpython-310.pyc and b/Stable-Diffusion-Webui-Civitai-Helper/scripts/ch_lib/__pycache__/__init__.cpython-310.pyc differ
 
Stable-Diffusion-Webui-Civitai-Helper/scripts/ch_lib/__pycache__/civitai.cpython-310.pyc CHANGED
Binary files a/Stable-Diffusion-Webui-Civitai-Helper/scripts/ch_lib/__pycache__/civitai.cpython-310.pyc and b/Stable-Diffusion-Webui-Civitai-Helper/scripts/ch_lib/__pycache__/civitai.cpython-310.pyc differ
 
Stable-Diffusion-Webui-Civitai-Helper/scripts/ch_lib/__pycache__/downloader.cpython-310.pyc CHANGED
Binary files a/Stable-Diffusion-Webui-Civitai-Helper/scripts/ch_lib/__pycache__/downloader.cpython-310.pyc and b/Stable-Diffusion-Webui-Civitai-Helper/scripts/ch_lib/__pycache__/downloader.cpython-310.pyc differ
 
Stable-Diffusion-Webui-Civitai-Helper/scripts/ch_lib/__pycache__/js_action_civitai.cpython-310.pyc CHANGED
Binary files a/Stable-Diffusion-Webui-Civitai-Helper/scripts/ch_lib/__pycache__/js_action_civitai.cpython-310.pyc and b/Stable-Diffusion-Webui-Civitai-Helper/scripts/ch_lib/__pycache__/js_action_civitai.cpython-310.pyc differ
 
Stable-Diffusion-Webui-Civitai-Helper/scripts/ch_lib/__pycache__/model.cpython-310.pyc CHANGED
Binary files a/Stable-Diffusion-Webui-Civitai-Helper/scripts/ch_lib/__pycache__/model.cpython-310.pyc and b/Stable-Diffusion-Webui-Civitai-Helper/scripts/ch_lib/__pycache__/model.cpython-310.pyc differ
 
Stable-Diffusion-Webui-Civitai-Helper/scripts/ch_lib/__pycache__/model_action_civitai.cpython-310.pyc CHANGED
Binary files a/Stable-Diffusion-Webui-Civitai-Helper/scripts/ch_lib/__pycache__/model_action_civitai.cpython-310.pyc and b/Stable-Diffusion-Webui-Civitai-Helper/scripts/ch_lib/__pycache__/model_action_civitai.cpython-310.pyc differ