00a4ef3f9ad50e484da61893e60b77bb57629f7b9dd7d759346c4dda116cba15
This view is limited to 50 files because it contains too many changes. See the raw diff for the full set of changes.
- .gitattributes +12 -0
- SD-CN-Animation/FloweR/__pycache__/model.cpython-310.pyc +0 -0
- SD-CN-Animation/FloweR/model.py +191 -0
- SD-CN-Animation/LICENSE +22 -0
- SD-CN-Animation/RAFT/LICENSE +29 -0
- SD-CN-Animation/RAFT/__pycache__/corr.cpython-310.pyc +0 -0
- SD-CN-Animation/RAFT/__pycache__/extractor.cpython-310.pyc +0 -0
- SD-CN-Animation/RAFT/__pycache__/raft.cpython-310.pyc +0 -0
- SD-CN-Animation/RAFT/__pycache__/update.cpython-310.pyc +0 -0
- SD-CN-Animation/RAFT/corr.py +91 -0
- SD-CN-Animation/RAFT/extractor.py +267 -0
- SD-CN-Animation/RAFT/raft.py +144 -0
- SD-CN-Animation/RAFT/update.py +139 -0
- SD-CN-Animation/RAFT/utils/__init__.py +0 -0
- SD-CN-Animation/RAFT/utils/__pycache__/__init__.cpython-310.pyc +0 -0
- SD-CN-Animation/RAFT/utils/__pycache__/utils.cpython-310.pyc +0 -0
- SD-CN-Animation/RAFT/utils/augmentor.py +246 -0
- SD-CN-Animation/RAFT/utils/flow_viz.py +132 -0
- SD-CN-Animation/RAFT/utils/frame_utils.py +137 -0
- SD-CN-Animation/RAFT/utils/utils.py +82 -0
- SD-CN-Animation/examples/bonefire_1.mp4 +0 -0
- SD-CN-Animation/examples/bonfire_1.gif +0 -0
- SD-CN-Animation/examples/cn_settings.png +0 -0
- SD-CN-Animation/examples/diamond_4.gif +0 -0
- SD-CN-Animation/examples/diamond_4.mp4 +0 -0
- SD-CN-Animation/examples/flower_1.gif +3 -0
- SD-CN-Animation/examples/flower_1.mp4 +3 -0
- SD-CN-Animation/examples/flower_11.mp4 +3 -0
- SD-CN-Animation/examples/girl_org.gif +3 -0
- SD-CN-Animation/examples/girl_to_jc.gif +3 -0
- SD-CN-Animation/examples/girl_to_jc.mp4 +3 -0
- SD-CN-Animation/examples/girl_to_wc.gif +3 -0
- SD-CN-Animation/examples/girl_to_wc.mp4 +3 -0
- SD-CN-Animation/examples/gold_1.gif +3 -0
- SD-CN-Animation/examples/gold_1.mp4 +0 -0
- SD-CN-Animation/examples/macaroni_1.gif +3 -0
- SD-CN-Animation/examples/macaroni_1.mp4 +0 -0
- SD-CN-Animation/examples/tree_2.gif +3 -0
- SD-CN-Animation/examples/tree_2.mp4 +3 -0
- SD-CN-Animation/examples/ui_preview.png +0 -0
- SD-CN-Animation/install.py +20 -0
- SD-CN-Animation/old_scripts/compute_flow.py +75 -0
- SD-CN-Animation/old_scripts/flow_utils.py +139 -0
- SD-CN-Animation/old_scripts/readme.md +133 -0
- SD-CN-Animation/old_scripts/txt2vid.py +208 -0
- SD-CN-Animation/old_scripts/vid2vid.py +237 -0
- SD-CN-Animation/readme.md +89 -0
- SD-CN-Animation/requirements.txt +1 -0
- SD-CN-Animation/scripts/__pycache__/base_ui.cpython-310.pyc +0 -0
- SD-CN-Animation/scripts/base_ui.py +252 -0
.gitattributes
CHANGED
@@ -43,3 +43,15 @@ sd_feed/assets/pinterest.png filter=lfs diff=lfs merge=lfs -text
 sd-3dmodel-loader/models/Samba[[:space:]]Dancing.fbx filter=lfs diff=lfs merge=lfs -text
 sd-3dmodel-loader/models/pose.vrm filter=lfs diff=lfs merge=lfs -text
 sd-webui-3d-open-pose-editor/downloads/pose/0.5.1675469404/pose_solution_packed_assets.data filter=lfs diff=lfs merge=lfs -text
+SD-CN-Animation/examples/flower_1.gif filter=lfs diff=lfs merge=lfs -text
+SD-CN-Animation/examples/flower_1.mp4 filter=lfs diff=lfs merge=lfs -text
+SD-CN-Animation/examples/flower_11.mp4 filter=lfs diff=lfs merge=lfs -text
+SD-CN-Animation/examples/girl_org.gif filter=lfs diff=lfs merge=lfs -text
+SD-CN-Animation/examples/girl_to_jc.gif filter=lfs diff=lfs merge=lfs -text
+SD-CN-Animation/examples/girl_to_jc.mp4 filter=lfs diff=lfs merge=lfs -text
+SD-CN-Animation/examples/girl_to_wc.gif filter=lfs diff=lfs merge=lfs -text
+SD-CN-Animation/examples/girl_to_wc.mp4 filter=lfs diff=lfs merge=lfs -text
+SD-CN-Animation/examples/gold_1.gif filter=lfs diff=lfs merge=lfs -text
+SD-CN-Animation/examples/macaroni_1.gif filter=lfs diff=lfs merge=lfs -text
+SD-CN-Animation/examples/tree_2.gif filter=lfs diff=lfs merge=lfs -text
+SD-CN-Animation/examples/tree_2.mp4 filter=lfs diff=lfs merge=lfs -text
SD-CN-Animation/FloweR/__pycache__/model.cpython-310.pyc
ADDED
Binary file (3.84 kB).
SD-CN-Animation/FloweR/model.py
ADDED
@@ -0,0 +1,191 @@
+import torch
+import torch.nn as nn
+import torch.functional as F
+
+# Define the model
+class FloweR(nn.Module):
+    def __init__(self, input_size=(384, 384), window_size=4):
+        super(FloweR, self).__init__()
+
+        self.input_size = input_size
+        self.window_size = window_size
+
+        # 2 channels for optical flow
+        # 1 channel for occlusion mask
+        # 3 channels for next frame prediction
+        self.out_channels = 6
+
+
+        #INPUT: 384 x 384 x 4 * 3
+
+        ### DOWNSCALE ###
+        self.conv_block_1 = nn.Sequential(
+            nn.Conv2d(3 * self.window_size, 128, kernel_size=3, stride=1, padding='same'),
+            nn.ReLU(),
+        ) # 384 x 384 x 128
+
+        self.conv_block_2 = nn.Sequential(
+            nn.AvgPool2d(2),
+            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
+            nn.ReLU(),
+        ) # 192 x 192 x 128
+
+        self.conv_block_3 = nn.Sequential(
+            nn.AvgPool2d(2),
+            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
+            nn.ReLU(),
+        ) # 96 x 96 x 128
+
+        self.conv_block_4 = nn.Sequential(
+            nn.AvgPool2d(2),
+            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
+            nn.ReLU(),
+        ) # 48 x 48 x 128
+
+        self.conv_block_5 = nn.Sequential(
+            nn.AvgPool2d(2),
+            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
+            nn.ReLU(),
+        ) # 24 x 24 x 128
+
+        self.conv_block_6 = nn.Sequential(
+            nn.AvgPool2d(2),
+            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
+            nn.ReLU(),
+        ) # 12 x 12 x 128
+
+        self.conv_block_7 = nn.Sequential(
+            nn.AvgPool2d(2),
+            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
+            nn.ReLU(),
+        ) # 6 x 6 x 128
+
+        self.conv_block_8 = nn.Sequential(
+            nn.AvgPool2d(2),
+            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
+            nn.ReLU(),
+        ) # 3 x 3 x 128 - 9 input tokens
+
+        ### Transformer part ###
+        # To be done
+
+        ### UPSCALE ###
+        self.conv_block_9 = nn.Sequential(
+            nn.Upsample(scale_factor=2),
+            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
+            nn.ReLU(),
+        ) # 6 x 6 x 128
+
+        self.conv_block_10 = nn.Sequential(
+            nn.Upsample(scale_factor=2),
+            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
+            nn.ReLU(),
+        ) # 12 x 12 x 128
+
+        self.conv_block_11 = nn.Sequential(
+            nn.Upsample(scale_factor=2),
+            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
+            nn.ReLU(),
+        ) # 24 x 24 x 128
+
+        self.conv_block_12 = nn.Sequential(
+            nn.Upsample(scale_factor=2),
+            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
+            nn.ReLU(),
+        ) # 48 x 48 x 128
+
+        self.conv_block_13 = nn.Sequential(
+            nn.Upsample(scale_factor=2),
+            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
+            nn.ReLU(),
+        ) # 96 x 96 x 128
+
+        self.conv_block_14 = nn.Sequential(
+            nn.Upsample(scale_factor=2),
+            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
+            nn.ReLU(),
+        ) # 192 x 192 x 128
+
+        self.conv_block_15 = nn.Sequential(
+            nn.Upsample(scale_factor=2),
+            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
+            nn.ReLU(),
+        ) # 384 x 384 x 128
+
+        self.conv_block_16 = nn.Conv2d(128, self.out_channels, kernel_size=3, stride=1, padding='same')
+
+    def forward(self, input_frames):
+
+        if input_frames.size(1) != self.window_size:
+            raise Exception(f'Shape of the input is not compatable. There should be exactly {self.window_size} frames in an input video.')
+
+        h, w = self.input_size
+        # batch, frames, height, width, colors
+        input_frames_permuted = input_frames.permute((0, 1, 4, 2, 3))
+        # batch, frames, colors, height, width
+
+        in_x = input_frames_permuted.reshape(-1, self.window_size * 3, self.input_size[0], self.input_size[1])
+
+        ### DOWNSCALE ###
+        block_1_out = self.conv_block_1(in_x) # 384 x 384 x 128
+        block_2_out = self.conv_block_2(block_1_out) # 192 x 192 x 128
+        block_3_out = self.conv_block_3(block_2_out) # 96 x 96 x 128
+        block_4_out = self.conv_block_4(block_3_out) # 48 x 48 x 128
+        block_5_out = self.conv_block_5(block_4_out) # 24 x 24 x 128
+        block_6_out = self.conv_block_6(block_5_out) # 12 x 12 x 128
+        block_7_out = self.conv_block_7(block_6_out) # 6 x 6 x 128
+        block_8_out = self.conv_block_8(block_7_out) # 3 x 3 x 128
+
+        ### UPSCALE ###
+        block_9_out = block_7_out + self.conv_block_9(block_8_out) # 6 x 6 x 128
+        block_10_out = block_6_out + self.conv_block_10(block_9_out) # 12 x 12 x 128
+        block_11_out = block_5_out + self.conv_block_11(block_10_out) # 24 x 24 x 128
+        block_12_out = block_4_out + self.conv_block_12(block_11_out) # 48 x 48 x 128
+        block_13_out = block_3_out + self.conv_block_13(block_12_out) # 96 x 96 x 128
+        block_14_out = block_2_out + self.conv_block_14(block_13_out) # 192 x 192 x 128
+        block_15_out = block_1_out + self.conv_block_15(block_14_out) # 384 x 384 x 128
+
+        block_16_out = self.conv_block_16(block_15_out) # 384 x 384 x (2 + 1 + 3)
+        out = block_16_out.reshape(-1, self.out_channels, self.input_size[0], self.input_size[1])
+
+        ### for future model training ###
+        device = out.get_device()
+
+        pred_flow = out[:,:2,:,:] * 255 # (-255, 255)
+        pred_occl = (out[:,2:3,:,:] + 1) / 2 # [0, 1]
+        pred_next = out[:,3:6,:,:]
+
+        # Generate sampling grids
+
+        # Create grid to upsample input
+        '''
+        d = torch.linspace(-1, 1, 8)
+        meshx, meshy = torch.meshgrid((d, d))
+        grid = torch.stack((meshy, meshx), 2)
+        grid = grid.unsqueeze(0) '''
+
+        grid_y, grid_x = torch.meshgrid(torch.arange(0, h), torch.arange(0, w))
+        flow_grid = torch.stack((grid_x, grid_y), dim=0).float()
+        flow_grid = flow_grid.unsqueeze(0).to(device=device)
+        flow_grid = flow_grid + pred_flow
+
+        flow_grid[:, 0, :, :] = 2 * flow_grid[:, 0, :, :] / (w - 1) - 1
+        flow_grid[:, 1, :, :] = 2 * flow_grid[:, 1, :, :] / (h - 1) - 1
+        # batch, flow_chanels, height, width
+        flow_grid = flow_grid.permute(0, 2, 3, 1)
+        # batch, height, width, flow_chanels
+
+        previous_frame = input_frames_permuted[:, -1, :, :, :]
+        sampling_mode = "bilinear" if self.training else "nearest"
+        warped_frame = torch.nn.functional.grid_sample(previous_frame, flow_grid, mode=sampling_mode, padding_mode="reflection", align_corners=False)
+        alpha_mask = torch.clip(pred_occl * 10, 0, 1) * 0.04
+        pred_next = torch.clip(pred_next, -1, 1)
+        warped_frame = torch.clip(warped_frame, -1, 1)
+        next_frame = pred_next * alpha_mask + warped_frame * (1 - alpha_mask)
+
+        res = torch.cat((pred_flow / 255, pred_occl * 2 - 1, next_frame), dim=1)
+
+        # batch, channels, height, width
+        res = res.permute((0, 2, 3, 1))
+        # batch, height, width, channels
+        return res
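For orientation, a minimal usage sketch of the FloweR module above. The (batch, frames, height, width, colors) input layout, the 384x384 default size and the meaning of the six output channels are taken from the forward pass; the CUDA device, the random input clip and the module path (assuming the SD-CN-Animation folder is on sys.path) are illustrative assumptions, not part of this commit.

import torch
from FloweR.model import FloweR

# forward() calls out.get_device(), so a CUDA device is assumed here
device = torch.device('cuda')
model = FloweR(input_size=(384, 384), window_size=4).to(device)
model.eval()

# 4 previous frames, channels-last, values roughly in [-1, 1] (random for illustration)
clip = torch.rand(1, 4, 384, 384, 3, device=device) * 2 - 1

with torch.no_grad():
    out = model(clip)  # (1, 384, 384, 6)

pred_flow = out[..., 0:2] * 255      # optical flow, scaled back to pixel units
pred_occl = (out[..., 2:3] + 1) / 2  # occlusion mask in [0, 1]
next_frame = out[..., 3:6]           # predicted next frame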
SD-CN-Animation/LICENSE
ADDED
@@ -0,0 +1,22 @@
+License
+
+Copyright (c) 2023 Alexey Borsky
+
+The Software is subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+This repository can only be used for personal/research/non-commercial purposes.
+However, for commercial requests, please contact us directly at
+borsky.alexey@gmail.com. This restriction applies only to the code itself, all
+derivative works made using this repository (i.e. images and video) can be
+used for any purposes without restrictions.
SD-CN-Animation/RAFT/LICENSE
ADDED
@@ -0,0 +1,29 @@
+BSD 3-Clause License
+
+Copyright (c) 2020, princeton-vl
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
SD-CN-Animation/RAFT/__pycache__/corr.cpython-310.pyc
ADDED
Binary file (3.08 kB).
SD-CN-Animation/RAFT/__pycache__/extractor.cpython-310.pyc
ADDED
Binary file (5.79 kB).
SD-CN-Animation/RAFT/__pycache__/raft.cpython-310.pyc
ADDED
Binary file (4.22 kB).
SD-CN-Animation/RAFT/__pycache__/update.cpython-310.pyc
ADDED
Binary file (5.64 kB).
SD-CN-Animation/RAFT/corr.py
ADDED
@@ -0,0 +1,91 @@
+import torch
+import torch.nn.functional as F
+from RAFT.utils.utils import bilinear_sampler, coords_grid
+
+try:
+    import alt_cuda_corr
+except:
+    # alt_cuda_corr is not compiled
+    pass
+
+
+class CorrBlock:
+    def __init__(self, fmap1, fmap2, num_levels=4, radius=4):
+        self.num_levels = num_levels
+        self.radius = radius
+        self.corr_pyramid = []
+
+        # all pairs correlation
+        corr = CorrBlock.corr(fmap1, fmap2)
+
+        batch, h1, w1, dim, h2, w2 = corr.shape
+        corr = corr.reshape(batch*h1*w1, dim, h2, w2)
+
+        self.corr_pyramid.append(corr)
+        for i in range(self.num_levels-1):
+            corr = F.avg_pool2d(corr, 2, stride=2)
+            self.corr_pyramid.append(corr)
+
+    def __call__(self, coords):
+        r = self.radius
+        coords = coords.permute(0, 2, 3, 1)
+        batch, h1, w1, _ = coords.shape
+
+        out_pyramid = []
+        for i in range(self.num_levels):
+            corr = self.corr_pyramid[i]
+            dx = torch.linspace(-r, r, 2*r+1, device=coords.device)
+            dy = torch.linspace(-r, r, 2*r+1, device=coords.device)
+            delta = torch.stack(torch.meshgrid(dy, dx), axis=-1)
+
+            centroid_lvl = coords.reshape(batch*h1*w1, 1, 1, 2) / 2**i
+            delta_lvl = delta.view(1, 2*r+1, 2*r+1, 2)
+            coords_lvl = centroid_lvl + delta_lvl
+
+            corr = bilinear_sampler(corr, coords_lvl)
+            corr = corr.view(batch, h1, w1, -1)
+            out_pyramid.append(corr)
+
+        out = torch.cat(out_pyramid, dim=-1)
+        return out.permute(0, 3, 1, 2).contiguous().float()
+
+    @staticmethod
+    def corr(fmap1, fmap2):
+        batch, dim, ht, wd = fmap1.shape
+        fmap1 = fmap1.view(batch, dim, ht*wd)
+        fmap2 = fmap2.view(batch, dim, ht*wd)
+
+        corr = torch.matmul(fmap1.transpose(1,2), fmap2)
+        corr = corr.view(batch, ht, wd, 1, ht, wd)
+        return corr / torch.sqrt(torch.tensor(dim).float())
+
+
+class AlternateCorrBlock:
+    def __init__(self, fmap1, fmap2, num_levels=4, radius=4):
+        self.num_levels = num_levels
+        self.radius = radius
+
+        self.pyramid = [(fmap1, fmap2)]
+        for i in range(self.num_levels):
+            fmap1 = F.avg_pool2d(fmap1, 2, stride=2)
+            fmap2 = F.avg_pool2d(fmap2, 2, stride=2)
+            self.pyramid.append((fmap1, fmap2))
+
+    def __call__(self, coords):
+        coords = coords.permute(0, 2, 3, 1)
+        B, H, W, _ = coords.shape
+        dim = self.pyramid[0][0].shape[1]
+
+        corr_list = []
+        for i in range(self.num_levels):
+            r = self.radius
+            fmap1_i = self.pyramid[0][0].permute(0, 2, 3, 1).contiguous()
+            fmap2_i = self.pyramid[i][1].permute(0, 2, 3, 1).contiguous()
+
+            coords_i = (coords / 2**i).reshape(B, 1, H, W, 2).contiguous()
+            corr, = alt_cuda_corr.forward(fmap1_i, fmap2_i, coords_i, r)
+            corr_list.append(corr.squeeze(1))
+
+        corr = torch.stack(corr_list, dim=1)
+        corr = corr.reshape(B, -1, H, W)
+        return corr / torch.sqrt(torch.tensor(dim).float())
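A short sketch of how CorrBlock is driven, with shapes matching the way raft.py (below) uses it: 1/8-resolution feature maps and a coords_grid starting estimate. The feature dimensions and random tensors are illustrative assumptions.

import torch
from RAFT.corr import CorrBlock
from RAFT.utils.utils import coords_grid

fmap1 = torch.randn(1, 256, 46, 62)  # frame-1 features at 1/8 resolution
fmap2 = torch.randn(1, 256, 46, 62)  # frame-2 features

corr_fn = CorrBlock(fmap1, fmap2, num_levels=4, radius=4)

# current flow estimate expressed as absolute pixel coordinates
coords = coords_grid(1, 46, 62, device=fmap1.device)
corr = corr_fn(coords)
print(corr.shape)  # (1, num_levels * (2*radius+1)**2, 46, 62) = (1, 324, 46, 62)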
SD-CN-Animation/RAFT/extractor.py
ADDED
@@ -0,0 +1,267 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class ResidualBlock(nn.Module):
+    def __init__(self, in_planes, planes, norm_fn='group', stride=1):
+        super(ResidualBlock, self).__init__()
+
+        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, padding=1, stride=stride)
+        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1)
+        self.relu = nn.ReLU(inplace=True)
+
+        num_groups = planes // 8
+
+        if norm_fn == 'group':
+            self.norm1 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
+            self.norm2 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
+            if not stride == 1:
+                self.norm3 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
+
+        elif norm_fn == 'batch':
+            self.norm1 = nn.BatchNorm2d(planes)
+            self.norm2 = nn.BatchNorm2d(planes)
+            if not stride == 1:
+                self.norm3 = nn.BatchNorm2d(planes)
+
+        elif norm_fn == 'instance':
+            self.norm1 = nn.InstanceNorm2d(planes)
+            self.norm2 = nn.InstanceNorm2d(planes)
+            if not stride == 1:
+                self.norm3 = nn.InstanceNorm2d(planes)
+
+        elif norm_fn == 'none':
+            self.norm1 = nn.Sequential()
+            self.norm2 = nn.Sequential()
+            if not stride == 1:
+                self.norm3 = nn.Sequential()
+
+        if stride == 1:
+            self.downsample = None
+
+        else:
+            self.downsample = nn.Sequential(
+                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride), self.norm3)
+
+
+    def forward(self, x):
+        y = x
+        y = self.relu(self.norm1(self.conv1(y)))
+        y = self.relu(self.norm2(self.conv2(y)))
+
+        if self.downsample is not None:
+            x = self.downsample(x)
+
+        return self.relu(x+y)
+
+
+
+class BottleneckBlock(nn.Module):
+    def __init__(self, in_planes, planes, norm_fn='group', stride=1):
+        super(BottleneckBlock, self).__init__()
+
+        self.conv1 = nn.Conv2d(in_planes, planes//4, kernel_size=1, padding=0)
+        self.conv2 = nn.Conv2d(planes//4, planes//4, kernel_size=3, padding=1, stride=stride)
+        self.conv3 = nn.Conv2d(planes//4, planes, kernel_size=1, padding=0)
+        self.relu = nn.ReLU(inplace=True)
+
+        num_groups = planes // 8
+
+        if norm_fn == 'group':
+            self.norm1 = nn.GroupNorm(num_groups=num_groups, num_channels=planes//4)
+            self.norm2 = nn.GroupNorm(num_groups=num_groups, num_channels=planes//4)
+            self.norm3 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
+            if not stride == 1:
+                self.norm4 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
+
+        elif norm_fn == 'batch':
+            self.norm1 = nn.BatchNorm2d(planes//4)
+            self.norm2 = nn.BatchNorm2d(planes//4)
+            self.norm3 = nn.BatchNorm2d(planes)
+            if not stride == 1:
+                self.norm4 = nn.BatchNorm2d(planes)
+
+        elif norm_fn == 'instance':
+            self.norm1 = nn.InstanceNorm2d(planes//4)
+            self.norm2 = nn.InstanceNorm2d(planes//4)
+            self.norm3 = nn.InstanceNorm2d(planes)
+            if not stride == 1:
+                self.norm4 = nn.InstanceNorm2d(planes)
+
+        elif norm_fn == 'none':
+            self.norm1 = nn.Sequential()
+            self.norm2 = nn.Sequential()
+            self.norm3 = nn.Sequential()
+            if not stride == 1:
+                self.norm4 = nn.Sequential()
+
+        if stride == 1:
+            self.downsample = None
+
+        else:
+            self.downsample = nn.Sequential(
+                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride), self.norm4)
+
+
+    def forward(self, x):
+        y = x
+        y = self.relu(self.norm1(self.conv1(y)))
+        y = self.relu(self.norm2(self.conv2(y)))
+        y = self.relu(self.norm3(self.conv3(y)))
+
+        if self.downsample is not None:
+            x = self.downsample(x)
+
+        return self.relu(x+y)
+
+class BasicEncoder(nn.Module):
+    def __init__(self, output_dim=128, norm_fn='batch', dropout=0.0):
+        super(BasicEncoder, self).__init__()
+        self.norm_fn = norm_fn
+
+        if self.norm_fn == 'group':
+            self.norm1 = nn.GroupNorm(num_groups=8, num_channels=64)
+
+        elif self.norm_fn == 'batch':
+            self.norm1 = nn.BatchNorm2d(64)
+
+        elif self.norm_fn == 'instance':
+            self.norm1 = nn.InstanceNorm2d(64)
+
+        elif self.norm_fn == 'none':
+            self.norm1 = nn.Sequential()
+
+        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
+        self.relu1 = nn.ReLU(inplace=True)
+
+        self.in_planes = 64
+        self.layer1 = self._make_layer(64, stride=1)
+        self.layer2 = self._make_layer(96, stride=2)
+        self.layer3 = self._make_layer(128, stride=2)
+
+        # output convolution
+        self.conv2 = nn.Conv2d(128, output_dim, kernel_size=1)
+
+        self.dropout = None
+        if dropout > 0:
+            self.dropout = nn.Dropout2d(p=dropout)
+
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
+            elif isinstance(m, (nn.BatchNorm2d, nn.InstanceNorm2d, nn.GroupNorm)):
+                if m.weight is not None:
+                    nn.init.constant_(m.weight, 1)
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+
+    def _make_layer(self, dim, stride=1):
+        layer1 = ResidualBlock(self.in_planes, dim, self.norm_fn, stride=stride)
+        layer2 = ResidualBlock(dim, dim, self.norm_fn, stride=1)
+        layers = (layer1, layer2)
+
+        self.in_planes = dim
+        return nn.Sequential(*layers)
+
+
+    def forward(self, x):
+
+        # if input is list, combine batch dimension
+        is_list = isinstance(x, tuple) or isinstance(x, list)
+        if is_list:
+            batch_dim = x[0].shape[0]
+            x = torch.cat(x, dim=0)
+
+        x = self.conv1(x)
+        x = self.norm1(x)
+        x = self.relu1(x)
+
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+
+        x = self.conv2(x)
+
+        if self.training and self.dropout is not None:
+            x = self.dropout(x)
+
+        if is_list:
+            x = torch.split(x, [batch_dim, batch_dim], dim=0)
+
+        return x
+
+
+class SmallEncoder(nn.Module):
+    def __init__(self, output_dim=128, norm_fn='batch', dropout=0.0):
+        super(SmallEncoder, self).__init__()
+        self.norm_fn = norm_fn
+
+        if self.norm_fn == 'group':
+            self.norm1 = nn.GroupNorm(num_groups=8, num_channels=32)
+
+        elif self.norm_fn == 'batch':
+            self.norm1 = nn.BatchNorm2d(32)
+
+        elif self.norm_fn == 'instance':
+            self.norm1 = nn.InstanceNorm2d(32)
+
+        elif self.norm_fn == 'none':
+            self.norm1 = nn.Sequential()
+
+        self.conv1 = nn.Conv2d(3, 32, kernel_size=7, stride=2, padding=3)
+        self.relu1 = nn.ReLU(inplace=True)
+
+        self.in_planes = 32
+        self.layer1 = self._make_layer(32, stride=1)
+        self.layer2 = self._make_layer(64, stride=2)
+        self.layer3 = self._make_layer(96, stride=2)
+
+        self.dropout = None
+        if dropout > 0:
+            self.dropout = nn.Dropout2d(p=dropout)
+
+        self.conv2 = nn.Conv2d(96, output_dim, kernel_size=1)
+
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
+            elif isinstance(m, (nn.BatchNorm2d, nn.InstanceNorm2d, nn.GroupNorm)):
+                if m.weight is not None:
+                    nn.init.constant_(m.weight, 1)
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+
+    def _make_layer(self, dim, stride=1):
+        layer1 = BottleneckBlock(self.in_planes, dim, self.norm_fn, stride=stride)
+        layer2 = BottleneckBlock(dim, dim, self.norm_fn, stride=1)
+        layers = (layer1, layer2)
+
+        self.in_planes = dim
+        return nn.Sequential(*layers)
+
+
+    def forward(self, x):
+
+        # if input is list, combine batch dimension
+        is_list = isinstance(x, tuple) or isinstance(x, list)
+        if is_list:
+            batch_dim = x[0].shape[0]
+            x = torch.cat(x, dim=0)
+
+        x = self.conv1(x)
+        x = self.norm1(x)
+        x = self.relu1(x)
+
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.conv2(x)
+
+        if self.training and self.dropout is not None:
+            x = self.dropout(x)
+
+        if is_list:
+            x = torch.split(x, [batch_dim, batch_dim], dim=0)
+
+        return x
SD-CN-Animation/RAFT/raft.py
ADDED
@@ -0,0 +1,144 @@
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from RAFT.update import BasicUpdateBlock, SmallUpdateBlock
+from RAFT.extractor import BasicEncoder, SmallEncoder
+from RAFT.corr import CorrBlock, AlternateCorrBlock
+from RAFT.utils.utils import bilinear_sampler, coords_grid, upflow8
+
+try:
+    autocast = torch.cuda.amp.autocast
+except:
+    # dummy autocast for PyTorch < 1.6
+    class autocast:
+        def __init__(self, enabled):
+            pass
+        def __enter__(self):
+            pass
+        def __exit__(self, *args):
+            pass
+
+
+class RAFT(nn.Module):
+    def __init__(self, args):
+        super(RAFT, self).__init__()
+        self.args = args
+
+        if args.small:
+            self.hidden_dim = hdim = 96
+            self.context_dim = cdim = 64
+            args.corr_levels = 4
+            args.corr_radius = 3
+
+        else:
+            self.hidden_dim = hdim = 128
+            self.context_dim = cdim = 128
+            args.corr_levels = 4
+            args.corr_radius = 4
+
+        if 'dropout' not in self.args:
+            self.args.dropout = 0
+
+        if 'alternate_corr' not in self.args:
+            self.args.alternate_corr = False
+
+        # feature network, context network, and update block
+        if args.small:
+            self.fnet = SmallEncoder(output_dim=128, norm_fn='instance', dropout=args.dropout)
+            self.cnet = SmallEncoder(output_dim=hdim+cdim, norm_fn='none', dropout=args.dropout)
+            self.update_block = SmallUpdateBlock(self.args, hidden_dim=hdim)
+
+        else:
+            self.fnet = BasicEncoder(output_dim=256, norm_fn='instance', dropout=args.dropout)
+            self.cnet = BasicEncoder(output_dim=hdim+cdim, norm_fn='batch', dropout=args.dropout)
+            self.update_block = BasicUpdateBlock(self.args, hidden_dim=hdim)
+
+    def freeze_bn(self):
+        for m in self.modules():
+            if isinstance(m, nn.BatchNorm2d):
+                m.eval()
+
+    def initialize_flow(self, img):
+        """ Flow is represented as difference between two coordinate grids flow = coords1 - coords0"""
+        N, C, H, W = img.shape
+        coords0 = coords_grid(N, H//8, W//8, device=img.device)
+        coords1 = coords_grid(N, H//8, W//8, device=img.device)
+
+        # optical flow computed as difference: flow = coords1 - coords0
+        return coords0, coords1
+
+    def upsample_flow(self, flow, mask):
+        """ Upsample flow field [H/8, W/8, 2] -> [H, W, 2] using convex combination """
+        N, _, H, W = flow.shape
+        mask = mask.view(N, 1, 9, 8, 8, H, W)
+        mask = torch.softmax(mask, dim=2)
+
+        up_flow = F.unfold(8 * flow, [3,3], padding=1)
+        up_flow = up_flow.view(N, 2, 9, 1, 1, H, W)
+
+        up_flow = torch.sum(mask * up_flow, dim=2)
+        up_flow = up_flow.permute(0, 1, 4, 2, 5, 3)
+        return up_flow.reshape(N, 2, 8*H, 8*W)
+
+
+    def forward(self, image1, image2, iters=12, flow_init=None, upsample=True, test_mode=False):
+        """ Estimate optical flow between pair of frames """
+
+        image1 = 2 * (image1 / 255.0) - 1.0
+        image2 = 2 * (image2 / 255.0) - 1.0
+
+        image1 = image1.contiguous()
+        image2 = image2.contiguous()
+
+        hdim = self.hidden_dim
+        cdim = self.context_dim
+
+        # run the feature network
+        with autocast(enabled=self.args.mixed_precision):
+            fmap1, fmap2 = self.fnet([image1, image2])
+
+        fmap1 = fmap1.float()
+        fmap2 = fmap2.float()
+        if self.args.alternate_corr:
+            corr_fn = AlternateCorrBlock(fmap1, fmap2, radius=self.args.corr_radius)
+        else:
+            corr_fn = CorrBlock(fmap1, fmap2, radius=self.args.corr_radius)
+
+        # run the context network
+        with autocast(enabled=self.args.mixed_precision):
+            cnet = self.cnet(image1)
+            net, inp = torch.split(cnet, [hdim, cdim], dim=1)
+            net = torch.tanh(net)
+            inp = torch.relu(inp)
+
+        coords0, coords1 = self.initialize_flow(image1)
+
+        if flow_init is not None:
+            coords1 = coords1 + flow_init
+
+        flow_predictions = []
+        for itr in range(iters):
+            coords1 = coords1.detach()
+            corr = corr_fn(coords1) # index correlation volume
+
+            flow = coords1 - coords0
+            with autocast(enabled=self.args.mixed_precision):
+                net, up_mask, delta_flow = self.update_block(net, inp, corr, flow)
+
+            # F(t+1) = F(t) + \Delta(t)
+            coords1 = coords1 + delta_flow
+
+            # upsample predictions
+            if up_mask is None:
+                flow_up = upflow8(coords1 - coords0)
+            else:
+                flow_up = self.upsample_flow(coords1 - coords0, up_mask)
+
+            flow_predictions.append(flow_up)
+
+        if test_mode:
+            return coords1 - coords0, flow_up
+
+        return flow_predictions
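As a usage sketch for the RAFT class above: the constructor reads small, mixed_precision and (optionally) dropout / alternate_corr from an args namespace, and forward expects RGB tensors scaled to [0, 255] with height and width divisible by 8. The checkpoint name is hypothetical; published RAFT checkpoints are saved under nn.DataParallel, hence the stripped 'module.' prefix.

import argparse
import torch
from RAFT.raft import RAFT

args = argparse.Namespace(small=False, mixed_precision=False, alternate_corr=False)
model = RAFT(args)

# hypothetical checkpoint path; keys carry a 'module.' prefix from DataParallel
state = torch.load('raft-things.pth', map_location='cpu')
model.load_state_dict({k.replace('module.', ''): v for k, v in state.items()})
model.eval()

# two RGB frames, values in [0, 255], H and W divisible by 8
image1 = torch.rand(1, 3, 368, 496) * 255
image2 = torch.rand(1, 3, 368, 496) * 255

with torch.no_grad():
    flow_low, flow_up = model(image1, image2, iters=20, test_mode=True)
print(flow_up.shape)  # (1, 2, 368, 496)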
SD-CN-Animation/RAFT/update.py
ADDED
@@ -0,0 +1,139 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class FlowHead(nn.Module):
+    def __init__(self, input_dim=128, hidden_dim=256):
+        super(FlowHead, self).__init__()
+        self.conv1 = nn.Conv2d(input_dim, hidden_dim, 3, padding=1)
+        self.conv2 = nn.Conv2d(hidden_dim, 2, 3, padding=1)
+        self.relu = nn.ReLU(inplace=True)
+
+    def forward(self, x):
+        return self.conv2(self.relu(self.conv1(x)))
+
+class ConvGRU(nn.Module):
+    def __init__(self, hidden_dim=128, input_dim=192+128):
+        super(ConvGRU, self).__init__()
+        self.convz = nn.Conv2d(hidden_dim+input_dim, hidden_dim, 3, padding=1)
+        self.convr = nn.Conv2d(hidden_dim+input_dim, hidden_dim, 3, padding=1)
+        self.convq = nn.Conv2d(hidden_dim+input_dim, hidden_dim, 3, padding=1)
+
+    def forward(self, h, x):
+        hx = torch.cat([h, x], dim=1)
+
+        z = torch.sigmoid(self.convz(hx))
+        r = torch.sigmoid(self.convr(hx))
+        q = torch.tanh(self.convq(torch.cat([r*h, x], dim=1)))
+
+        h = (1-z) * h + z * q
+        return h
+
+class SepConvGRU(nn.Module):
+    def __init__(self, hidden_dim=128, input_dim=192+128):
+        super(SepConvGRU, self).__init__()
+        self.convz1 = nn.Conv2d(hidden_dim+input_dim, hidden_dim, (1,5), padding=(0,2))
+        self.convr1 = nn.Conv2d(hidden_dim+input_dim, hidden_dim, (1,5), padding=(0,2))
+        self.convq1 = nn.Conv2d(hidden_dim+input_dim, hidden_dim, (1,5), padding=(0,2))
+
+        self.convz2 = nn.Conv2d(hidden_dim+input_dim, hidden_dim, (5,1), padding=(2,0))
+        self.convr2 = nn.Conv2d(hidden_dim+input_dim, hidden_dim, (5,1), padding=(2,0))
+        self.convq2 = nn.Conv2d(hidden_dim+input_dim, hidden_dim, (5,1), padding=(2,0))
+
+
+    def forward(self, h, x):
+        # horizontal
+        hx = torch.cat([h, x], dim=1)
+        z = torch.sigmoid(self.convz1(hx))
+        r = torch.sigmoid(self.convr1(hx))
+        q = torch.tanh(self.convq1(torch.cat([r*h, x], dim=1)))
+        h = (1-z) * h + z * q
+
+        # vertical
+        hx = torch.cat([h, x], dim=1)
+        z = torch.sigmoid(self.convz2(hx))
+        r = torch.sigmoid(self.convr2(hx))
+        q = torch.tanh(self.convq2(torch.cat([r*h, x], dim=1)))
+        h = (1-z) * h + z * q
+
+        return h
+
+class SmallMotionEncoder(nn.Module):
+    def __init__(self, args):
+        super(SmallMotionEncoder, self).__init__()
+        cor_planes = args.corr_levels * (2*args.corr_radius + 1)**2
+        self.convc1 = nn.Conv2d(cor_planes, 96, 1, padding=0)
+        self.convf1 = nn.Conv2d(2, 64, 7, padding=3)
+        self.convf2 = nn.Conv2d(64, 32, 3, padding=1)
+        self.conv = nn.Conv2d(128, 80, 3, padding=1)
+
+    def forward(self, flow, corr):
+        cor = F.relu(self.convc1(corr))
+        flo = F.relu(self.convf1(flow))
+        flo = F.relu(self.convf2(flo))
+        cor_flo = torch.cat([cor, flo], dim=1)
+        out = F.relu(self.conv(cor_flo))
+        return torch.cat([out, flow], dim=1)
+
+class BasicMotionEncoder(nn.Module):
+    def __init__(self, args):
+        super(BasicMotionEncoder, self).__init__()
+        cor_planes = args.corr_levels * (2*args.corr_radius + 1)**2
+        self.convc1 = nn.Conv2d(cor_planes, 256, 1, padding=0)
+        self.convc2 = nn.Conv2d(256, 192, 3, padding=1)
+        self.convf1 = nn.Conv2d(2, 128, 7, padding=3)
+        self.convf2 = nn.Conv2d(128, 64, 3, padding=1)
+        self.conv = nn.Conv2d(64+192, 128-2, 3, padding=1)
+
+    def forward(self, flow, corr):
+        cor = F.relu(self.convc1(corr))
+        cor = F.relu(self.convc2(cor))
+        flo = F.relu(self.convf1(flow))
+        flo = F.relu(self.convf2(flo))
+
+        cor_flo = torch.cat([cor, flo], dim=1)
+        out = F.relu(self.conv(cor_flo))
+        return torch.cat([out, flow], dim=1)
+
+class SmallUpdateBlock(nn.Module):
+    def __init__(self, args, hidden_dim=96):
+        super(SmallUpdateBlock, self).__init__()
+        self.encoder = SmallMotionEncoder(args)
+        self.gru = ConvGRU(hidden_dim=hidden_dim, input_dim=82+64)
+        self.flow_head = FlowHead(hidden_dim, hidden_dim=128)
+
+    def forward(self, net, inp, corr, flow):
+        motion_features = self.encoder(flow, corr)
+        inp = torch.cat([inp, motion_features], dim=1)
+        net = self.gru(net, inp)
+        delta_flow = self.flow_head(net)
+
+        return net, None, delta_flow
+
+class BasicUpdateBlock(nn.Module):
+    def __init__(self, args, hidden_dim=128, input_dim=128):
+        super(BasicUpdateBlock, self).__init__()
+        self.args = args
+        self.encoder = BasicMotionEncoder(args)
+        self.gru = SepConvGRU(hidden_dim=hidden_dim, input_dim=128+hidden_dim)
+        self.flow_head = FlowHead(hidden_dim, hidden_dim=256)
+
+        self.mask = nn.Sequential(
+            nn.Conv2d(128, 256, 3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(256, 64*9, 1, padding=0))
+
+    def forward(self, net, inp, corr, flow, upsample=True):
+        motion_features = self.encoder(flow, corr)
+        inp = torch.cat([inp, motion_features], dim=1)
+
+        net = self.gru(net, inp)
+        delta_flow = self.flow_head(net)
+
+        # scale mask to balence gradients
+        mask = .25 * self.mask(net)
+        return net, mask, delta_flow
+
+
+
SD-CN-Animation/RAFT/utils/__init__.py
ADDED
File without changes
SD-CN-Animation/RAFT/utils/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (166 Bytes).
SD-CN-Animation/RAFT/utils/__pycache__/utils.cpython-310.pyc
ADDED
Binary file (3.12 kB).
SD-CN-Animation/RAFT/utils/augmentor.py
ADDED
@@ -0,0 +1,246 @@
+import numpy as np
+import random
+import math
+from PIL import Image
+
+import cv2
+cv2.setNumThreads(0)
+cv2.ocl.setUseOpenCL(False)
+
+import torch
+from torchvision.transforms import ColorJitter
+import torch.nn.functional as F
+
+
+class FlowAugmentor:
+    def __init__(self, crop_size, min_scale=-0.2, max_scale=0.5, do_flip=True):
+
+        # spatial augmentation params
+        self.crop_size = crop_size
+        self.min_scale = min_scale
+        self.max_scale = max_scale
+        self.spatial_aug_prob = 0.8
+        self.stretch_prob = 0.8
+        self.max_stretch = 0.2
+
+        # flip augmentation params
+        self.do_flip = do_flip
+        self.h_flip_prob = 0.5
+        self.v_flip_prob = 0.1
+
+        # photometric augmentation params
+        self.photo_aug = ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.5/3.14)
+        self.asymmetric_color_aug_prob = 0.2
+        self.eraser_aug_prob = 0.5
+
+    def color_transform(self, img1, img2):
+        """ Photometric augmentation """
+
+        # asymmetric
+        if np.random.rand() < self.asymmetric_color_aug_prob:
+            img1 = np.array(self.photo_aug(Image.fromarray(img1)), dtype=np.uint8)
+            img2 = np.array(self.photo_aug(Image.fromarray(img2)), dtype=np.uint8)
+
+        # symmetric
+        else:
+            image_stack = np.concatenate([img1, img2], axis=0)
+            image_stack = np.array(self.photo_aug(Image.fromarray(image_stack)), dtype=np.uint8)
+            img1, img2 = np.split(image_stack, 2, axis=0)
+
+        return img1, img2
+
+    def eraser_transform(self, img1, img2, bounds=[50, 100]):
+        """ Occlusion augmentation """
+
+        ht, wd = img1.shape[:2]
+        if np.random.rand() < self.eraser_aug_prob:
+            mean_color = np.mean(img2.reshape(-1, 3), axis=0)
+            for _ in range(np.random.randint(1, 3)):
+                x0 = np.random.randint(0, wd)
+                y0 = np.random.randint(0, ht)
+                dx = np.random.randint(bounds[0], bounds[1])
+                dy = np.random.randint(bounds[0], bounds[1])
+                img2[y0:y0+dy, x0:x0+dx, :] = mean_color
+
+        return img1, img2
+
+    def spatial_transform(self, img1, img2, flow):
+        # randomly sample scale
+        ht, wd = img1.shape[:2]
+        min_scale = np.maximum(
+            (self.crop_size[0] + 8) / float(ht),
+            (self.crop_size[1] + 8) / float(wd))
+
+        scale = 2 ** np.random.uniform(self.min_scale, self.max_scale)
+        scale_x = scale
+        scale_y = scale
+        if np.random.rand() < self.stretch_prob:
+            scale_x *= 2 ** np.random.uniform(-self.max_stretch, self.max_stretch)
+            scale_y *= 2 ** np.random.uniform(-self.max_stretch, self.max_stretch)
+
+        scale_x = np.clip(scale_x, min_scale, None)
+        scale_y = np.clip(scale_y, min_scale, None)
+
+        if np.random.rand() < self.spatial_aug_prob:
+            # rescale the images
+            img1 = cv2.resize(img1, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR)
+            img2 = cv2.resize(img2, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR)
+            flow = cv2.resize(flow, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR)
+            flow = flow * [scale_x, scale_y]
+
+        if self.do_flip:
+            if np.random.rand() < self.h_flip_prob: # h-flip
+                img1 = img1[:, ::-1]
+                img2 = img2[:, ::-1]
+                flow = flow[:, ::-1] * [-1.0, 1.0]
+
+            if np.random.rand() < self.v_flip_prob: # v-flip
+                img1 = img1[::-1, :]
+                img2 = img2[::-1, :]
+                flow = flow[::-1, :] * [1.0, -1.0]
+
+        y0 = np.random.randint(0, img1.shape[0] - self.crop_size[0])
+        x0 = np.random.randint(0, img1.shape[1] - self.crop_size[1])
+
+        img1 = img1[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
+        img2 = img2[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
+        flow = flow[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
+
+        return img1, img2, flow
+
+    def __call__(self, img1, img2, flow):
+        img1, img2 = self.color_transform(img1, img2)
+        img1, img2 = self.eraser_transform(img1, img2)
+        img1, img2, flow = self.spatial_transform(img1, img2, flow)
+
+        img1 = np.ascontiguousarray(img1)
+        img2 = np.ascontiguousarray(img2)
+        flow = np.ascontiguousarray(flow)
+
+        return img1, img2, flow
+
+class SparseFlowAugmentor:
+    def __init__(self, crop_size, min_scale=-0.2, max_scale=0.5, do_flip=False):
+        # spatial augmentation params
+        self.crop_size = crop_size
+        self.min_scale = min_scale
+        self.max_scale = max_scale
+        self.spatial_aug_prob = 0.8
+        self.stretch_prob = 0.8
+        self.max_stretch = 0.2
+
+        # flip augmentation params
+        self.do_flip = do_flip
+        self.h_flip_prob = 0.5
+        self.v_flip_prob = 0.1
+
+        # photometric augmentation params
+        self.photo_aug = ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.3/3.14)
+        self.asymmetric_color_aug_prob = 0.2
+        self.eraser_aug_prob = 0.5
+
+    def color_transform(self, img1, img2):
+        image_stack = np.concatenate([img1, img2], axis=0)
+        image_stack = np.array(self.photo_aug(Image.fromarray(image_stack)), dtype=np.uint8)
+        img1, img2 = np.split(image_stack, 2, axis=0)
+        return img1, img2
+
+    def eraser_transform(self, img1, img2):
+        ht, wd = img1.shape[:2]
+        if np.random.rand() < self.eraser_aug_prob:
+            mean_color = np.mean(img2.reshape(-1, 3), axis=0)
+            for _ in range(np.random.randint(1, 3)):
+                x0 = np.random.randint(0, wd)
+                y0 = np.random.randint(0, ht)
+                dx = np.random.randint(50, 100)
+                dy = np.random.randint(50, 100)
+                img2[y0:y0+dy, x0:x0+dx, :] = mean_color
+
+        return img1, img2
+
+    def resize_sparse_flow_map(self, flow, valid, fx=1.0, fy=1.0):
+        ht, wd = flow.shape[:2]
+        coords = np.meshgrid(np.arange(wd), np.arange(ht))
+        coords = np.stack(coords, axis=-1)
+
+        coords = coords.reshape(-1, 2).astype(np.float32)
+        flow = flow.reshape(-1, 2).astype(np.float32)
+        valid = valid.reshape(-1).astype(np.float32)
+
+        coords0 = coords[valid>=1]
+        flow0 = flow[valid>=1]
+
+        ht1 = int(round(ht * fy))
+        wd1 = int(round(wd * fx))
+
+        coords1 = coords0 * [fx, fy]
+        flow1 = flow0 * [fx, fy]
+
+        xx = np.round(coords1[:,0]).astype(np.int32)
+        yy = np.round(coords1[:,1]).astype(np.int32)
+
+        v = (xx > 0) & (xx < wd1) & (yy > 0) & (yy < ht1)
+        xx = xx[v]
+        yy = yy[v]
+        flow1 = flow1[v]
+
+        flow_img = np.zeros([ht1, wd1, 2], dtype=np.float32)
+        valid_img = np.zeros([ht1, wd1], dtype=np.int32)
+
+        flow_img[yy, xx] = flow1
+        valid_img[yy, xx] = 1
+
+        return flow_img, valid_img
+
+    def spatial_transform(self, img1, img2, flow, valid):
+        # randomly sample scale
+
+        ht, wd = img1.shape[:2]
+        min_scale = np.maximum(
+            (self.crop_size[0] + 1) / float(ht),
+            (self.crop_size[1] + 1) / float(wd))
+
+        scale = 2 ** np.random.uniform(self.min_scale, self.max_scale)
+        scale_x = np.clip(scale, min_scale, None)
+        scale_y = np.clip(scale, min_scale, None)
+
+        if np.random.rand() < self.spatial_aug_prob:
+            # rescale the images
+            img1 = cv2.resize(img1, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR)
+            img2 = cv2.resize(img2, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR)
+            flow, valid = self.resize_sparse_flow_map(flow, valid, fx=scale_x, fy=scale_y)
+
+        if self.do_flip:
+            if np.random.rand() < 0.5: # h-flip
+                img1 = img1[:, ::-1]
+                img2 = img2[:, ::-1]
+                flow = flow[:, ::-1] * [-1.0, 1.0]
+                valid = valid[:, ::-1]
+
+        margin_y = 20
+        margin_x = 50
+
+        y0 = np.random.randint(0, img1.shape[0] - self.crop_size[0] + margin_y)
+        x0 = np.random.randint(-margin_x, img1.shape[1] - self.crop_size[1] + margin_x)
+
+        y0 = np.clip(y0, 0, img1.shape[0] - self.crop_size[0])
+        x0 = np.clip(x0, 0, img1.shape[1] - self.crop_size[1])
+
+        img1 = img1[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
+        img2 = img2[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
+        flow = flow[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
+        valid = valid[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
+        return img1, img2, flow, valid
+
+
+    def __call__(self, img1, img2, flow, valid):
+        img1, img2 = self.color_transform(img1, img2)
+        img1, img2 = self.eraser_transform(img1, img2)
+        img1, img2, flow, valid = self.spatial_transform(img1, img2, flow, valid)
+
+        img1 = np.ascontiguousarray(img1)
+        img2 = np.ascontiguousarray(img2)
+        flow = np.ascontiguousarray(flow)
+        valid = np.ascontiguousarray(valid)
+
+        return img1, img2, flow, valid
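A small sketch of the dense FlowAugmentor's expected inputs: uint8 HxWx3 image pairs and a float HxWx2 flow field, cropped to crop_size on the way out. The crop size and random arrays here are illustrative assumptions.

import numpy as np
from RAFT.utils.augmentor import FlowAugmentor

aug = FlowAugmentor(crop_size=(368, 496), min_scale=-0.2, max_scale=0.5, do_flip=True)

img1 = np.random.randint(0, 255, (436, 1024, 3), dtype=np.uint8)
img2 = np.random.randint(0, 255, (436, 1024, 3), dtype=np.uint8)
flow = np.random.randn(436, 1024, 2).astype(np.float32)

img1, img2, flow = aug(img1, img2, flow)
print(img1.shape, flow.shape)  # (368, 496, 3) (368, 496, 2)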
SD-CN-Animation/RAFT/utils/flow_viz.py
ADDED
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Flow visualization code used from https://github.com/tomrunia/OpticalFlow_Visualization
|
2 |
+
|
3 |
+
|
4 |
+
# MIT License
|
5 |
+
#
|
6 |
+
# Copyright (c) 2018 Tom Runia
|
7 |
+
#
|
8 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
9 |
+
# of this software and associated documentation files (the "Software"), to deal
|
10 |
+
# in the Software without restriction, including without limitation the rights
|
11 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
12 |
+
# copies of the Software, and to permit persons to whom the Software is
|
13 |
+
# furnished to do so, subject to conditions.
|
14 |
+
#
|
15 |
+
# Author: Tom Runia
|
16 |
+
# Date Created: 2018-08-03
|
17 |
+
|
18 |
+
import numpy as np
|
19 |
+
|
20 |
+
def make_colorwheel():
|
21 |
+
"""
|
22 |
+
Generates a color wheel for optical flow visualization as presented in:
|
23 |
+
Baker et al. "A Database and Evaluation Methodology for Optical Flow" (ICCV, 2007)
|
24 |
+
URL: http://vision.middlebury.edu/flow/flowEval-iccv07.pdf
|
25 |
+
|
26 |
+
Code follows the original C++ source code of Daniel Scharstein.
|
27 |
+
Code follows the the Matlab source code of Deqing Sun.
|
28 |
+
|
29 |
+
Returns:
|
30 |
+
np.ndarray: Color wheel
|
31 |
+
"""
|
32 |
+
|
33 |
+
RY = 15
|
34 |
+
YG = 6
|
35 |
+
GC = 4
|
36 |
+
CB = 11
|
37 |
+
BM = 13
|
38 |
+
MR = 6
|
39 |
+
|
40 |
+
ncols = RY + YG + GC + CB + BM + MR
|
41 |
+
colorwheel = np.zeros((ncols, 3))
|
42 |
+
col = 0
|
43 |
+
|
44 |
+
# RY
|
45 |
+
colorwheel[0:RY, 0] = 255
|
46 |
+
colorwheel[0:RY, 1] = np.floor(255*np.arange(0,RY)/RY)
|
47 |
+
col = col+RY
|
48 |
+
# YG
|
49 |
+
colorwheel[col:col+YG, 0] = 255 - np.floor(255*np.arange(0,YG)/YG)
|
50 |
+
colorwheel[col:col+YG, 1] = 255
|
51 |
+
col = col+YG
|
52 |
+
# GC
|
53 |
+
colorwheel[col:col+GC, 1] = 255
|
54 |
+
colorwheel[col:col+GC, 2] = np.floor(255*np.arange(0,GC)/GC)
|
55 |
+
col = col+GC
|
56 |
+
# CB
|
57 |
+
colorwheel[col:col+CB, 1] = 255 - np.floor(255*np.arange(CB)/CB)
|
58 |
+
colorwheel[col:col+CB, 2] = 255
|
59 |
+
col = col+CB
|
60 |
+
# BM
|
61 |
+
colorwheel[col:col+BM, 2] = 255
|
62 |
+
colorwheel[col:col+BM, 0] = np.floor(255*np.arange(0,BM)/BM)
|
63 |
+
col = col+BM
|
64 |
+
# MR
|
65 |
+
colorwheel[col:col+MR, 2] = 255 - np.floor(255*np.arange(MR)/MR)
|
66 |
+
colorwheel[col:col+MR, 0] = 255
|
67 |
+
return colorwheel
|
68 |
+
|
69 |
+
|
70 |
+
def flow_uv_to_colors(u, v, convert_to_bgr=False):
|
71 |
+
"""
|
72 |
+
Applies the flow color wheel to (possibly clipped) flow components u and v.
|
73 |
+
|
74 |
+
According to the C++ source code of Daniel Scharstein
|
75 |
+
According to the Matlab source code of Deqing Sun
|
76 |
+
|
77 |
+
Args:
|
78 |
+
u (np.ndarray): Input horizontal flow of shape [H,W]
|
79 |
+
v (np.ndarray): Input vertical flow of shape [H,W]
|
80 |
+
convert_to_bgr (bool, optional): Convert output image to BGR. Defaults to False.
|
81 |
+
|
82 |
+
Returns:
|
83 |
+
np.ndarray: Flow visualization image of shape [H,W,3]
|
84 |
+
"""
|
85 |
+
flow_image = np.zeros((u.shape[0], u.shape[1], 3), np.uint8)
|
86 |
+
colorwheel = make_colorwheel() # shape [55x3]
|
87 |
+
ncols = colorwheel.shape[0]
|
88 |
+
rad = np.sqrt(np.square(u) + np.square(v))
|
89 |
+
a = np.arctan2(-v, -u)/np.pi
|
90 |
+
fk = (a+1) / 2*(ncols-1)
|
91 |
+
k0 = np.floor(fk).astype(np.int32)
|
92 |
+
k1 = k0 + 1
|
93 |
+
k1[k1 == ncols] = 0
|
94 |
+
f = fk - k0
|
95 |
+
for i in range(colorwheel.shape[1]):
|
96 |
+
tmp = colorwheel[:,i]
|
97 |
+
col0 = tmp[k0] / 255.0
|
98 |
+
col1 = tmp[k1] / 255.0
|
99 |
+
col = (1-f)*col0 + f*col1
|
100 |
+
idx = (rad <= 1)
|
101 |
+
col[idx] = 1 - rad[idx] * (1-col[idx])
|
102 |
+
col[~idx] = col[~idx] * 0.75 # out of range
|
103 |
+
# Note the 2-i => BGR instead of RGB
|
104 |
+
ch_idx = 2-i if convert_to_bgr else i
|
105 |
+
flow_image[:,:,ch_idx] = np.floor(255 * col)
|
106 |
+
return flow_image
|
107 |
+
|
108 |
+
|
109 |
+
def flow_to_image(flow_uv, clip_flow=None, convert_to_bgr=False):
|
110 |
+
"""
|
111 |
+
Expects a two dimensional flow image of shape.
|
112 |
+
|
113 |
+
Args:
|
114 |
+
flow_uv (np.ndarray): Flow UV image of shape [H,W,2]
|
115 |
+
clip_flow (float, optional): Clip maximum of flow values. Defaults to None.
|
116 |
+
convert_to_bgr (bool, optional): Convert output image to BGR. Defaults to False.
|
117 |
+
|
118 |
+
Returns:
|
119 |
+
np.ndarray: Flow visualization image of shape [H,W,3]
|
120 |
+
"""
|
121 |
+
assert flow_uv.ndim == 3, 'input flow must have three dimensions'
|
122 |
+
assert flow_uv.shape[2] == 2, 'input flow must have shape [H,W,2]'
|
123 |
+
if clip_flow is not None:
|
124 |
+
flow_uv = np.clip(flow_uv, 0, clip_flow)
|
125 |
+
u = flow_uv[:,:,0]
|
126 |
+
v = flow_uv[:,:,1]
|
127 |
+
rad = np.sqrt(np.square(u) + np.square(v))
|
128 |
+
rad_max = np.max(rad)
|
129 |
+
epsilon = 1e-5
|
130 |
+
u = u / (rad_max + epsilon)
|
131 |
+
v = v / (rad_max + epsilon)
|
132 |
+
return flow_uv_to_colors(u, v, convert_to_bgr)
|
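Note: a minimal usage sketch of the module above, not part of the committed files. It assumes the directory containing this 'utils' package is on sys.path (txt2vid.py further below appends 'RAFT/core' for this purpose) so that the module resolves as `utils.flow_viz`.
```
import numpy as np
from utils import flow_viz  # assumption: the folder containing this 'utils' package is on sys.path

# Synthetic flow field: u grows left to right, v grows top to bottom.
h, w = 128, 128
u = np.tile(np.linspace(-1.0, 1.0, w), (h, 1))
v = np.tile(np.linspace(-1.0, 1.0, h)[:, None], (1, w))
flow_uv = np.stack([u, v], axis=-1).astype(np.float32)  # shape [H, W, 2]

flow_img = flow_viz.flow_to_image(flow_uv)  # uint8 color image of shape [H, W, 3]
print(flow_img.shape, flow_img.dtype)
```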
SD-CN-Animation/RAFT/utils/frame_utils.py
ADDED
@@ -0,0 +1,137 @@
1 |
+
import numpy as np
|
2 |
+
from PIL import Image
|
3 |
+
from os.path import *
|
4 |
+
import re
|
5 |
+
|
6 |
+
import cv2
|
7 |
+
cv2.setNumThreads(0)
|
8 |
+
cv2.ocl.setUseOpenCL(False)
|
9 |
+
|
10 |
+
TAG_CHAR = np.array([202021.25], np.float32)
|
11 |
+
|
12 |
+
def readFlow(fn):
|
13 |
+
""" Read .flo file in Middlebury format"""
|
14 |
+
# Code adapted from:
|
15 |
+
# http://stackoverflow.com/questions/28013200/reading-middlebury-flow-files-with-python-bytes-array-numpy
|
16 |
+
|
17 |
+
# WARNING: this will work on little-endian architectures (eg Intel x86) only!
|
18 |
+
# print 'fn = %s'%(fn)
|
19 |
+
with open(fn, 'rb') as f:
|
20 |
+
magic = np.fromfile(f, np.float32, count=1)
|
21 |
+
if 202021.25 != magic:
|
22 |
+
print('Magic number incorrect. Invalid .flo file')
|
23 |
+
return None
|
24 |
+
else:
|
25 |
+
w = np.fromfile(f, np.int32, count=1)
|
26 |
+
h = np.fromfile(f, np.int32, count=1)
|
27 |
+
# print 'Reading %d x %d flo file\n' % (w, h)
|
28 |
+
data = np.fromfile(f, np.float32, count=2*int(w)*int(h))
|
29 |
+
# Reshape data into 3D array (columns, rows, bands)
|
30 |
+
# The reshape here is for visualization, the original code is (w,h,2)
|
31 |
+
return np.resize(data, (int(h), int(w), 2))
|
32 |
+
|
33 |
+
def readPFM(file):
|
34 |
+
file = open(file, 'rb')
|
35 |
+
|
36 |
+
color = None
|
37 |
+
width = None
|
38 |
+
height = None
|
39 |
+
scale = None
|
40 |
+
endian = None
|
41 |
+
|
42 |
+
header = file.readline().rstrip()
|
43 |
+
if header == b'PF':
|
44 |
+
color = True
|
45 |
+
elif header == b'Pf':
|
46 |
+
color = False
|
47 |
+
else:
|
48 |
+
raise Exception('Not a PFM file.')
|
49 |
+
|
50 |
+
dim_match = re.match(rb'^(\d+)\s(\d+)\s$', file.readline())
|
51 |
+
if dim_match:
|
52 |
+
width, height = map(int, dim_match.groups())
|
53 |
+
else:
|
54 |
+
raise Exception('Malformed PFM header.')
|
55 |
+
|
56 |
+
scale = float(file.readline().rstrip())
|
57 |
+
if scale < 0: # little-endian
|
58 |
+
endian = '<'
|
59 |
+
scale = -scale
|
60 |
+
else:
|
61 |
+
endian = '>' # big-endian
|
62 |
+
|
63 |
+
data = np.fromfile(file, endian + 'f')
|
64 |
+
shape = (height, width, 3) if color else (height, width)
|
65 |
+
|
66 |
+
data = np.reshape(data, shape)
|
67 |
+
data = np.flipud(data)
|
68 |
+
return data
|
69 |
+
|
70 |
+
def writeFlow(filename,uv,v=None):
|
71 |
+
""" Write optical flow to file.
|
72 |
+
|
73 |
+
If v is None, uv is assumed to contain both u and v channels,
|
74 |
+
stacked in depth.
|
75 |
+
Original code by Deqing Sun, adapted from Daniel Scharstein.
|
76 |
+
"""
|
77 |
+
nBands = 2
|
78 |
+
|
79 |
+
if v is None:
|
80 |
+
assert(uv.ndim == 3)
|
81 |
+
assert(uv.shape[2] == 2)
|
82 |
+
u = uv[:,:,0]
|
83 |
+
v = uv[:,:,1]
|
84 |
+
else:
|
85 |
+
u = uv
|
86 |
+
|
87 |
+
assert(u.shape == v.shape)
|
88 |
+
height,width = u.shape
|
89 |
+
f = open(filename,'wb')
|
90 |
+
# write the header
|
91 |
+
f.write(TAG_CHAR)
|
92 |
+
np.array(width).astype(np.int32).tofile(f)
|
93 |
+
np.array(height).astype(np.int32).tofile(f)
|
94 |
+
# arrange into matrix form
|
95 |
+
tmp = np.zeros((height, width*nBands))
|
96 |
+
tmp[:,np.arange(width)*2] = u
|
97 |
+
tmp[:,np.arange(width)*2 + 1] = v
|
98 |
+
tmp.astype(np.float32).tofile(f)
|
99 |
+
f.close()
|
100 |
+
|
101 |
+
|
102 |
+
def readFlowKITTI(filename):
|
103 |
+
flow = cv2.imread(filename, cv2.IMREAD_ANYDEPTH|cv2.IMREAD_COLOR)
|
104 |
+
flow = flow[:,:,::-1].astype(np.float32)
|
105 |
+
flow, valid = flow[:, :, :2], flow[:, :, 2]
|
106 |
+
flow = (flow - 2**15) / 64.0
|
107 |
+
return flow, valid
|
108 |
+
|
109 |
+
def readDispKITTI(filename):
|
110 |
+
disp = cv2.imread(filename, cv2.IMREAD_ANYDEPTH) / 256.0
|
111 |
+
valid = disp > 0.0
|
112 |
+
flow = np.stack([-disp, np.zeros_like(disp)], -1)
|
113 |
+
return flow, valid
|
114 |
+
|
115 |
+
|
116 |
+
def writeFlowKITTI(filename, uv):
|
117 |
+
uv = 64.0 * uv + 2**15
|
118 |
+
valid = np.ones([uv.shape[0], uv.shape[1], 1])
|
119 |
+
uv = np.concatenate([uv, valid], axis=-1).astype(np.uint16)
|
120 |
+
cv2.imwrite(filename, uv[..., ::-1])
|
121 |
+
|
122 |
+
|
123 |
+
def read_gen(file_name, pil=False):
|
124 |
+
ext = splitext(file_name)[-1]
|
125 |
+
if ext == '.png' or ext == '.jpeg' or ext == '.ppm' or ext == '.jpg':
|
126 |
+
return Image.open(file_name)
|
127 |
+
elif ext == '.bin' or ext == '.raw':
|
128 |
+
return np.load(file_name)
|
129 |
+
elif ext == '.flo':
|
130 |
+
return readFlow(file_name).astype(np.float32)
|
131 |
+
elif ext == '.pfm':
|
132 |
+
flow = readPFM(file_name).astype(np.float32)
|
133 |
+
if len(flow.shape) == 2:
|
134 |
+
return flow
|
135 |
+
else:
|
136 |
+
return flow[:, :, :-1]
|
137 |
+
return []
|
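Note: a small, hypothetical roundtrip sketch for the .flo helpers above, not part of the committed files; the import path and file name are assumptions.
```
import numpy as np
from utils import frame_utils  # assumption: the folder containing this 'utils' package is on sys.path

flow = np.random.rand(64, 96, 2).astype(np.float32)  # H x W x 2 flow field
frame_utils.writeFlow('tmp_flow.flo', flow)           # write in Middlebury .flo format
flow_back = frame_utils.readFlow('tmp_flow.flo')      # read it back as float32, shape (H, W, 2)
print(np.allclose(flow, flow_back))                   # True: the roundtrip is lossless for float32 data
```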
SD-CN-Animation/RAFT/utils/utils.py
ADDED
@@ -0,0 +1,82 @@
1 |
+
import torch
|
2 |
+
import torch.nn.functional as F
|
3 |
+
import numpy as np
|
4 |
+
from scipy import interpolate
|
5 |
+
|
6 |
+
|
7 |
+
class InputPadder:
|
8 |
+
""" Pads images such that dimensions are divisible by 8 """
|
9 |
+
def __init__(self, dims, mode='sintel'):
|
10 |
+
self.ht, self.wd = dims[-2:]
|
11 |
+
pad_ht = (((self.ht // 8) + 1) * 8 - self.ht) % 8
|
12 |
+
pad_wd = (((self.wd // 8) + 1) * 8 - self.wd) % 8
|
13 |
+
if mode == 'sintel':
|
14 |
+
self._pad = [pad_wd//2, pad_wd - pad_wd//2, pad_ht//2, pad_ht - pad_ht//2]
|
15 |
+
else:
|
16 |
+
self._pad = [pad_wd//2, pad_wd - pad_wd//2, 0, pad_ht]
|
17 |
+
|
18 |
+
def pad(self, *inputs):
|
19 |
+
return [F.pad(x, self._pad, mode='replicate') for x in inputs]
|
20 |
+
|
21 |
+
def unpad(self,x):
|
22 |
+
ht, wd = x.shape[-2:]
|
23 |
+
c = [self._pad[2], ht-self._pad[3], self._pad[0], wd-self._pad[1]]
|
24 |
+
return x[..., c[0]:c[1], c[2]:c[3]]
|
25 |
+
|
26 |
+
def forward_interpolate(flow):
|
27 |
+
flow = flow.detach().cpu().numpy()
|
28 |
+
dx, dy = flow[0], flow[1]
|
29 |
+
|
30 |
+
ht, wd = dx.shape
|
31 |
+
x0, y0 = np.meshgrid(np.arange(wd), np.arange(ht))
|
32 |
+
|
33 |
+
x1 = x0 + dx
|
34 |
+
y1 = y0 + dy
|
35 |
+
|
36 |
+
x1 = x1.reshape(-1)
|
37 |
+
y1 = y1.reshape(-1)
|
38 |
+
dx = dx.reshape(-1)
|
39 |
+
dy = dy.reshape(-1)
|
40 |
+
|
41 |
+
valid = (x1 > 0) & (x1 < wd) & (y1 > 0) & (y1 < ht)
|
42 |
+
x1 = x1[valid]
|
43 |
+
y1 = y1[valid]
|
44 |
+
dx = dx[valid]
|
45 |
+
dy = dy[valid]
|
46 |
+
|
47 |
+
flow_x = interpolate.griddata(
|
48 |
+
(x1, y1), dx, (x0, y0), method='nearest', fill_value=0)
|
49 |
+
|
50 |
+
flow_y = interpolate.griddata(
|
51 |
+
(x1, y1), dy, (x0, y0), method='nearest', fill_value=0)
|
52 |
+
|
53 |
+
flow = np.stack([flow_x, flow_y], axis=0)
|
54 |
+
return torch.from_numpy(flow).float()
|
55 |
+
|
56 |
+
|
57 |
+
def bilinear_sampler(img, coords, mode='bilinear', mask=False):
|
58 |
+
""" Wrapper for grid_sample, uses pixel coordinates """
|
59 |
+
H, W = img.shape[-2:]
|
60 |
+
xgrid, ygrid = coords.split([1,1], dim=-1)
|
61 |
+
xgrid = 2*xgrid/(W-1) - 1
|
62 |
+
ygrid = 2*ygrid/(H-1) - 1
|
63 |
+
|
64 |
+
grid = torch.cat([xgrid, ygrid], dim=-1)
|
65 |
+
img = F.grid_sample(img, grid, align_corners=True)
|
66 |
+
|
67 |
+
if mask:
|
68 |
+
mask = (xgrid > -1) & (ygrid > -1) & (xgrid < 1) & (ygrid < 1)
|
69 |
+
return img, mask.float()
|
70 |
+
|
71 |
+
return img
|
72 |
+
|
73 |
+
|
74 |
+
def coords_grid(batch, ht, wd, device):
|
75 |
+
coords = torch.meshgrid(torch.arange(ht, device=device), torch.arange(wd, device=device))
|
76 |
+
coords = torch.stack(coords[::-1], dim=0).float()
|
77 |
+
return coords[None].repeat(batch, 1, 1, 1)
|
78 |
+
|
79 |
+
|
80 |
+
def upflow8(flow, mode='bilinear'):
|
81 |
+
new_size = (8 * flow.shape[2], 8 * flow.shape[3])
|
82 |
+
return 8 * F.interpolate(flow, size=new_size, mode=mode, align_corners=True)
|
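Note: a minimal sketch of how `InputPadder` is typically used around a RAFT call, matching the way flow_utils.py further below uses it; the tensors here are dummies and the import assumes the RAFT folder is on sys.path.
```
import torch
from utils.utils import InputPadder  # same import as in flow_utils.py below (RAFT folder on sys.path)

# Two dummy frames, N x C x H x W, with H and W not divisible by 8.
frame1 = torch.rand(1, 3, 300, 500)
frame2 = torch.rand(1, 3, 300, 500)

padder = InputPadder(frame1.shape)           # computes the padding needed to reach multiples of 8
image1, image2 = padder.pad(frame1, frame2)  # both frames padded identically
print(image1.shape)                          # torch.Size([1, 3, 304, 504])

# After the model produces a flow at the padded size, crop it back to the original size:
flow_padded = torch.zeros(1, 2, 304, 504)
flow = padder.unpad(flow_padded)
print(flow.shape)                            # torch.Size([1, 2, 300, 500])
```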
SD-CN-Animation/examples/bonefire_1.mp4
ADDED
Binary file (840 kB).
|
|
SD-CN-Animation/examples/bonfire_1.gif
ADDED
SD-CN-Animation/examples/cn_settings.png
ADDED
SD-CN-Animation/examples/diamond_4.gif
ADDED
SD-CN-Animation/examples/diamond_4.mp4
ADDED
Binary file (353 kB).
|
|
SD-CN-Animation/examples/flower_1.gif
ADDED
Git LFS Details
|
SD-CN-Animation/examples/flower_1.mp4
ADDED
@@ -0,0 +1,3 @@
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8db0719d9f215b775ae1b5dae912a425bc010f0586b41894c14bb8ad042711e
|
3 |
+
size 1259280
|
SD-CN-Animation/examples/flower_11.mp4
ADDED
@@ -0,0 +1,3 @@
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7499401998e41c65471963d6cbd70568908dd83a8c957a43940df99be7c52026
|
3 |
+
size 1328049
|
SD-CN-Animation/examples/girl_org.gif
ADDED
Git LFS Details
|
SD-CN-Animation/examples/girl_to_jc.gif
ADDED
Git LFS Details
|
SD-CN-Animation/examples/girl_to_jc.mp4
ADDED
@@ -0,0 +1,3 @@
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d09ded8b44f7e30d55d5d6245d9ec7fa3b95e970a8c29d2c544b6c288341e39
|
3 |
+
size 5274033
|
SD-CN-Animation/examples/girl_to_wc.gif
ADDED
Git LFS Details
|
SD-CN-Animation/examples/girl_to_wc.mp4
ADDED
@@ -0,0 +1,3 @@
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd730de667b8e7ea5af2dddcf129095694349f126e06291c9b1c2bb7d49843a8
|
3 |
+
size 5630710
|
SD-CN-Animation/examples/gold_1.gif
ADDED
Git LFS Details
|
SD-CN-Animation/examples/gold_1.mp4
ADDED
Binary file (636 kB).
|
|
SD-CN-Animation/examples/macaroni_1.gif
ADDED
Git LFS Details
|
SD-CN-Animation/examples/macaroni_1.mp4
ADDED
Binary file (731 kB).
|
|
SD-CN-Animation/examples/tree_2.gif
ADDED
Git LFS Details
|
SD-CN-Animation/examples/tree_2.mp4
ADDED
@@ -0,0 +1,3 @@
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e2bdab6694727e6dab21e49311efbc21296d445bf11473de6867848b305d3775
|
3 |
+
size 1333426
|
SD-CN-Animation/examples/ui_preview.png
ADDED
SD-CN-Animation/install.py
ADDED
@@ -0,0 +1,20 @@
1 |
+
import launch
|
2 |
+
import os
|
3 |
+
import pkg_resources
|
4 |
+
|
5 |
+
req_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), "requirements.txt")
|
6 |
+
|
7 |
+
with open(req_file) as file:
|
8 |
+
for package in file:
|
9 |
+
try:
|
10 |
+
package = package.strip()
|
11 |
+
if '==' in package:
|
12 |
+
package_name, package_version = package.split('==')
|
13 |
+
installed_version = pkg_resources.get_distribution(package_name).version
|
14 |
+
if installed_version != package_version:
|
15 |
+
launch.run_pip(f"install {package}", f"SD-CN-Animation requirement: changing {package_name} version from {installed_version} to {package_version}")
|
16 |
+
elif not launch.is_installed(package):
|
17 |
+
launch.run_pip(f"install {package}", f"SD-CN-Animation requirement: {package}")
|
18 |
+
except Exception as e:
|
19 |
+
print(e)
|
20 |
+
print(f'Warning: Failed to install {package}.')
|
SD-CN-Animation/old_scripts/compute_flow.py
ADDED
@@ -0,0 +1,75 @@
1 |
+
import cv2
|
2 |
+
import base64
|
3 |
+
import numpy as np
|
4 |
+
from tqdm import tqdm
|
5 |
+
import os
|
6 |
+
|
7 |
+
from flow_utils import RAFT_estimate_flow
|
8 |
+
import h5py
|
9 |
+
|
10 |
+
import argparse
|
11 |
+
|
12 |
+
def main(args):
|
13 |
+
W, H = args.width, args.height
|
14 |
+
# Open the input video file
|
15 |
+
input_video = cv2.VideoCapture(args.input_video)
|
16 |
+
|
17 |
+
# Get useful info from the source video
|
18 |
+
fps = int(input_video.get(cv2.CAP_PROP_FPS))
|
19 |
+
total_frames = int(input_video.get(cv2.CAP_PROP_FRAME_COUNT))
|
20 |
+
|
21 |
+
prev_frame = None
|
22 |
+
|
23 |
+
# create an empty HDF5 file
|
24 |
+
with h5py.File(args.output_file, 'w') as f: pass
|
25 |
+
|
26 |
+
# open the file for writing a flow maps into it
|
27 |
+
with h5py.File(args.output_file, 'a') as f:
|
28 |
+
flow_maps = f.create_dataset('flow_maps', shape=(0, 2, H, W, 2), maxshape=(None, 2, H, W, 2), dtype=np.float16)
|
29 |
+
|
30 |
+
for ind in tqdm(range(total_frames)):
|
31 |
+
# Read the next frame from the input video
|
32 |
+
if not input_video.isOpened(): break
|
33 |
+
ret, cur_frame = input_video.read()
|
34 |
+
if not ret: break
|
35 |
+
|
36 |
+
cur_frame = cv2.resize(cur_frame, (W, H))
|
37 |
+
|
38 |
+
if prev_frame is not None:
|
39 |
+
next_flow, prev_flow, occlusion_mask, frame1_bg_removed, frame2_bg_removed = RAFT_estimate_flow(prev_frame, cur_frame, subtract_background=args.remove_background)
|
40 |
+
|
41 |
+
# write data into a file
|
42 |
+
flow_maps.resize(ind, axis=0)
|
43 |
+
flow_maps[ind-1, 0] = next_flow
|
44 |
+
flow_maps[ind-1, 1] = prev_flow
|
45 |
+
|
46 |
+
occlusion_mask = np.clip(occlusion_mask * 0.2 * 255, 0, 255).astype(np.uint8)
|
47 |
+
|
48 |
+
if args.visualize:
|
49 |
+
# show the last written frame - useful to catch any issue with the process
|
50 |
+
if args.remove_background:
|
51 |
+
img_show = cv2.hconcat([cur_frame, frame2_bg_removed, occlusion_mask])
|
52 |
+
else:
|
53 |
+
img_show = cv2.hconcat([cur_frame, occlusion_mask])
|
54 |
+
cv2.imshow('Out img', img_show)
|
55 |
+
if cv2.waitKey(1) & 0xFF == ord('q'): exit() # press Q to close the script while processing
|
56 |
+
|
57 |
+
prev_frame = cur_frame.copy()
|
58 |
+
|
59 |
+
# Release the input and output video files
|
60 |
+
input_video.release()
|
61 |
+
|
62 |
+
# Close all windows
|
63 |
+
if args.visualize: cv2.destroyAllWindows()
|
64 |
+
|
65 |
+
if __name__ == '__main__':
|
66 |
+
parser = argparse.ArgumentParser()
|
67 |
+
parser.add_argument('-i', '--input_video', help="Path to input video file", required=True)
|
68 |
+
parser.add_argument('-o', '--output_file', help="Path to output flow file. Stored in *.h5 format", required=True)
|
69 |
+
parser.add_argument('-W', '--width', help='Width of the generated flow maps', default=1024, type=int)
|
70 |
+
parser.add_argument('-H', '--height', help='Height of the generated flow maps', default=576, type=int)
|
71 |
+
parser.add_argument('-v', '--visualize', action='store_true', help='Show processed images and occlusion maps')
|
72 |
+
parser.add_argument('-rb', '--remove_background', action='store_true', help='Remove background of the image')
|
73 |
+
args = parser.parse_args()
|
74 |
+
|
75 |
+
main(args)
|
SD-CN-Animation/old_scripts/flow_utils.py
ADDED
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import cv2
|
3 |
+
|
4 |
+
# RAFT dependencies
|
5 |
+
import sys
|
6 |
+
sys.path.append('RAFT/core')
|
7 |
+
|
8 |
+
from collections import namedtuple
|
9 |
+
import torch
|
10 |
+
import argparse
|
11 |
+
from raft import RAFT
|
12 |
+
from utils.utils import InputPadder
|
13 |
+
|
14 |
+
RAFT_model = None
|
15 |
+
fgbg = cv2.createBackgroundSubtractorMOG2(history=500, varThreshold=16, detectShadows=True)
|
16 |
+
|
17 |
+
def background_subtractor(frame, fgbg):
|
18 |
+
fgmask = fgbg.apply(frame)
|
19 |
+
return cv2.bitwise_and(frame, frame, mask=fgmask)
|
20 |
+
|
21 |
+
def RAFT_estimate_flow(frame1, frame2, device='cuda', subtract_background=True):
|
22 |
+
global RAFT_model
|
23 |
+
if RAFT_model is None:
|
24 |
+
args = argparse.Namespace(**{
|
25 |
+
'model': 'RAFT/models/raft-things.pth',
|
26 |
+
'mixed_precision': True,
|
27 |
+
'small': False,
|
28 |
+
'alternate_corr': False,
|
29 |
+
'path': ""
|
30 |
+
})
|
31 |
+
|
32 |
+
RAFT_model = torch.nn.DataParallel(RAFT(args))
|
33 |
+
RAFT_model.load_state_dict(torch.load(args.model))
|
34 |
+
|
35 |
+
RAFT_model = RAFT_model.module
|
36 |
+
RAFT_model.to(device)
|
37 |
+
RAFT_model.eval()
|
38 |
+
|
39 |
+
if subtract_background:
|
40 |
+
frame1 = background_subtractor(frame1, fgbg)
|
41 |
+
frame2 = background_subtractor(frame2, fgbg)
|
42 |
+
|
43 |
+
with torch.no_grad():
|
44 |
+
frame1_torch = torch.from_numpy(frame1).permute(2, 0, 1).float()[None].to(device)
|
45 |
+
frame2_torch = torch.from_numpy(frame2).permute(2, 0, 1).float()[None].to(device)
|
46 |
+
|
47 |
+
padder = InputPadder(frame1_torch.shape)
|
48 |
+
image1, image2 = padder.pad(frame1_torch, frame2_torch)
|
49 |
+
|
50 |
+
# estimate optical flow
|
51 |
+
_, next_flow = RAFT_model(image1, image2, iters=20, test_mode=True)
|
52 |
+
_, prev_flow = RAFT_model(image2, image1, iters=20, test_mode=True)
|
53 |
+
|
54 |
+
next_flow = next_flow[0].permute(1, 2, 0).cpu().numpy()
|
55 |
+
prev_flow = prev_flow[0].permute(1, 2, 0).cpu().numpy()
|
56 |
+
|
57 |
+
fb_flow = next_flow + prev_flow
|
58 |
+
fb_norm = np.linalg.norm(fb_flow, axis=2)
|
59 |
+
|
60 |
+
occlusion_mask = fb_norm[..., None].repeat(3, axis=-1)
|
61 |
+
|
62 |
+
return next_flow, prev_flow, occlusion_mask, frame1, frame2
|
63 |
+
|
64 |
+
# ... rest of the file ...
|
65 |
+
|
66 |
+
|
67 |
+
def compute_diff_map(next_flow, prev_flow, prev_frame, cur_frame, prev_frame_styled):
|
68 |
+
h, w = cur_frame.shape[:2]
|
69 |
+
|
70 |
+
#print(np.amin(next_flow), np.amax(next_flow))
|
71 |
+
#exit()
|
72 |
+
|
73 |
+
|
74 |
+
fl_w, fl_h = next_flow.shape[:2]
|
75 |
+
|
76 |
+
# normalize flow
|
77 |
+
next_flow = next_flow / np.array([fl_h,fl_w])
|
78 |
+
prev_flow = prev_flow / np.array([fl_h,fl_w])
|
79 |
+
|
80 |
+
# remove low value noise (@alexfredo suggestion)
|
81 |
+
next_flow[np.abs(next_flow) < 0.05] = 0
|
82 |
+
prev_flow[np.abs(prev_flow) < 0.05] = 0
|
83 |
+
|
84 |
+
# resize flow
|
85 |
+
next_flow = cv2.resize(next_flow, (w, h))
|
86 |
+
next_flow = (next_flow * np.array([h,w])).astype(np.float32)
|
87 |
+
prev_flow = cv2.resize(prev_flow, (w, h))
|
88 |
+
prev_flow = (prev_flow * np.array([h,w])).astype(np.float32)
|
89 |
+
|
90 |
+
# Generate sampling grids
|
91 |
+
grid_y, grid_x = torch.meshgrid(torch.arange(0, h), torch.arange(0, w))
|
92 |
+
flow_grid = torch.stack((grid_x, grid_y), dim=0).float()
|
93 |
+
flow_grid += torch.from_numpy(prev_flow).permute(2, 0, 1)
|
94 |
+
flow_grid = flow_grid.unsqueeze(0)
|
95 |
+
flow_grid[:, 0, :, :] = 2 * flow_grid[:, 0, :, :] / (w - 1) - 1
|
96 |
+
flow_grid[:, 1, :, :] = 2 * flow_grid[:, 1, :, :] / (h - 1) - 1
|
97 |
+
flow_grid = flow_grid.permute(0, 2, 3, 1)
|
98 |
+
|
99 |
+
|
100 |
+
prev_frame_torch = torch.from_numpy(prev_frame).float().unsqueeze(0).permute(0, 3, 1, 2) #N, C, H, W
|
101 |
+
prev_frame_styled_torch = torch.from_numpy(prev_frame_styled).float().unsqueeze(0).permute(0, 3, 1, 2) #N, C, H, W
|
102 |
+
|
103 |
+
warped_frame = torch.nn.functional.grid_sample(prev_frame_torch, flow_grid, padding_mode="reflection").permute(0, 2, 3, 1)[0].numpy()
|
104 |
+
warped_frame_styled = torch.nn.functional.grid_sample(prev_frame_styled_torch, flow_grid, padding_mode="reflection").permute(0, 2, 3, 1)[0].numpy()
|
105 |
+
|
106 |
+
#warped_frame = cv2.remap(prev_frame, flow_map, None, cv2.INTER_NEAREST, borderMode = cv2.BORDER_REFLECT)
|
107 |
+
#warped_frame_styled = cv2.remap(prev_frame_styled, flow_map, None, cv2.INTER_NEAREST, borderMode = cv2.BORDER_REFLECT)
|
108 |
+
|
109 |
+
# compute occlusion mask
|
110 |
+
fb_flow = next_flow + prev_flow
|
111 |
+
fb_norm = np.linalg.norm(fb_flow, axis=2)
|
112 |
+
|
113 |
+
occlusion_mask = fb_norm[..., None]
|
114 |
+
|
115 |
+
diff_mask_org = np.abs(warped_frame.astype(np.float32) - cur_frame.astype(np.float32)) / 255
|
116 |
+
diff_mask_org = diff_mask_org.max(axis = -1, keepdims=True)
|
117 |
+
|
118 |
+
diff_mask_stl = np.abs(warped_frame_styled.astype(np.float32) - cur_frame.astype(np.float32)) / 255
|
119 |
+
diff_mask_stl = diff_mask_stl.max(axis = -1, keepdims=True)
|
120 |
+
|
121 |
+
alpha_mask = np.maximum.reduce([occlusion_mask * 0.3, diff_mask_org * 4, diff_mask_stl * 2]) # element-wise max of all three masks (np.maximum with three positional args would treat the third as 'out')
|
122 |
+
alpha_mask = alpha_mask.repeat(3, axis = -1)
|
123 |
+
|
124 |
+
#alpha_mask_blured = cv2.dilate(alpha_mask, np.ones((5, 5), np.float32))
|
125 |
+
alpha_mask = cv2.GaussianBlur(alpha_mask, (51,51), 5, cv2.BORDER_REFLECT)
|
126 |
+
|
127 |
+
alpha_mask = np.clip(alpha_mask, 0, 1)
|
128 |
+
|
129 |
+
return alpha_mask, warped_frame_styled
|
130 |
+
|
131 |
+
def frames_norm(occl): return occl / 127.5 - 1
|
132 |
+
|
133 |
+
def flow_norm(flow): return flow / 255
|
134 |
+
|
135 |
+
def occl_norm(occl): return occl / 127.5 - 1
|
136 |
+
|
137 |
+
def flow_renorm(flow): return flow * 255
|
138 |
+
|
139 |
+
def occl_renorm(occl): return (occl + 1) * 127.5
|
SD-CN-Animation/old_scripts/readme.md
ADDED
@@ -0,0 +1,133 @@
1 |
+
# SD-CN-Animation
|
2 |
+
This project allows you to automate the video stylization task using Stable Diffusion and ControlNet. In contrast to other current text2video methods, it also allows you to generate completely new videos from text at any resolution and length, using any Stable Diffusion model as a backbone, including custom ones. It uses the '[RAFT](https://github.com/princeton-vl/RAFT)' optical flow estimation algorithm to keep the animation stable and to create an inpainting mask that is used to generate the next frame. In text to video mode it relies on the 'FloweR' method (work in progress) that predicts optical flow from the previous frames.
|
3 |
+
|
4 |
+
|
5 |
+
### Video to Video Examples:
|
6 |
+
<!--
|
7 |
+
[![IMAGE_ALT](https://img.youtube.com/vi/j-0niEMm6DU/0.jpg)](https://youtu.be/j-0niEMm6DU)
|
8 |
+
This script can also be used to swap the person in the video, as in this example: https://youtube.com/shorts/be93_dIeZWU
|
9 |
+
-->
|
10 |
+
</table>
|
11 |
+
<table class="center">
|
12 |
+
<tr>
|
13 |
+
<td><img src="examples/girl_org.gif" raw=true></td>
|
14 |
+
<td><img src="examples/girl_to_jc.gif" raw=true></td>
|
15 |
+
<td><img src="examples/girl_to_wc.gif" raw=true></td>
|
16 |
+
</tr>
|
17 |
+
<tr>
|
18 |
+
<td width=33% align="center">Original video</td>
|
19 |
+
<td width=33% align="center">"Jessica Chastain"</td>
|
20 |
+
<td width=33% align="center">"Watercolor painting"</td>
|
21 |
+
</tr>
|
22 |
+
</table>
|
23 |
+
|
24 |
+
The examples presented were generated at 1024x576 resolution using the 'realisticVisionV13_v13' model as a base. They were cropped, downsized, and compressed for better loading speed. You can see them in their original quality in the 'examples' folder.
|
25 |
+
|
26 |
+
### Text to Video Examples:
|
27 |
+
</table>
|
28 |
+
<table class="center">
|
29 |
+
<tr>
|
30 |
+
<td><img src="examples/flower_1.gif" raw=true></td>
|
31 |
+
<td><img src="examples/bonfire_1.gif" raw=true></td>
|
32 |
+
<td><img src="examples/diamond_4.gif" raw=true></td>
|
33 |
+
</tr>
|
34 |
+
<tr>
|
35 |
+
<td width=33% align="center">"close up of a flower"</td>
|
36 |
+
<td width=33% align="center">"bonfire near the camp in the mountains at night"</td>
|
37 |
+
<td width=33% align="center">"close up of a diamond laying on the table"</td>
|
38 |
+
</tr>
|
39 |
+
<tr>
|
40 |
+
<td><img src="examples/macaroni_1.gif" raw=true></td>
|
41 |
+
<td><img src="examples/gold_1.gif" raw=true></td>
|
42 |
+
<td><img src="examples/tree_2.gif" raw=true></td>
|
43 |
+
</tr>
|
44 |
+
<tr>
|
45 |
+
<td width=33% align="center">"close up of macaroni on the plate"</td>
|
46 |
+
<td width=33% align="center">"close up of golden sphere"</td>
|
47 |
+
<td width=33% align="center">"a tree standing in the winter forest"</td>
|
48 |
+
</tr>
|
49 |
+
</table>
|
50 |
+
|
51 |
+
All the examples you see here were originally generated at 512x512 resolution using the 'sd-v1-5-inpainting' model as a base. They were downsized and compressed for better loading speed. You can see them in their original quality in the 'examples' folder. The actual prompts used were in the following format: "RAW photo, {subject}, 8k uhd, dslr, soft lighting, high quality, film grain, Fujifilm XT3"; only the 'subject' part is shown in the table above.
|
52 |
+
|
53 |
+
|
54 |
+
|
55 |
+
## Dependencies
|
56 |
+
To install all the necessary dependencies, run this command:
|
57 |
+
```
|
58 |
+
pip install opencv-python opencv-contrib-python numpy tqdm h5py scikit-image
|
59 |
+
```
|
60 |
+
You also have to set up the RAFT repository as described here: https://github.com/princeton-vl/RAFT . Basically it comes down to running "./download_models.sh" in the RAFT folder to download the models.
|
61 |
+
|
62 |
+
|
63 |
+
## Running the scripts
|
64 |
+
This script works on top of the [Automatic1111/web-ui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) interface via its API, so you have to set it up first. You should also have the [sd-webui-controlnet](https://github.com/Mikubill/sd-webui-controlnet) extension installed, along with the control_hed-fp16 model. If you have web-ui with ControlNet working correctly, you also have to allow the API to control ControlNet. To do so, go to the web-ui settings -> ControlNet tab -> set the "Allow other script to control this extension" checkbox to active, set "Multi ControlNet: Max models amount (requires restart)" to more than 2 -> press "Apply settings".
|
65 |
+
|
66 |
+
|
67 |
+
### Video To Video
|
68 |
+
#### Step 1.
|
69 |
+
To process a video, you first need to precompute the optical flow data before running web-ui, using this command:
|
70 |
+
```
|
71 |
+
python3 compute_flow.py -i "path to your video" -o "path to output file with *.h5 format" -v -W width_of_the_flow_map -H height_of_the_flow_map
|
72 |
+
```
|
73 |
+
The main reason to do this step separately is to save precious GPU memory that is better spent on generating higher quality images. Choose the W and H parameters as high as your GPU can handle, keeping the proportions of the original video resolution. Do not worry if they are higher or lower than the processing resolution; flow maps will be scaled accordingly at the processing stage. This will generate quite a large file that may take up to several gigabytes on the drive even for a minute-long video. If you want to process a long video, consider splitting it into several parts beforehand.
|
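For reference, the resulting file holds a single 'flow_maps' dataset of shape (N, 2, H, W, 2) in float16, exactly as created by compute_flow.py (included later in this commit). A minimal sketch of reading it back; the file name here is just an example:
```
import h5py
import numpy as np

# Hypothetical path: use whatever you passed to compute_flow.py via '-o'.
with h5py.File('my_video_flow.h5', 'r') as f:
    flow_maps = f['flow_maps']               # dataset of shape (N, 2, H, W, 2), float16
    next_flow, prev_flow = flow_maps[0]      # forward and backward flow for the first frame pair
    next_flow = np.asarray(next_flow, dtype=np.float32)
    prev_flow = np.asarray(prev_flow, dtype=np.float32)
    print(flow_maps.shape, next_flow.shape)  # (N, 2, H, W, 2) and (H, W, 2)
```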
74 |
+
|
75 |
+
|
76 |
+
#### Step 2.
|
77 |
+
Run web-ui with the '--api' flag. It is also better to use the '--xformers' flag, since you will want the highest resolution possible and the xformers memory optimization helps greatly.
|
78 |
+
```
|
79 |
+
bash webui.sh --xformers --api
|
80 |
+
```
|
81 |
+
|
82 |
+
|
83 |
+
#### Step 3.
|
84 |
+
Go to the **vid2vid.py** file and change the main parameters (INPUT_VIDEO, FLOW_MAPS, OUTPUT_VIDEO, PROMPT, N_PROMPT, W, H) to the ones you need for your project; a sketch of this parameter block is shown after the command below. The FLOW_MAPS parameter should contain the path to the flow file that you generated in the first step. The script is pretty simple, so you may change other parameters as well, although I would recommend leaving them as they are the first time. Finally, run the script with the command:
|
85 |
+
```
|
86 |
+
python3 vid2vid.py
|
87 |
+
```
|
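For orientation, the parameter block at the top of vid2vid.py looks roughly like this; the values shown are placeholders, and the actual file included later in this commit is the source of truth:
```
INPUT_VIDEO = "path/to/your/video.mp4"    # source video
FLOW_MAPS = "path/to/your/video_flow.h5"  # flow file generated at step 1
OUTPUT_VIDEO = "videos/result.mp4"        # where the stylized video is written

PROMPT = "your prompt here"
N_PROMPT = "your negative prompt here"
w, h = 1024, 576                          # processing resolution
```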
88 |
+
|
89 |
+
|
90 |
+
### Text To Video
|
91 |
+
This method is still in development and works on top of Stable Diffusion and 'FloweR', an optical flow prediction method that is also at an early development stage. Do not expect much from it, as it is more a proof of concept than a complete solution.
|
92 |
+
|
93 |
+
#### Step 1.
|
94 |
+
Download 'FloweR_0.1.pth' model from here: [Google drive link](https://drive.google.com/file/d/1WhzoVIw6Kdg4EjfK9LaTLqFm5dF-IJ7F/view?usp=share_link) and place it in the 'FloweR' folder.
|
95 |
+
|
96 |
+
#### Step 2.
|
97 |
+
Same as in the vid2vid case, run web-ui with the '--api' flag. It is also better to use the '--xformers' flag, since you will want the highest resolution possible and the xformers memory optimization helps greatly.
|
98 |
+
```
|
99 |
+
bash webui.sh --xformers --api
|
100 |
+
```
|
101 |
+
|
102 |
+
#### Step 3.
|
103 |
+
Go to the **txt2vid.py** file and change the main parameters (OUTPUT_VIDEO, PROMPT, N_PROMPT, W, H) to the ones you need for your project; see the sketch after the command below. Again, the script is simple, so you may change other parameters if you want to. Finally, run the script with the command:
|
104 |
+
```
|
105 |
+
python3 txt2vid.py
|
106 |
+
```
|
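The corresponding block at the top of txt2vid.py, again with placeholder values (see the full file later in this commit):
```
OUTPUT_VIDEO = "videos/result.mp4"     # where the generated video is written
PROMPT = "your prompt here"
N_PROMPT = "your negative prompt here"
w, h = 768, 512                        # processing resolution
```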
107 |
+
|
108 |
+
## Last version changes: v0.5
|
109 |
+
* Fixed an issue with the wrong direction of an optical flow applied to an image.
|
110 |
+
* Added text to video mode within txt2vid.py script. Make sure to update new dependencies for this script to work!
|
111 |
+
* Added a threshold on the optical flow before processing the frame to remove white noise that might appear, as suggested by [@alexfredo](https://github.com/alexfredo).
|
112 |
+
* Background removal at the flow computation stage, implemented by [@CaptnSeraph](https://github.com/CaptnSeraph); it should reduce the ghosting effect in most videos processed with the vid2vid script.
|
113 |
+
|
114 |
+
<!--
|
115 |
+
## Last version changes: v0.6
|
116 |
+
* Added separate flag '-rb' for background removal process at the flow computation stage in the compute_flow.py script.
|
117 |
+
* Added flow normalization before rescaling it, so the magnitude of the flow is computed correctly at different resolutions.
|
118 |
+
* Less ghosting and color change in vid2vid mode
|
119 |
+
-->
|
120 |
+
|
121 |
+
<!--
|
122 |
+
## Potential improvements
|
123 |
+
There are several ways overall quality of animation may be improved:
|
124 |
+
* You may process each camera position separately to get a more consistent style of the characters and less ghosting.
|
125 |
+
* Because the quality of the video depends on how well the optical flow was estimated, it might be beneficial to use a high frame rate video as a source, so that the flow is easier to estimate properly.
|
126 |
+
* The quality of flow estimation might be greatly improved with a proper flow estimation model like this one: https://github.com/autonomousvision/unimatch .
|
127 |
+
-->
|
128 |
+
## Licence
|
129 |
+
This repository can only be used for personal/research/non-commercial purposes. However, for commercial requests, please contact me directly at borsky.alexey@gmail.com
|
130 |
+
|
131 |
+
|
132 |
+
|
133 |
+
|
SD-CN-Animation/old_scripts/txt2vid.py
ADDED
@@ -0,0 +1,208 @@
1 |
+
import requests
|
2 |
+
import cv2
|
3 |
+
import base64
|
4 |
+
import numpy as np
|
5 |
+
from tqdm import tqdm
|
6 |
+
import os
|
7 |
+
|
8 |
+
import sys
|
9 |
+
sys.path.append('FloweR/')
|
10 |
+
sys.path.append('RAFT/core')
|
11 |
+
|
12 |
+
import torch
|
13 |
+
from model import FloweR
|
14 |
+
from utils import flow_viz
|
15 |
+
|
16 |
+
from flow_utils import *
|
17 |
+
import skimage
|
18 |
+
import datetime
|
19 |
+
|
20 |
+
|
21 |
+
OUTPUT_VIDEO = f'videos/result_{datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S")}.mp4'
|
22 |
+
|
23 |
+
PROMPT = "people looking at flying robots. Future. People looking to the sky. Stars in the background. Dramatic light, Cinematic light. Soft lighting, high quality, film grain."
|
24 |
+
N_PROMPT = "(deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime:1.4), text, letters, logo, brand, close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck"
|
25 |
+
w,h = 768, 512 # Width and height of the processed image. Note that the actual image will be processed at w x h resolution.
|
26 |
+
|
27 |
+
SAVE_FRAMES = True # saves individual frames into 'out' folder if set True. Again might be helpful with long animations
|
28 |
+
|
29 |
+
PROCESSING_STRENGTH = 0.85
|
30 |
+
FIX_STRENGTH = 0.35
|
31 |
+
|
32 |
+
CFG_SCALE = 5.5
|
33 |
+
|
34 |
+
APPLY_TEMPORALNET = False
|
35 |
+
APPLY_COLOR = False
|
36 |
+
|
37 |
+
VISUALIZE = True
|
38 |
+
DEVICE = 'cuda'
|
39 |
+
|
40 |
+
def to_b64(img):
|
41 |
+
img_cliped = np.clip(img, 0, 255).astype(np.uint8)
|
42 |
+
_, buffer = cv2.imencode('.png', img_cliped)
|
43 |
+
b64img = base64.b64encode(buffer).decode("utf-8")
|
44 |
+
return b64img
|
45 |
+
|
46 |
+
class controlnetRequest():
|
47 |
+
def __init__(self, b64_init_img = None, b64_prev_img = None, b64_color_img = None, ds = 0.35, w=w, h=h, mask = None, seed=-1, mode='img2img'):
|
48 |
+
self.url = f"http://localhost:7860/sdapi/v1/{mode}"
|
49 |
+
self.body = {
|
50 |
+
"init_images": [b64_init_img],
|
51 |
+
"mask": mask,
|
52 |
+
"mask_blur": 0,
|
53 |
+
"inpainting_fill": 1,
|
54 |
+
"inpainting_mask_invert": 0,
|
55 |
+
"prompt": PROMPT,
|
56 |
+
"negative_prompt": N_PROMPT,
|
57 |
+
"seed": seed,
|
58 |
+
"subseed": -1,
|
59 |
+
"subseed_strength": 0,
|
60 |
+
"batch_size": 1,
|
61 |
+
"n_iter": 1,
|
62 |
+
"steps": 15,
|
63 |
+
"cfg_scale": CFG_SCALE,
|
64 |
+
"denoising_strength": ds,
|
65 |
+
"width": w,
|
66 |
+
"height": h,
|
67 |
+
"restore_faces": False,
|
68 |
+
"eta": 0,
|
69 |
+
"sampler_index": "DPM++ 2S a",
|
70 |
+
"control_net_enabled": True,
|
71 |
+
"alwayson_scripts": {
|
72 |
+
"ControlNet":{"args": []}
|
73 |
+
},
|
74 |
+
}
|
75 |
+
|
76 |
+
if APPLY_TEMPORALNET:
|
77 |
+
self.body["alwayson_scripts"]["ControlNet"]["args"].append({
|
78 |
+
"input_image": b64_prev_img,
|
79 |
+
"module": "none",
|
80 |
+
"model": "diff_control_sd15_temporalnet_fp16 [adc6bd97]",
|
81 |
+
"weight": 0.65,
|
82 |
+
"resize_mode": "Just Resize",
|
83 |
+
"lowvram": False,
|
84 |
+
"processor_res": 512,
|
85 |
+
"guidance_start": 0,
|
86 |
+
"guidance_end": 0.65,
|
87 |
+
"guessmode": False
|
88 |
+
})
|
89 |
+
|
90 |
+
if APPLY_COLOR:
|
91 |
+
self.body["alwayson_scripts"]["ControlNet"]["args"].append({
|
92 |
+
"input_image": b64_prev_img,
|
93 |
+
"module": "color",
|
94 |
+
"model": "t2iadapter_color_sd14v1 [8522029d]",
|
95 |
+
"weight": 0.65,
|
96 |
+
"resize_mode": "Just Resize",
|
97 |
+
"lowvram": False,
|
98 |
+
"processor_res": 512,
|
99 |
+
"guidance_start": 0,
|
100 |
+
"guidance_end": 0.65,
|
101 |
+
"guessmode": False
|
102 |
+
})
|
103 |
+
|
104 |
+
|
105 |
+
def sendRequest(self):
|
106 |
+
# Request to web-ui
|
107 |
+
data_js = requests.post(self.url, json=self.body).json()
|
108 |
+
|
109 |
+
# Convert the byte array to a NumPy array
|
110 |
+
image_bytes = base64.b64decode(data_js["images"][0])
|
111 |
+
np_array = np.frombuffer(image_bytes, dtype=np.uint8)
|
112 |
+
|
113 |
+
# Convert the NumPy array to a cv2 image
|
114 |
+
out_image = cv2.imdecode(np_array, cv2.IMREAD_COLOR)
|
115 |
+
return out_image
|
116 |
+
|
117 |
+
|
118 |
+
|
119 |
+
if VISUALIZE: cv2.namedWindow('Out img')
|
120 |
+
|
121 |
+
|
122 |
+
# Create an output video file with the same fps, width, and height as the input video
|
123 |
+
output_video = cv2.VideoWriter(OUTPUT_VIDEO, cv2.VideoWriter_fourcc(*'mp4v'), 15, (w, h))
|
124 |
+
|
125 |
+
prev_frame = None
|
126 |
+
prev_frame_styled = None
|
127 |
+
|
128 |
+
|
129 |
+
# Instantiate the model
|
130 |
+
model = FloweR(input_size = (h, w))
|
131 |
+
model.load_state_dict(torch.load('FloweR/FloweR_0.1.1.pth'))
|
132 |
+
# Move the model to the device
|
133 |
+
model = model.to(DEVICE)
|
134 |
+
|
135 |
+
|
136 |
+
init_frame = controlnetRequest(mode='txt2img', ds=PROCESSING_STRENGTH, w=w, h=h).sendRequest()
|
137 |
+
|
138 |
+
output_video.write(init_frame)
|
139 |
+
prev_frame = init_frame
|
140 |
+
|
141 |
+
clip_frames = np.zeros((4, h, w, 3), dtype=np.uint8)
|
142 |
+
|
143 |
+
color_shift = np.zeros((0, 3))
|
144 |
+
color_scale = np.zeros((0, 3))
|
145 |
+
for ind in tqdm(range(450)):
|
146 |
+
clip_frames = np.roll(clip_frames, -1, axis=0)
|
147 |
+
clip_frames[-1] = prev_frame
|
148 |
+
|
149 |
+
clip_frames_torch = frames_norm(torch.from_numpy(clip_frames).to(DEVICE, dtype=torch.float32))
|
150 |
+
|
151 |
+
with torch.no_grad():
|
152 |
+
pred_data = model(clip_frames_torch.unsqueeze(0))[0]
|
153 |
+
|
154 |
+
pred_flow = flow_renorm(pred_data[...,:2]).cpu().numpy()
|
155 |
+
pred_occl = occl_renorm(pred_data[...,2:3]).cpu().numpy().repeat(3, axis = -1)
|
156 |
+
|
157 |
+
pred_flow = pred_flow / (1 + np.linalg.norm(pred_flow, axis=-1, keepdims=True) * 0.05)
|
158 |
+
pred_flow = cv2.GaussianBlur(pred_flow, (31,31), 1, cv2.BORDER_REFLECT_101)
|
159 |
+
|
160 |
+
|
161 |
+
pred_occl = cv2.GaussianBlur(pred_occl, (21,21), 2, cv2.BORDER_REFLECT_101)
|
162 |
+
pred_occl = (np.abs(pred_occl / 255) ** 1.5) * 255
|
163 |
+
pred_occl = np.clip(pred_occl * 25, 0, 255).astype(np.uint8)
|
164 |
+
|
165 |
+
flow_map = pred_flow.copy()
|
166 |
+
flow_map[:,:,0] += np.arange(w)
|
167 |
+
flow_map[:,:,1] += np.arange(h)[:,np.newaxis]
|
168 |
+
|
169 |
+
warped_frame = cv2.remap(prev_frame, flow_map, None, cv2.INTER_CUBIC, borderMode = cv2.BORDER_REFLECT_101)
|
170 |
+
|
171 |
+
out_image = warped_frame.copy()
|
172 |
+
|
173 |
+
out_image = controlnetRequest(
|
174 |
+
b64_init_img = to_b64(out_image),
|
175 |
+
b64_prev_img = to_b64(prev_frame),
|
176 |
+
b64_color_img = to_b64(warped_frame),
|
177 |
+
mask = to_b64(pred_occl),
|
178 |
+
ds=PROCESSING_STRENGTH, w=w, h=h).sendRequest()
|
179 |
+
|
180 |
+
out_image = controlnetRequest(
|
181 |
+
b64_init_img = to_b64(out_image),
|
182 |
+
b64_prev_img = to_b64(prev_frame),
|
183 |
+
b64_color_img = to_b64(warped_frame),
|
184 |
+
mask = None,
|
185 |
+
ds=FIX_STRENGTH, w=w, h=h).sendRequest()
|
186 |
+
|
187 |
+
# This step is necessary to reduce the color drift of the image that some models may cause
|
188 |
+
out_image = skimage.exposure.match_histograms(out_image, init_frame, multichannel=True, channel_axis=-1)
|
189 |
+
|
190 |
+
output_video.write(out_image)
|
191 |
+
if SAVE_FRAMES:
|
192 |
+
if not os.path.isdir('out'): os.makedirs('out')
|
193 |
+
cv2.imwrite(f'out/{ind+1:05d}.png', out_image)
|
194 |
+
|
195 |
+
pred_flow_img = flow_viz.flow_to_image(pred_flow)
|
196 |
+
frames_img = cv2.hconcat(list(clip_frames))
|
197 |
+
data_img = cv2.hconcat([pred_flow_img, pred_occl, warped_frame, out_image])
|
198 |
+
|
199 |
+
cv2.imshow('Out img', cv2.vconcat([frames_img, data_img]))
|
200 |
+
if cv2.waitKey(1) & 0xFF == ord('q'): exit() # press Q to close the script while processing
|
201 |
+
|
202 |
+
prev_frame = out_image.copy()
|
203 |
+
|
204 |
+
# Release the input and output video files
|
205 |
+
output_video.release()
|
206 |
+
|
207 |
+
# Close all windows
|
208 |
+
if VISUALIZE: cv2.destroyAllWindows()
|
SD-CN-Animation/old_scripts/vid2vid.py
ADDED
@@ -0,0 +1,237 @@
1 |
+
import requests
|
2 |
+
import cv2
|
3 |
+
import base64
|
4 |
+
import numpy as np
|
5 |
+
from tqdm import tqdm
|
6 |
+
import os
|
7 |
+
|
8 |
+
import h5py
|
9 |
+
from flow_utils import compute_diff_map
|
10 |
+
|
11 |
+
import skimage
|
12 |
+
import datetime
|
13 |
+
|
14 |
+
INPUT_VIDEO = "/media/alex/ded3efe6-5825-429d-ac89-7ded676a2b6d/media/Peter_Gabriel/pexels-monstera-5302599-4096x2160-30fps.mp4"
|
15 |
+
FLOW_MAPS = "/media/alex/ded3efe6-5825-429d-ac89-7ded676a2b6d/media/Peter_Gabriel/pexels-monstera-5302599-4096x2160-30fps.h5"
|
16 |
+
OUTPUT_VIDEO = f'videos/result_{datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S")}.mp4'
|
17 |
+
|
18 |
+
PROMPT = "Underwater shot Peter Gabriel with closed eyes in Peter Gabriel's music video. 80's music video. VHS style. Dramatic light, Cinematic light. RAW photo, 8k uhd, dslr, soft lighting, high quality, film grain."
|
19 |
+
N_PROMPT = "(deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime:1.4), text, close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck"
|
20 |
+
w,h = 1088, 576 # Width and height of the processed image. Note that the actual image will be processed at w x h resolution.
|
21 |
+
|
22 |
+
START_FROM_IND = 0 # index of a frame to start a processing from. Might be helpful with long animations where you need to restart the script multiple times
|
23 |
+
SAVE_FRAMES = True # saves individual frames into 'out' folder if set True. Again might be helpful with long animations
|
24 |
+
|
25 |
+
PROCESSING_STRENGTH = 0.95
|
26 |
+
BLUR_FIX_STRENGTH = 0.15
|
27 |
+
|
28 |
+
APPLY_HED = True
|
29 |
+
APPLY_CANNY = False
|
30 |
+
APPLY_DEPTH = False
|
31 |
+
GUESSMODE = False
|
32 |
+
|
33 |
+
CFG_SCALE = 5.5
|
34 |
+
|
35 |
+
VISUALIZE = True
|
36 |
+
|
37 |
+
def to_b64(img):
|
38 |
+
img_cliped = np.clip(img, 0, 255).astype(np.uint8)
|
39 |
+
_, buffer = cv2.imencode('.png', img_cliped)
|
40 |
+
b64img = base64.b64encode(buffer).decode("utf-8")
|
41 |
+
return b64img
|
42 |
+
|
43 |
+
class controlnetRequest():
|
44 |
+
def __init__(self, b64_cur_img, b64_hed_img, ds = 0.35, w=w, h=h, mask = None, seed=-1):
|
45 |
+
self.url = "http://localhost:7860/sdapi/v1/img2img"
|
46 |
+
self.body = {
|
47 |
+
"init_images": [b64_cur_img],
|
48 |
+
"mask": mask,
|
49 |
+
"mask_blur": 0,
|
50 |
+
"inpainting_fill": 1,
|
51 |
+
"inpainting_mask_invert": 0,
|
52 |
+
"prompt": PROMPT,
|
53 |
+
"negative_prompt": N_PROMPT,
|
54 |
+
"seed": seed,
|
55 |
+
"subseed": -1,
|
56 |
+
"subseed_strength": 0,
|
57 |
+
"batch_size": 1,
|
58 |
+
"n_iter": 1,
|
59 |
+
"steps": 15,
|
60 |
+
"cfg_scale": CFG_SCALE,
|
61 |
+
"denoising_strength": ds,
|
62 |
+
"width": w,
|
63 |
+
"height": h,
|
64 |
+
"restore_faces": False,
|
65 |
+
"eta": 0,
|
66 |
+
"sampler_index": "DPM++ 2S a",
|
67 |
+
"control_net_enabled": True,
|
68 |
+
"alwayson_scripts": {
|
69 |
+
"ControlNet":{"args": []}
|
70 |
+
},
|
71 |
+
}
|
72 |
+
|
73 |
+
if APPLY_HED:
|
74 |
+
self.body["alwayson_scripts"]["ControlNet"]["args"].append({
|
75 |
+
"input_image": b64_hed_img,
|
76 |
+
"module": "hed",
|
77 |
+
"model": "control_hed-fp16 [13fee50b]",
|
78 |
+
"weight": 0.65,
|
79 |
+
"resize_mode": "Just Resize",
|
80 |
+
"lowvram": False,
|
81 |
+
"processor_res": 512,
|
82 |
+
"guidance_start": 0,
|
83 |
+
"guidance_end": 0.65,
|
84 |
+
"guessmode": GUESSMODE
|
85 |
+
})
|
86 |
+
|
87 |
+
if APPLY_CANNY:
|
88 |
+
self.body["alwayson_scripts"]["ControlNet"]["args"].append({
|
89 |
+
"input_image": b64_hed_img,
|
90 |
+
"module": "canny",
|
91 |
+
"model": "control_canny-fp16 [e3fe7712]",
|
92 |
+
"weight": 0.85,
|
93 |
+
"resize_mode": "Just Resize",
|
94 |
+
"lowvram": False,
|
95 |
+
"threshold_a": 35,
|
96 |
+
"threshold_b": 35,
|
97 |
+
"processor_res": 512,
|
98 |
+
"guidance_start": 0,
|
99 |
+
"guidance_end": 0.85,
|
100 |
+
"guessmode": GUESSMODE
|
101 |
+
})
|
102 |
+
|
103 |
+
if APPLY_DEPTH:
|
104 |
+
self.body["alwayson_scripts"]["ControlNet"]["args"].append({
|
105 |
+
"input_image": b64_hed_img,
|
106 |
+
"module": "depth",
|
107 |
+
"model": "control_depth-fp16 [400750f6]",
|
108 |
+
"weight": 0.85,
|
109 |
+
"resize_mode": "Just Resize",
|
110 |
+
"lowvram": False,
|
111 |
+
"processor_res": 512,
|
112 |
+
"guidance_start": 0,
|
113 |
+
"guidance_end": 0.85,
|
114 |
+
"guessmode": GUESSMODE
|
115 |
+
})
|
116 |
+
|
117 |
+
|
118 |
+
def sendRequest(self):
|
119 |
+
# Request to web-ui
|
120 |
+
data_js = requests.post(self.url, json=self.body).json()
|
121 |
+
|
122 |
+
# Convert the byte array to a NumPy array
|
123 |
+
image_bytes = base64.b64decode(data_js["images"][0])
|
124 |
+
np_array = np.frombuffer(image_bytes, dtype=np.uint8)
|
125 |
+
|
126 |
+
# Convert the NumPy array to a cv2 image
|
127 |
+
out_image = cv2.imdecode(np_array, cv2.IMREAD_COLOR)
|
128 |
+
return out_image
|
129 |
+
|
130 |
+
|
131 |
+
|
132 |
+
if VISUALIZE: cv2.namedWindow('Out img')
|
133 |
+
|
134 |
+
# Open the input video file
|
135 |
+
input_video = cv2.VideoCapture(INPUT_VIDEO)
|
136 |
+
|
137 |
+
# Get useful info from the source video
|
138 |
+
fps = int(input_video.get(cv2.CAP_PROP_FPS))
|
139 |
+
total_frames = int(input_video.get(cv2.CAP_PROP_FRAME_COUNT))
|
140 |
+
|
141 |
+
# Create an output video file with the same fps, width, and height as the input video
|
142 |
+
output_video = cv2.VideoWriter(OUTPUT_VIDEO, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
|
143 |
+
|
144 |
+
prev_frame = None
|
145 |
+
prev_frame_styled = None
|
146 |
+
#init_image = None
|
147 |
+
|
148 |
+
# reading flow maps in a stream manner
|
149 |
+
with h5py.File(FLOW_MAPS, 'r') as f:
|
150 |
+
flow_maps = f['flow_maps']
|
151 |
+
|
152 |
+
for ind in tqdm(range(total_frames)):
|
153 |
+
# Read the next frame from the input video
|
154 |
+
if not input_video.isOpened(): break
|
155 |
+
ret, cur_frame = input_video.read()
|
156 |
+
if not ret: break
|
157 |
+
|
158 |
+
if ind+1 < START_FROM_IND: continue
|
159 |
+
|
160 |
+
is_keyframe = True
|
161 |
+
if prev_frame is not None:
|
162 |
+
# Compute absolute difference between current and previous frame
|
163 |
+
frames_diff = cv2.absdiff(cur_frame, prev_frame)
|
164 |
+
# Compute mean of absolute difference
|
165 |
+
mean_diff = cv2.mean(frames_diff)[0]
|
166 |
+
# Check if mean difference is above threshold
|
167 |
+
is_keyframe = mean_diff > 30
|
168 |
+
|
169 |
+
# Generate a coarse version of the current frame with the previous stylized frame as a reference image
|
170 |
+
if is_keyframe:
|
171 |
+
# Resize the frame to proper resolution
|
172 |
+
frame = cv2.resize(cur_frame, (w, h))
|
173 |
+
|
174 |
+
# Processing current frame with current frame as a mask without any inpainting
|
175 |
+
out_image = controlnetRequest(to_b64(frame), to_b64(frame), PROCESSING_STRENGTH, w, h, mask = None).sendRequest()
|
176 |
+
|
177 |
+
alpha_img = out_image.copy()
|
178 |
+
out_image_ = out_image.copy()
|
179 |
+
warped_styled = out_image.copy()
|
180 |
+
#init_image = out_image.copy()
|
181 |
+
else:
|
182 |
+
# Resize the frame to proper resolution
|
183 |
+
frame = cv2.resize(cur_frame, (w, h))
|
184 |
+
prev_frame = cv2.resize(prev_frame, (w, h))
|
185 |
+
|
186 |
+
# Processing current frame with current frame as a mask without any inpainting
|
187 |
+
out_image = controlnetRequest(to_b64(frame), to_b64(frame), PROCESSING_STRENGTH, w, h, mask = None).sendRequest()
|
188 |
+
|
189 |
+
next_flow, prev_flow = flow_maps[ind-1].astype(np.float32)
|
190 |
+
alpha_mask, warped_styled = compute_diff_map(next_flow, prev_flow, prev_frame, frame, prev_frame_styled)
|
191 |
+
|
192 |
+
# Clipping at the lower side is required to fix small trailing artifacts that for some reason are left outside of the bright part of the mask,
|
193 |
+
# and clipping at the higher side makes strongly changed parts blend in with less flickering.
|
194 |
+
alpha_mask = np.clip(alpha_mask + 0.05, 0.05, 0.95)
|
195 |
+
alpha_img = np.clip(alpha_mask * 255, 0, 255).astype(np.uint8)
|
196 |
+
|
197 |
+
# normalizing the colors
|
198 |
+
out_image = skimage.exposure.match_histograms(out_image, frame, multichannel=False, channel_axis=-1)
|
199 |
+
|
200 |
+
out_image = out_image.astype(float) * alpha_mask + warped_styled.astype(float) * (1 - alpha_mask)
|
201 |
+
|
202 |
+
#out_image = skimage.exposure.match_histograms(out_image, prev_frame, multichannel=True, channel_axis=-1)
|
203 |
+
#out_image_ = (out_image * 0.65 + warped_styled * 0.35)
|
204 |
+
|
205 |
+
|
206 |
+
# Bluring issue fix via additional processing
|
207 |
+
out_image_fixed = controlnetRequest(to_b64(out_image), to_b64(frame), BLUR_FIX_STRENGTH, w, h, mask = None, seed=8888).sendRequest()
|
208 |
+
|
209 |
+
|
210 |
+
# Write the frame to the output video
|
211 |
+
frame_out = np.clip(out_image_fixed, 0, 255).astype(np.uint8)
|
212 |
+
output_video.write(frame_out)
|
213 |
+
|
214 |
+
if VISUALIZE:
|
215 |
+
# show the last written frame - useful to catch any issue with the process
|
216 |
+
warped_styled = np.clip(warped_styled, 0, 255).astype(np.uint8)
|
217 |
+
|
218 |
+
img_show_top = cv2.hconcat([frame, warped_styled])
|
219 |
+
img_show_bot = cv2.hconcat([frame_out, alpha_img])
|
220 |
+
cv2.imshow('Out img', cv2.vconcat([img_show_top, img_show_bot]))
|
221 |
+
cv2.setWindowTitle("Out img", str(ind+1))
|
222 |
+
if cv2.waitKey(1) & 0xFF == ord('q'): exit() # press Q to close the script while processing
|
223 |
+
|
224 |
+
if SAVE_FRAMES:
|
225 |
+
if not os.path.isdir('out'): os.makedirs('out')
|
226 |
+
cv2.imwrite(f'out/{ind+1:05d}.png', frame_out)
|
227 |
+
|
228 |
+
prev_frame = cur_frame.copy()
|
229 |
+
prev_frame_styled = out_image.copy()
|
230 |
+
|
231 |
+
|
232 |
+
# Release the input and output video files
|
233 |
+
input_video.release()
|
234 |
+
output_video.release()
|
235 |
+
|
236 |
+
# Close all windows
|
237 |
+
if VISUALIZE: cv2.destroyAllWindows()
|
SD-CN-Animation/readme.md
ADDED
@@ -0,0 +1,89 @@
1 |
+
# SD-CN-Animation
|
2 |
+
This project allows you to automate the video stylization task using Stable Diffusion and ControlNet. In contrast to other current text2video methods, it also allows you to generate completely new videos from text at any resolution and length, using any Stable Diffusion model as a backbone, including custom ones. It uses the '[RAFT](https://github.com/princeton-vl/RAFT)' optical flow estimation algorithm to keep the animation stable and to create an occlusion mask that is used to generate the next frame. In text to video mode it relies on the 'FloweR' method (work in progress) that predicts optical flow from the previous frames.
|
3 |
+
|
4 |
+
![sd-cn-animation ui preview](examples/ui_preview.png)
|
5 |
+
sd-cn-animation ui preview
|
6 |
+
|
7 |
+
**In vid2vid mode, do not forget to activate a ControlNet model to achieve better results. Without it the resulting video might be quite choppy. Do not put any images into ControlNet, as the frames are passed automatically from the video.**
|
8 |
+
Here are CN parameters that seem to give the best results so far:
|
9 |
+
![sd-cn-animation cn params](examples/cn_settings.png)
|
10 |
+
|
11 |
+
|
### Video to Video Examples:
<table class="center">
<tr>
<td><img src="examples/girl_org.gif" raw=true></td>
<td><img src="examples/girl_to_jc.gif" raw=true></td>
<td><img src="examples/girl_to_wc.gif" raw=true></td>
</tr>
<tr>
<td width=33% align="center">Original video</td>
<td width=33% align="center">"Jessica Chastain"</td>
<td width=33% align="center">"Watercolor painting"</td>
</tr>
</table>

The examples presented were generated at 1024x576 resolution using the 'realisticVisionV13_v13' model as a base. They were cropped, downsized and compressed for better loading speed. You can see them in their original quality in the 'examples' folder.

### Text to Video Examples:
<table class="center">
<tr>
<td><img src="examples/flower_1.gif" raw=true></td>
<td><img src="examples/bonfire_1.gif" raw=true></td>
<td><img src="examples/diamond_4.gif" raw=true></td>
</tr>
<tr>
<td width=33% align="center">"close up of a flower"</td>
<td width=33% align="center">"bonfire near the camp in the mountains at night"</td>
<td width=33% align="center">"close up of a diamond laying on the table"</td>
</tr>
<tr>
<td><img src="examples/macaroni_1.gif" raw=true></td>
<td><img src="examples/gold_1.gif" raw=true></td>
<td><img src="examples/tree_2.gif" raw=true></td>
</tr>
<tr>
<td width=33% align="center">"close up of macaroni on the plate"</td>
<td width=33% align="center">"close up of golden sphere"</td>
<td width=33% align="center">"a tree standing in the winter forest"</td>
</tr>
</table>

All the examples shown here were originally generated at 512x512 resolution using the 'sd-v1-5-inpainting' model as a base. They were downsized and compressed for better loading speed. You can see them in their original quality in the 'examples' folder. The actual prompts followed the format "RAW photo, {subject}, 8k uhd, dslr, soft lighting, high quality, film grain, Fujifilm XT3"; only the 'subject' part is shown in the table above.

## Installing the extension
To install the extension, go to the 'Extensions' tab in [Automatic1111 web-ui](https://github.com/AUTOMATIC1111/stable-diffusion-webui), then open the 'Install from URL' tab. In the 'URL for extension's git repository' field enter the URL of this repository, i.e. 'https://github.com/volotat/SD-CN-Animation.git'. Leave the 'Local directory name' field empty and press the 'Install' button. Restart the web-ui; a new 'SD-CN-Animation' tab should appear. All generated videos will be saved into the 'stable-diffusion-webui/outputs/sd-cn-animation' folder.

## Known issues
* If you see an error like `IndexError: list index out of range`, try restarting the webui; that should fix it. If the issue persists, uninstall and reinstall scikit-image==0.19.2 with the --no-cache-dir flag, like this (see also the version check right after this list):
```
pip uninstall scikit-image
pip install scikit-image==0.19.2 --no-cache-dir
```
* The extension might work incorrectly if the 'Apply color correction to img2img results to match original colors.' option is enabled. Make sure to disable it in the 'Settings' tab -> 'Stable Diffusion' section.
* If you get an error like 'Need to enable queue to use generators.', please update the webui to the latest version. Beware that only [Automatic1111 web-ui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) is fully supported.
* The extension is not compatible with Macs. If the extension does work for you on a Mac, or you know how to make it compatible, please open a new discussion.
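A quick way to confirm which scikit-image version the webui environment actually resolves (run it inside the webui's Python environment; this is only a sanity check, not part of the extension):

```python
import skimage
print(skimage.__version__)  # the fix above targets 0.19.2
```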

## Last version changes: v0.9
* Fixed issues #69, #76, #91, #92.
* Fixed an issue in vid2vid mode where the occlusion mask computed from the optical flow could include unnecessary parts (everywhere the flow is non-zero).
* Added 'Extra params' in vid2vid mode for finer-grained control of the processing pipeline.
* Better default parameter set for the vid2vid pipeline.
* In txt2vid mode, after the first frame is generated the seed is now automatically set to -1 to prevent blurring issues.
* Added an option to save the resulting frames into a folder alongside the video.
* Added the ability to export the current parameters in a human-readable form as JSON (a simplified illustration of the idea follows this list).
* The interpolation mode in the flow-applying stage is set to 'nearest' to reduce image blurring over time.
* Added ControlNet to txt2vid mode, as well as fixing issue #86, thanks to [@mariaWitch](https://github.com/mariaWitch).
* Fixed a major issue where ControlNet used the wrong input images. Because of this, vid2vid results were much worse than they should have been.
* Text to video mode now supports a video as guidance for ControlNet. This allows much stronger video stylizations.
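A simplified illustration of what the settings export amounts to (the extension's own `utils.export_settings` helper may collect and name its values differently; the parameter names below are only examples):

```python
import json

def export_settings(**params):
    # Serialize the current UI parameters into human-readable JSON.
    return json.dumps(params, indent=2, ensure_ascii=False)

print(export_settings(mode="vid2vid", width=1024, height=576,
                      cfg_scale=5.5, steps=15, processing_strength=0.85))
```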

<!--
* ControlNet preprocessors like "reference_only", "reference_adain", "reference_adain+attn" are not reset with video frames, to keep the ability to control the style of the video.
* Fixed an issue because of which the 'processing_strength' UI parameter did not actually affect the denoising strength at the first processing step.
* Fixed issue #112. The extension will no longer try to reinstall requirements at every start of the webui.
* Some improvements in the text-to-video method.
* Parameters used to generate a video are now automatically saved in the video's folder.
* Added the ability to control which frame will be sent to CN in text-to-video mode.
-->
SD-CN-Animation/requirements.txt
ADDED
@@ -0,0 +1 @@
scikit-image
SD-CN-Animation/scripts/__pycache__/base_ui.cpython-310.pyc
ADDED
Binary file (11.6 kB)
SD-CN-Animation/scripts/base_ui.py
ADDED
@@ -0,0 +1,252 @@
import sys, os

import gradio as gr
import modules
from types import SimpleNamespace

from modules import script_callbacks, shared
from modules.shared import cmd_opts, opts
from webui import wrap_gradio_gpu_call

from modules.ui_components import ToolButton, FormRow, FormGroup
from modules.ui import create_override_settings_dropdown
import modules.scripts as scripts

from modules.sd_samplers import samplers_for_img2img
from modules.ui import setup_progressbar, create_sampler_and_steps_selection, ordered_ui_categories, create_output_panel

from scripts.core import vid2vid, txt2vid, utils
import traceback

def V2VArgs():
    # Default parameters for vid2vid mode, returned as a dict via locals().
    seed = -1
    width = 1024
    height = 576
    cfg_scale = 5.5
    steps = 15
    prompt = ""
    n_prompt = "text, letters, logo, brand, close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck"
    processing_strength = 0.85
    fix_frame_strength = 0.15
    return locals()

def T2VArgs():
    # Default parameters for txt2vid mode, returned as a dict via locals().
    seed = -1
    width = 768
    height = 512
    cfg_scale = 5.5
    steps = 15
    prompt = ""
    n_prompt = "((blur, blurr, blurred, blurry, fuzzy, unclear, unfocus, bocca effect)), text, letters, logo, brand, close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck"
    processing_strength = 0.75
    fix_frame_strength = 0.35
    return locals()

def setup_common_values(mode, d):
    # UI controls shared between the vid2vid and txt2vid tabs.
    with gr.Row():
        width = gr.Slider(label='Width', minimum=64, maximum=2048, step=64, value=d.width, interactive=True)
        height = gr.Slider(label='Height', minimum=64, maximum=2048, step=64, value=d.height, interactive=True)
    with gr.Row(elem_id=f'{mode}_prompt_toprow'):
        prompt = gr.Textbox(label='Prompt', lines=3, interactive=True, elem_id=f"{mode}_prompt", placeholder="Enter your prompt here...")
    with gr.Row(elem_id=f'{mode}_n_prompt_toprow'):
        n_prompt = gr.Textbox(label='Negative prompt', lines=3, interactive=True, elem_id=f"{mode}_n_prompt", value=d.n_prompt)
    with gr.Row():
        cfg_scale = gr.Slider(label='CFG scale', minimum=1, maximum=100, step=1, value=d.cfg_scale, interactive=True)
    with gr.Row():
        seed = gr.Number(label='Seed (this parameter controls how the first frame looks and the color distribution of the consecutive frames, as they depend on the first one)', value=d.seed, interactive=True, precision=0)
    with gr.Row():
        processing_strength = gr.Slider(label="Processing strength (Step 1)", value=d.processing_strength, minimum=0, maximum=1, step=0.05, interactive=True)
        fix_frame_strength = gr.Slider(label="Fix frame strength (Step 2)", value=d.fix_frame_strength, minimum=0, maximum=1, step=0.05, interactive=True)
    with gr.Row():
        sampler_index = gr.Dropdown(label='Sampling method', elem_id=f"{mode}_sampling", choices=[x.name for x in samplers_for_img2img], value=samplers_for_img2img[0].name, type="index", interactive=True)
        steps = gr.Slider(label="Sampling steps", minimum=1, maximum=150, step=1, elem_id=f"{mode}_steps", value=d.steps, interactive=True)

    return width, height, prompt, n_prompt, cfg_scale, seed, processing_strength, fix_frame_strength, sampler_index, steps

def inputs_ui():
    v2v_args = SimpleNamespace(**V2VArgs())
    t2v_args = SimpleNamespace(**T2VArgs())
    with gr.Tabs():
        glo_sdcn_process_mode = gr.State(value='vid2vid')

        with gr.Tab('vid2vid') as tab_vid2vid:
            with gr.Row():
                gr.HTML('Input video (each frame will be used as initial image for SD and as input image to CN): *REQUIRED')
            with gr.Row():
                v2v_file = gr.File(label="Input video", interactive=True, file_count="single", file_types=["video"], elem_id="vid_to_vid_chosen_file")

            v2v_width, v2v_height, v2v_prompt, v2v_n_prompt, v2v_cfg_scale, v2v_seed, v2v_processing_strength, v2v_fix_frame_strength, v2v_sampler_index, v2v_steps = setup_common_values('vid2vid', v2v_args)

            with gr.Accordion("Extra settings", open=False):
                gr.HTML('# Occlusion mask params:')
                with gr.Row():
                    with gr.Column(scale=1, variant='compact'):
                        v2v_occlusion_mask_blur = gr.Slider(label='Occlusion blur strength', minimum=0, maximum=10, step=0.1, value=3, interactive=True)
                        gr.HTML('')
                        v2v_occlusion_mask_trailing = gr.Checkbox(label="Occlusion trailing", info="Reduces ghosting but adds more flickering to the video", value=True, interactive=True)
                    with gr.Column(scale=1, variant='compact'):
                        v2v_occlusion_mask_flow_multiplier = gr.Slider(label='Occlusion flow multiplier', minimum=0, maximum=10, step=0.1, value=5, interactive=True)
                        v2v_occlusion_mask_difo_multiplier = gr.Slider(label='Occlusion diff origin multiplier', minimum=0, maximum=10, step=0.1, value=2, interactive=True)
                        v2v_occlusion_mask_difs_multiplier = gr.Slider(label='Occlusion diff styled multiplier', minimum=0, maximum=10, step=0.1, value=0, interactive=True)

                with gr.Row():
                    with gr.Column(scale=1, variant='compact'):
                        gr.HTML('# Step 1 params:')
                        v2v_step_1_seed = gr.Number(label='Seed', value=-1, interactive=True, precision=0)
                        gr.HTML('<br>')
                        v2v_step_1_blend_alpha = gr.Slider(label='Warped prev frame vs Current frame blend alpha', minimum=0, maximum=1, step=0.1, value=1, interactive=True)
                        v2v_step_1_processing_mode = gr.Radio(["Process full image then blend in occlusions", "Inpaint occlusions"], type="index",
                                                              label="Processing mode", value="Process full image then blend in occlusions", interactive=True)

                    with gr.Column(scale=1, variant='compact'):
                        gr.HTML('# Step 2 params:')
                        v2v_step_2_seed = gr.Number(label='Seed', value=8888, interactive=True, precision=0)

            with FormRow(elem_id="vid2vid_override_settings_row") as row:
                v2v_override_settings = create_override_settings_dropdown("vid2vid", row)

            with FormGroup(elem_id=f"script_container"):
                v2v_custom_inputs = scripts.scripts_img2img.setup_ui()

        with gr.Tab('txt2vid') as tab_txt2vid:
            with gr.Row():
                gr.HTML('Control video (each frame will be used as input image to CN): *NOT REQUIRED')
            with gr.Row():
                t2v_file = gr.File(label="Input video", interactive=True, file_count="single", file_types=["video"], elem_id="tex_to_vid_chosen_file")
                t2v_init_image = gr.Image(label="Input image", interactive=True, file_count="single", file_types=["image"], elem_id="tex_to_vid_init_image")

            t2v_width, t2v_height, t2v_prompt, t2v_n_prompt, t2v_cfg_scale, t2v_seed, t2v_processing_strength, t2v_fix_frame_strength, t2v_sampler_index, t2v_steps = setup_common_values('txt2vid', t2v_args)

            with gr.Row():
                t2v_length = gr.Slider(label='Length (in frames)', minimum=10, maximum=2048, step=10, value=40, interactive=True)
                t2v_fps = gr.Slider(label='Video FPS', minimum=4, maximum=64, step=4, value=12, interactive=True)

            gr.HTML('<br>')
            t2v_cn_frame_send = gr.Radio(["None", "Current generated frame", "Previous generated frame", "Current reference video frame"], type="index",
                                         label="What frame should be sent to CN?", value="None", interactive=True)

            with FormRow(elem_id="txt2vid_override_settings_row") as row:
                t2v_override_settings = create_override_settings_dropdown("txt2vid", row)

            with FormGroup(elem_id=f"script_container"):
                t2v_custom_inputs = scripts.scripts_txt2img.setup_ui()

    tab_vid2vid.select(fn=lambda: 'vid2vid', inputs=[], outputs=[glo_sdcn_process_mode])
    tab_txt2vid.select(fn=lambda: 'txt2vid', inputs=[], outputs=[glo_sdcn_process_mode])

    return locals()

def process(*args):
    msg = 'Done'
    try:
        if args[0] == 'vid2vid':
            yield from vid2vid.start_process(*args)
        elif args[0] == 'txt2vid':
            yield from txt2vid.start_process(*args)
        else:
            msg = f"Unsupported processing mode: '{args[0]}'"
            raise Exception(msg)
    except Exception as error:
        # handle the exception
        msg = f"An exception occurred while trying to process the frame: {error}"
        print(msg)
        traceback.print_exc()

    yield msg, gr.Image.update(), gr.Image.update(), gr.Image.update(), gr.Image.update(), gr.Video.update(), gr.Button.update(interactive=True), gr.Button.update(interactive=False)

def stop_process(*args):
    utils.shared.is_interrupted = True
    return gr.Button.update(interactive=False)



def on_ui_tabs():
    modules.scripts.scripts_current = modules.scripts.scripts_img2img
    modules.scripts.scripts_img2img.initialize_scripts(is_img2img=True)

    with gr.Blocks(analytics_enabled=False) as sdcnanim_interface:
        components = {}

        #dv = SimpleNamespace(**T2VOutputArgs())
        with gr.Row(elem_id='sdcn-core').style(equal_height=False, variant='compact'):
            with gr.Column(scale=1, variant='panel'):
                #with gr.Tabs():
                components = inputs_ui()

                with gr.Accordion("Export settings", open=False):
                    export_settings_button = gr.Button('Export', elem_id=f"sdcn_export_settings_button")
                    export_setting_json = gr.Code(value='')

            with gr.Column(scale=1, variant='compact'):
                with gr.Row(variant='compact'):
                    run_button = gr.Button('Generate', elem_id=f"sdcn_anim_generate", variant='primary')
                    stop_button = gr.Button('Interrupt', elem_id=f"sdcn_anim_interrupt", variant='primary', interactive=False)

                save_frames_check = gr.Checkbox(label="Save frames into a folder nearby a video (check it before running the generation if you also want to save frames separately)", value=True, interactive=True)
                gr.HTML('<br>')

                with gr.Column(variant="panel"):
                    sp_progress = gr.HTML(elem_id="sp_progress", value="")

                    with gr.Row(variant='compact'):
                        img_preview_curr_frame = gr.Image(label='Current frame', elem_id=f"img_preview_curr_frame", type='pil').style(height=240)
                        img_preview_curr_occl = gr.Image(label='Current occlusion', elem_id=f"img_preview_curr_occl", type='pil').style(height=240)
                    with gr.Row(variant='compact'):
                        img_preview_prev_warp = gr.Image(label='Previous frame warped', elem_id=f"img_preview_curr_frame", type='pil').style(height=240)
                        img_preview_processed = gr.Image(label='Processed', elem_id=f"img_preview_processed", type='pil').style(height=240)

                    video_preview = gr.Video(interactive=False)

                with gr.Row(variant='compact'):
                    dummy_component = gr.Label(visible=False)

        components['glo_save_frames_check'] = save_frames_check

        # Define parameters for the action methods.
        utils.shared.v2v_custom_inputs_size = len(components['v2v_custom_inputs'])
        utils.shared.t2v_custom_inputs_size = len(components['t2v_custom_inputs'])
        #print('v2v_custom_inputs', len(components['v2v_custom_inputs']), components['v2v_custom_inputs'])
        #print('t2v_custom_inputs', len(components['t2v_custom_inputs']), components['t2v_custom_inputs'])
        method_inputs = [components[name] for name in utils.get_component_names()] + components['v2v_custom_inputs'] + components['t2v_custom_inputs']

        method_outputs = [
            sp_progress,
            img_preview_curr_frame,
            img_preview_curr_occl,
            img_preview_prev_warp,
            img_preview_processed,
            video_preview,
            run_button,
            stop_button,
        ]

        run_button.click(
            fn=process,  #wrap_gradio_gpu_call(start_process, extra_outputs=[None, '', '']),
            inputs=method_inputs,
            outputs=method_outputs,
            show_progress=True,
        )

        stop_button.click(
            fn=stop_process,
            outputs=[stop_button],
            show_progress=False
        )

        export_settings_button.click(
            fn=utils.export_settings,
            inputs=method_inputs,
            outputs=[export_setting_json],
            show_progress=False
        )

        modules.scripts.scripts_current = None

    # define queue - required for generators
    sdcnanim_interface.queue(concurrency_count=1)
    return [(sdcnanim_interface, "SD-CN-Animation", "sd_cn_animation_interface")]


script_callbacks.on_ui_tabs(on_ui_tabs)
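The `process` handler above is a generator that yields intermediate previews, which is why the interface enables its queue before being returned (and why the 'Need to enable queue to use generators.' error appears on outdated webui versions). A minimal standalone sketch of that pattern, assuming the gradio 3.x API the webui uses (not part of the extension):

```python
import time
import gradio as gr

def slow_process():
    # Yield intermediate status updates; Gradio streams these to the UI,
    # but only when the Blocks queue is enabled.
    for i in range(3):
        time.sleep(0.5)
        yield f"step {i + 1} done"

with gr.Blocks() as demo:
    out = gr.Textbox(label="Progress")
    gr.Button("Run").click(fn=slow_process, outputs=[out])

demo.queue(concurrency_count=1)
# demo.launch()
```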