toto10 committed on
Commit
a0611b0
1 Parent(s): f874194

00a4ef3f9ad50e484da61893e60b77bb57629f7b9dd7d759346c4dda116cba15

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. .gitattributes +12 -0
  2. SD-CN-Animation/FloweR/__pycache__/model.cpython-310.pyc +0 -0
  3. SD-CN-Animation/FloweR/model.py +191 -0
  4. SD-CN-Animation/LICENSE +22 -0
  5. SD-CN-Animation/RAFT/LICENSE +29 -0
  6. SD-CN-Animation/RAFT/__pycache__/corr.cpython-310.pyc +0 -0
  7. SD-CN-Animation/RAFT/__pycache__/extractor.cpython-310.pyc +0 -0
  8. SD-CN-Animation/RAFT/__pycache__/raft.cpython-310.pyc +0 -0
  9. SD-CN-Animation/RAFT/__pycache__/update.cpython-310.pyc +0 -0
  10. SD-CN-Animation/RAFT/corr.py +91 -0
  11. SD-CN-Animation/RAFT/extractor.py +267 -0
  12. SD-CN-Animation/RAFT/raft.py +144 -0
  13. SD-CN-Animation/RAFT/update.py +139 -0
  14. SD-CN-Animation/RAFT/utils/__init__.py +0 -0
  15. SD-CN-Animation/RAFT/utils/__pycache__/__init__.cpython-310.pyc +0 -0
  16. SD-CN-Animation/RAFT/utils/__pycache__/utils.cpython-310.pyc +0 -0
  17. SD-CN-Animation/RAFT/utils/augmentor.py +246 -0
  18. SD-CN-Animation/RAFT/utils/flow_viz.py +132 -0
  19. SD-CN-Animation/RAFT/utils/frame_utils.py +137 -0
  20. SD-CN-Animation/RAFT/utils/utils.py +82 -0
  21. SD-CN-Animation/examples/bonefire_1.mp4 +0 -0
  22. SD-CN-Animation/examples/bonfire_1.gif +0 -0
  23. SD-CN-Animation/examples/cn_settings.png +0 -0
  24. SD-CN-Animation/examples/diamond_4.gif +0 -0
  25. SD-CN-Animation/examples/diamond_4.mp4 +0 -0
  26. SD-CN-Animation/examples/flower_1.gif +3 -0
  27. SD-CN-Animation/examples/flower_1.mp4 +3 -0
  28. SD-CN-Animation/examples/flower_11.mp4 +3 -0
  29. SD-CN-Animation/examples/girl_org.gif +3 -0
  30. SD-CN-Animation/examples/girl_to_jc.gif +3 -0
  31. SD-CN-Animation/examples/girl_to_jc.mp4 +3 -0
  32. SD-CN-Animation/examples/girl_to_wc.gif +3 -0
  33. SD-CN-Animation/examples/girl_to_wc.mp4 +3 -0
  34. SD-CN-Animation/examples/gold_1.gif +3 -0
  35. SD-CN-Animation/examples/gold_1.mp4 +0 -0
  36. SD-CN-Animation/examples/macaroni_1.gif +3 -0
  37. SD-CN-Animation/examples/macaroni_1.mp4 +0 -0
  38. SD-CN-Animation/examples/tree_2.gif +3 -0
  39. SD-CN-Animation/examples/tree_2.mp4 +3 -0
  40. SD-CN-Animation/examples/ui_preview.png +0 -0
  41. SD-CN-Animation/install.py +20 -0
  42. SD-CN-Animation/old_scripts/compute_flow.py +75 -0
  43. SD-CN-Animation/old_scripts/flow_utils.py +139 -0
  44. SD-CN-Animation/old_scripts/readme.md +133 -0
  45. SD-CN-Animation/old_scripts/txt2vid.py +208 -0
  46. SD-CN-Animation/old_scripts/vid2vid.py +237 -0
  47. SD-CN-Animation/readme.md +89 -0
  48. SD-CN-Animation/requirements.txt +1 -0
  49. SD-CN-Animation/scripts/__pycache__/base_ui.cpython-310.pyc +0 -0
  50. SD-CN-Animation/scripts/base_ui.py +252 -0
.gitattributes CHANGED
@@ -43,3 +43,15 @@ sd_feed/assets/pinterest.png filter=lfs diff=lfs merge=lfs -text
 sd-3dmodel-loader/models/Samba[[:space:]]Dancing.fbx filter=lfs diff=lfs merge=lfs -text
 sd-3dmodel-loader/models/pose.vrm filter=lfs diff=lfs merge=lfs -text
 sd-webui-3d-open-pose-editor/downloads/pose/0.5.1675469404/pose_solution_packed_assets.data filter=lfs diff=lfs merge=lfs -text
+SD-CN-Animation/examples/flower_1.gif filter=lfs diff=lfs merge=lfs -text
+SD-CN-Animation/examples/flower_1.mp4 filter=lfs diff=lfs merge=lfs -text
+SD-CN-Animation/examples/flower_11.mp4 filter=lfs diff=lfs merge=lfs -text
+SD-CN-Animation/examples/girl_org.gif filter=lfs diff=lfs merge=lfs -text
+SD-CN-Animation/examples/girl_to_jc.gif filter=lfs diff=lfs merge=lfs -text
+SD-CN-Animation/examples/girl_to_jc.mp4 filter=lfs diff=lfs merge=lfs -text
+SD-CN-Animation/examples/girl_to_wc.gif filter=lfs diff=lfs merge=lfs -text
+SD-CN-Animation/examples/girl_to_wc.mp4 filter=lfs diff=lfs merge=lfs -text
+SD-CN-Animation/examples/gold_1.gif filter=lfs diff=lfs merge=lfs -text
+SD-CN-Animation/examples/macaroni_1.gif filter=lfs diff=lfs merge=lfs -text
+SD-CN-Animation/examples/tree_2.gif filter=lfs diff=lfs merge=lfs -text
+SD-CN-Animation/examples/tree_2.mp4 filter=lfs diff=lfs merge=lfs -text
SD-CN-Animation/FloweR/__pycache__/model.cpython-310.pyc ADDED
Binary file (3.84 kB).
 
SD-CN-Animation/FloweR/model.py ADDED
@@ -0,0 +1,191 @@
+import torch
+import torch.nn as nn
+import torch.functional as F
+
+# Define the model
+class FloweR(nn.Module):
+    def __init__(self, input_size = (384, 384), window_size = 4):
+        super(FloweR, self).__init__()
+
+        self.input_size = input_size
+        self.window_size = window_size
+
+        # 2 channels for optical flow
+        # 1 channel for occlusion mask
+        # 3 channels for next frame prediction
+        self.out_channels = 6
+
+        #INPUT: 384 x 384 x 4 * 3
+
+        ### DOWNSCALE ###
+        self.conv_block_1 = nn.Sequential(
+            nn.Conv2d(3 * self.window_size, 128, kernel_size=3, stride=1, padding='same'),
+            nn.ReLU(),
+        ) # 384 x 384 x 128
+
+        self.conv_block_2 = nn.Sequential(
+            nn.AvgPool2d(2),
+            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
+            nn.ReLU(),
+        ) # 192 x 192 x 128
+
+        self.conv_block_3 = nn.Sequential(
+            nn.AvgPool2d(2),
+            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
+            nn.ReLU(),
+        ) # 96 x 96 x 128
+
+        self.conv_block_4 = nn.Sequential(
+            nn.AvgPool2d(2),
+            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
+            nn.ReLU(),
+        ) # 48 x 48 x 128
+
+        self.conv_block_5 = nn.Sequential(
+            nn.AvgPool2d(2),
+            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
+            nn.ReLU(),
+        ) # 24 x 24 x 128
+
+        self.conv_block_6 = nn.Sequential(
+            nn.AvgPool2d(2),
+            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
+            nn.ReLU(),
+        ) # 12 x 12 x 128
+
+        self.conv_block_7 = nn.Sequential(
+            nn.AvgPool2d(2),
+            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
+            nn.ReLU(),
+        ) # 6 x 6 x 128
+
+        self.conv_block_8 = nn.Sequential(
+            nn.AvgPool2d(2),
+            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
+            nn.ReLU(),
+        ) # 3 x 3 x 128 - 9 input tokens
+
+        ### Transformer part ###
+        # To be done
+
+        ### UPSCALE ###
+        self.conv_block_9 = nn.Sequential(
+            nn.Upsample(scale_factor=2),
+            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
+            nn.ReLU(),
+        ) # 6 x 6 x 128
+
+        self.conv_block_10 = nn.Sequential(
+            nn.Upsample(scale_factor=2),
+            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
+            nn.ReLU(),
+        ) # 12 x 12 x 128
+
+        self.conv_block_11 = nn.Sequential(
+            nn.Upsample(scale_factor=2),
+            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
+            nn.ReLU(),
+        ) # 24 x 24 x 128
+
+        self.conv_block_12 = nn.Sequential(
+            nn.Upsample(scale_factor=2),
+            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
+            nn.ReLU(),
+        ) # 48 x 48 x 128
+
+        self.conv_block_13 = nn.Sequential(
+            nn.Upsample(scale_factor=2),
+            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
+            nn.ReLU(),
+        ) # 96 x 96 x 128
+
+        self.conv_block_14 = nn.Sequential(
+            nn.Upsample(scale_factor=2),
+            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
+            nn.ReLU(),
+        ) # 192 x 192 x 128
+
+        self.conv_block_15 = nn.Sequential(
+            nn.Upsample(scale_factor=2),
+            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding='same'),
+            nn.ReLU(),
+        ) # 384 x 384 x 128
+
+        self.conv_block_16 = nn.Conv2d(128, self.out_channels, kernel_size=3, stride=1, padding='same')
+
+    def forward(self, input_frames):
+
+        if input_frames.size(1) != self.window_size:
+            raise Exception(f'Shape of the input is not compatable. There should be exactly {self.window_size} frames in an input video.')
+
+        h, w = self.input_size
+        # batch, frames, height, width, colors
+        input_frames_permuted = input_frames.permute((0, 1, 4, 2, 3))
+        # batch, frames, colors, height, width
+
+        in_x = input_frames_permuted.reshape(-1, self.window_size * 3, self.input_size[0], self.input_size[1])
+
+        ### DOWNSCALE ###
+        block_1_out = self.conv_block_1(in_x)        # 384 x 384 x 128
+        block_2_out = self.conv_block_2(block_1_out) # 192 x 192 x 128
+        block_3_out = self.conv_block_3(block_2_out) # 96 x 96 x 128
+        block_4_out = self.conv_block_4(block_3_out) # 48 x 48 x 128
+        block_5_out = self.conv_block_5(block_4_out) # 24 x 24 x 128
+        block_6_out = self.conv_block_6(block_5_out) # 12 x 12 x 128
+        block_7_out = self.conv_block_7(block_6_out) # 6 x 6 x 128
+        block_8_out = self.conv_block_8(block_7_out) # 3 x 3 x 128
+
+        ### UPSCALE ###
+        block_9_out = block_7_out + self.conv_block_9(block_8_out)     # 6 x 6 x 128
+        block_10_out = block_6_out + self.conv_block_10(block_9_out)   # 12 x 12 x 128
+        block_11_out = block_5_out + self.conv_block_11(block_10_out)  # 24 x 24 x 128
+        block_12_out = block_4_out + self.conv_block_12(block_11_out)  # 48 x 48 x 128
+        block_13_out = block_3_out + self.conv_block_13(block_12_out)  # 96 x 96 x 128
+        block_14_out = block_2_out + self.conv_block_14(block_13_out)  # 192 x 192 x 128
+        block_15_out = block_1_out + self.conv_block_15(block_14_out)  # 384 x 384 x 128
+
+        block_16_out = self.conv_block_16(block_15_out) # 384 x 384 x (2 + 1 + 3)
+        out = block_16_out.reshape(-1, self.out_channels, self.input_size[0], self.input_size[1])
+
+        ### for future model training ###
+        device = out.get_device()
+
+        pred_flow = out[:,:2,:,:] * 255 # (-255, 255)
+        pred_occl = (out[:,2:3,:,:] + 1) / 2 # [0, 1]
+        pred_next = out[:,3:6,:,:]
+
+        # Generate sampling grids
+
+        # Create grid to upsample input
+        '''
+        d = torch.linspace(-1, 1, 8)
+        meshx, meshy = torch.meshgrid((d, d))
+        grid = torch.stack((meshy, meshx), 2)
+        grid = grid.unsqueeze(0) '''
+
+        grid_y, grid_x = torch.meshgrid(torch.arange(0, h), torch.arange(0, w))
+        flow_grid = torch.stack((grid_x, grid_y), dim=0).float()
+        flow_grid = flow_grid.unsqueeze(0).to(device=device)
+        flow_grid = flow_grid + pred_flow
+
+        flow_grid[:, 0, :, :] = 2 * flow_grid[:, 0, :, :] / (w - 1) - 1
+        flow_grid[:, 1, :, :] = 2 * flow_grid[:, 1, :, :] / (h - 1) - 1
+        # batch, flow_chanels, height, width
+        flow_grid = flow_grid.permute(0, 2, 3, 1)
+        # batch, height, width, flow_chanels
+
+        previous_frame = input_frames_permuted[:, -1, :, :, :]
+        sampling_mode = "bilinear" if self.training else "nearest"
+        warped_frame = torch.nn.functional.grid_sample(previous_frame, flow_grid, mode=sampling_mode, padding_mode="reflection", align_corners=False)
+        alpha_mask = torch.clip(pred_occl * 10, 0, 1) * 0.04
+        pred_next = torch.clip(pred_next, -1, 1)
+        warped_frame = torch.clip(warped_frame, -1, 1)
+        next_frame = pred_next * alpha_mask + warped_frame * (1 - alpha_mask)
+
+        res = torch.cat((pred_flow / 255, pred_occl * 2 - 1, next_frame), dim=1)
+
+        # batch, channels, height, width
+        res = res.permute((0, 2, 3, 1))
+        # batch, height, width, channels
+        return res
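
Note (not part of the commit): a minimal sketch of how this FloweR module could be driven, assuming a window of 4 RGB frames already resized to 384x384, scaled to [-1, 1] and stacked as (batch, frames, height, width, colors); the weights file name is hypothetical.

  import torch
  from FloweR.model import FloweR  # assumes the SD-CN-Animation folder is on sys.path

  device = torch.device('cuda')  # forward() calls out.get_device(), so CUDA tensors are assumed
  model = FloweR(input_size=(384, 384), window_size=4).to(device)
  # model.load_state_dict(torch.load('FloweR_weights.pth', map_location=device))  # hypothetical file
  model.eval()

  clip = torch.zeros(1, 4, 384, 384, 3, device=device)  # last 4 frames, RGB in [-1, 1]
  with torch.no_grad():
      pred = model(clip)  # (batch, height, width, 6)

  pred_flow = pred[0, :, :, 0:2] * 255      # flow in pixels, matching the encoding above
  pred_occl = (pred[0, :, :, 2:3] + 1) / 2  # occlusion mask in [0, 1]
  pred_next = pred[0, :, :, 3:6]            # predicted next frame in [-1, 1]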
SD-CN-Animation/LICENSE ADDED
@@ -0,0 +1,22 @@
+License
+
+Copyright (c) 2023 Alexey Borsky
+
+The Software is subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+This repository can only be used for personal/research/non-commercial purposes.
+However, for commercial requests, please contact us directly at
+borsky.alexey@gmail.com. This restriction applies only to the code itself, all
+derivative works made using this repository (i.e. images and video) can be
+used for any purposes without restrictions.
SD-CN-Animation/RAFT/LICENSE ADDED
@@ -0,0 +1,29 @@
+BSD 3-Clause License
+
+Copyright (c) 2020, princeton-vl
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
SD-CN-Animation/RAFT/__pycache__/corr.cpython-310.pyc ADDED
Binary file (3.08 kB).
 
SD-CN-Animation/RAFT/__pycache__/extractor.cpython-310.pyc ADDED
Binary file (5.79 kB).
 
SD-CN-Animation/RAFT/__pycache__/raft.cpython-310.pyc ADDED
Binary file (4.22 kB).
 
SD-CN-Animation/RAFT/__pycache__/update.cpython-310.pyc ADDED
Binary file (5.64 kB).
 
SD-CN-Animation/RAFT/corr.py ADDED
@@ -0,0 +1,91 @@
+import torch
+import torch.nn.functional as F
+from RAFT.utils.utils import bilinear_sampler, coords_grid
+
+try:
+    import alt_cuda_corr
+except:
+    # alt_cuda_corr is not compiled
+    pass
+
+
+class CorrBlock:
+    def __init__(self, fmap1, fmap2, num_levels=4, radius=4):
+        self.num_levels = num_levels
+        self.radius = radius
+        self.corr_pyramid = []
+
+        # all pairs correlation
+        corr = CorrBlock.corr(fmap1, fmap2)
+
+        batch, h1, w1, dim, h2, w2 = corr.shape
+        corr = corr.reshape(batch*h1*w1, dim, h2, w2)
+
+        self.corr_pyramid.append(corr)
+        for i in range(self.num_levels-1):
+            corr = F.avg_pool2d(corr, 2, stride=2)
+            self.corr_pyramid.append(corr)
+
+    def __call__(self, coords):
+        r = self.radius
+        coords = coords.permute(0, 2, 3, 1)
+        batch, h1, w1, _ = coords.shape
+
+        out_pyramid = []
+        for i in range(self.num_levels):
+            corr = self.corr_pyramid[i]
+            dx = torch.linspace(-r, r, 2*r+1, device=coords.device)
+            dy = torch.linspace(-r, r, 2*r+1, device=coords.device)
+            delta = torch.stack(torch.meshgrid(dy, dx), axis=-1)
+
+            centroid_lvl = coords.reshape(batch*h1*w1, 1, 1, 2) / 2**i
+            delta_lvl = delta.view(1, 2*r+1, 2*r+1, 2)
+            coords_lvl = centroid_lvl + delta_lvl
+
+            corr = bilinear_sampler(corr, coords_lvl)
+            corr = corr.view(batch, h1, w1, -1)
+            out_pyramid.append(corr)
+
+        out = torch.cat(out_pyramid, dim=-1)
+        return out.permute(0, 3, 1, 2).contiguous().float()
+
+    @staticmethod
+    def corr(fmap1, fmap2):
+        batch, dim, ht, wd = fmap1.shape
+        fmap1 = fmap1.view(batch, dim, ht*wd)
+        fmap2 = fmap2.view(batch, dim, ht*wd)
+
+        corr = torch.matmul(fmap1.transpose(1,2), fmap2)
+        corr = corr.view(batch, ht, wd, 1, ht, wd)
+        return corr / torch.sqrt(torch.tensor(dim).float())
+
+
+class AlternateCorrBlock:
+    def __init__(self, fmap1, fmap2, num_levels=4, radius=4):
+        self.num_levels = num_levels
+        self.radius = radius
+
+        self.pyramid = [(fmap1, fmap2)]
+        for i in range(self.num_levels):
+            fmap1 = F.avg_pool2d(fmap1, 2, stride=2)
+            fmap2 = F.avg_pool2d(fmap2, 2, stride=2)
+            self.pyramid.append((fmap1, fmap2))
+
+    def __call__(self, coords):
+        coords = coords.permute(0, 2, 3, 1)
+        B, H, W, _ = coords.shape
+        dim = self.pyramid[0][0].shape[1]
+
+        corr_list = []
+        for i in range(self.num_levels):
+            r = self.radius
+            fmap1_i = self.pyramid[0][0].permute(0, 2, 3, 1).contiguous()
+            fmap2_i = self.pyramid[i][1].permute(0, 2, 3, 1).contiguous()
+
+            coords_i = (coords / 2**i).reshape(B, 1, H, W, 2).contiguous()
+            corr, = alt_cuda_corr.forward(fmap1_i, fmap2_i, coords_i, r)
+            corr_list.append(corr.squeeze(1))
+
+        corr = torch.stack(corr_list, dim=1)
+        corr = corr.reshape(B, -1, H, W)
+        return corr / torch.sqrt(torch.tensor(dim).float())
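
Note (not part of the commit): a shape-level sketch of how CorrBlock is typically used, built once from two 1/8-resolution feature maps and then indexed with the current absolute coordinates on each iteration; the sizes below are illustrative.

  import torch
  from RAFT.corr import CorrBlock
  from RAFT.utils.utils import coords_grid

  fmap1 = torch.randn(1, 256, 48, 48)   # features of frame 1 at 1/8 resolution
  fmap2 = torch.randn(1, 256, 48, 48)   # features of frame 2 at 1/8 resolution
  corr_fn = CorrBlock(fmap1, fmap2, num_levels=4, radius=4)

  coords = coords_grid(1, 48, 48, device=fmap1.device)  # current target coordinates in fmap2
  corr = corr_fn(coords)
  print(corr.shape)  # torch.Size([1, 324, 48, 48]); 324 = num_levels * (2*radius + 1)**2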
SD-CN-Animation/RAFT/extractor.py ADDED
@@ -0,0 +1,267 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
+
6
+ class ResidualBlock(nn.Module):
7
+ def __init__(self, in_planes, planes, norm_fn='group', stride=1):
8
+ super(ResidualBlock, self).__init__()
9
+
10
+ self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, padding=1, stride=stride)
11
+ self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1)
12
+ self.relu = nn.ReLU(inplace=True)
13
+
14
+ num_groups = planes // 8
15
+
16
+ if norm_fn == 'group':
17
+ self.norm1 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
18
+ self.norm2 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
19
+ if not stride == 1:
20
+ self.norm3 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
21
+
22
+ elif norm_fn == 'batch':
23
+ self.norm1 = nn.BatchNorm2d(planes)
24
+ self.norm2 = nn.BatchNorm2d(planes)
25
+ if not stride == 1:
26
+ self.norm3 = nn.BatchNorm2d(planes)
27
+
28
+ elif norm_fn == 'instance':
29
+ self.norm1 = nn.InstanceNorm2d(planes)
30
+ self.norm2 = nn.InstanceNorm2d(planes)
31
+ if not stride == 1:
32
+ self.norm3 = nn.InstanceNorm2d(planes)
33
+
34
+ elif norm_fn == 'none':
35
+ self.norm1 = nn.Sequential()
36
+ self.norm2 = nn.Sequential()
37
+ if not stride == 1:
38
+ self.norm3 = nn.Sequential()
39
+
40
+ if stride == 1:
41
+ self.downsample = None
42
+
43
+ else:
44
+ self.downsample = nn.Sequential(
45
+ nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride), self.norm3)
46
+
47
+
48
+ def forward(self, x):
49
+ y = x
50
+ y = self.relu(self.norm1(self.conv1(y)))
51
+ y = self.relu(self.norm2(self.conv2(y)))
52
+
53
+ if self.downsample is not None:
54
+ x = self.downsample(x)
55
+
56
+ return self.relu(x+y)
57
+
58
+
59
+
60
+ class BottleneckBlock(nn.Module):
61
+ def __init__(self, in_planes, planes, norm_fn='group', stride=1):
62
+ super(BottleneckBlock, self).__init__()
63
+
64
+ self.conv1 = nn.Conv2d(in_planes, planes//4, kernel_size=1, padding=0)
65
+ self.conv2 = nn.Conv2d(planes//4, planes//4, kernel_size=3, padding=1, stride=stride)
66
+ self.conv3 = nn.Conv2d(planes//4, planes, kernel_size=1, padding=0)
67
+ self.relu = nn.ReLU(inplace=True)
68
+
69
+ num_groups = planes // 8
70
+
71
+ if norm_fn == 'group':
72
+ self.norm1 = nn.GroupNorm(num_groups=num_groups, num_channels=planes//4)
73
+ self.norm2 = nn.GroupNorm(num_groups=num_groups, num_channels=planes//4)
74
+ self.norm3 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
75
+ if not stride == 1:
76
+ self.norm4 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
77
+
78
+ elif norm_fn == 'batch':
79
+ self.norm1 = nn.BatchNorm2d(planes//4)
80
+ self.norm2 = nn.BatchNorm2d(planes//4)
81
+ self.norm3 = nn.BatchNorm2d(planes)
82
+ if not stride == 1:
83
+ self.norm4 = nn.BatchNorm2d(planes)
84
+
85
+ elif norm_fn == 'instance':
86
+ self.norm1 = nn.InstanceNorm2d(planes//4)
87
+ self.norm2 = nn.InstanceNorm2d(planes//4)
88
+ self.norm3 = nn.InstanceNorm2d(planes)
89
+ if not stride == 1:
90
+ self.norm4 = nn.InstanceNorm2d(planes)
91
+
92
+ elif norm_fn == 'none':
93
+ self.norm1 = nn.Sequential()
94
+ self.norm2 = nn.Sequential()
95
+ self.norm3 = nn.Sequential()
96
+ if not stride == 1:
97
+ self.norm4 = nn.Sequential()
98
+
99
+ if stride == 1:
100
+ self.downsample = None
101
+
102
+ else:
103
+ self.downsample = nn.Sequential(
104
+ nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride), self.norm4)
105
+
106
+
107
+ def forward(self, x):
108
+ y = x
109
+ y = self.relu(self.norm1(self.conv1(y)))
110
+ y = self.relu(self.norm2(self.conv2(y)))
111
+ y = self.relu(self.norm3(self.conv3(y)))
112
+
113
+ if self.downsample is not None:
114
+ x = self.downsample(x)
115
+
116
+ return self.relu(x+y)
117
+
118
+ class BasicEncoder(nn.Module):
119
+ def __init__(self, output_dim=128, norm_fn='batch', dropout=0.0):
120
+ super(BasicEncoder, self).__init__()
121
+ self.norm_fn = norm_fn
122
+
123
+ if self.norm_fn == 'group':
124
+ self.norm1 = nn.GroupNorm(num_groups=8, num_channels=64)
125
+
126
+ elif self.norm_fn == 'batch':
127
+ self.norm1 = nn.BatchNorm2d(64)
128
+
129
+ elif self.norm_fn == 'instance':
130
+ self.norm1 = nn.InstanceNorm2d(64)
131
+
132
+ elif self.norm_fn == 'none':
133
+ self.norm1 = nn.Sequential()
134
+
135
+ self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
136
+ self.relu1 = nn.ReLU(inplace=True)
137
+
138
+ self.in_planes = 64
139
+ self.layer1 = self._make_layer(64, stride=1)
140
+ self.layer2 = self._make_layer(96, stride=2)
141
+ self.layer3 = self._make_layer(128, stride=2)
142
+
143
+ # output convolution
144
+ self.conv2 = nn.Conv2d(128, output_dim, kernel_size=1)
145
+
146
+ self.dropout = None
147
+ if dropout > 0:
148
+ self.dropout = nn.Dropout2d(p=dropout)
149
+
150
+ for m in self.modules():
151
+ if isinstance(m, nn.Conv2d):
152
+ nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
153
+ elif isinstance(m, (nn.BatchNorm2d, nn.InstanceNorm2d, nn.GroupNorm)):
154
+ if m.weight is not None:
155
+ nn.init.constant_(m.weight, 1)
156
+ if m.bias is not None:
157
+ nn.init.constant_(m.bias, 0)
158
+
159
+ def _make_layer(self, dim, stride=1):
160
+ layer1 = ResidualBlock(self.in_planes, dim, self.norm_fn, stride=stride)
161
+ layer2 = ResidualBlock(dim, dim, self.norm_fn, stride=1)
162
+ layers = (layer1, layer2)
163
+
164
+ self.in_planes = dim
165
+ return nn.Sequential(*layers)
166
+
167
+
168
+ def forward(self, x):
169
+
170
+ # if input is list, combine batch dimension
171
+ is_list = isinstance(x, tuple) or isinstance(x, list)
172
+ if is_list:
173
+ batch_dim = x[0].shape[0]
174
+ x = torch.cat(x, dim=0)
175
+
176
+ x = self.conv1(x)
177
+ x = self.norm1(x)
178
+ x = self.relu1(x)
179
+
180
+ x = self.layer1(x)
181
+ x = self.layer2(x)
182
+ x = self.layer3(x)
183
+
184
+ x = self.conv2(x)
185
+
186
+ if self.training and self.dropout is not None:
187
+ x = self.dropout(x)
188
+
189
+ if is_list:
190
+ x = torch.split(x, [batch_dim, batch_dim], dim=0)
191
+
192
+ return x
193
+
194
+
195
+ class SmallEncoder(nn.Module):
196
+ def __init__(self, output_dim=128, norm_fn='batch', dropout=0.0):
197
+ super(SmallEncoder, self).__init__()
198
+ self.norm_fn = norm_fn
199
+
200
+ if self.norm_fn == 'group':
201
+ self.norm1 = nn.GroupNorm(num_groups=8, num_channels=32)
202
+
203
+ elif self.norm_fn == 'batch':
204
+ self.norm1 = nn.BatchNorm2d(32)
205
+
206
+ elif self.norm_fn == 'instance':
207
+ self.norm1 = nn.InstanceNorm2d(32)
208
+
209
+ elif self.norm_fn == 'none':
210
+ self.norm1 = nn.Sequential()
211
+
212
+ self.conv1 = nn.Conv2d(3, 32, kernel_size=7, stride=2, padding=3)
213
+ self.relu1 = nn.ReLU(inplace=True)
214
+
215
+ self.in_planes = 32
216
+ self.layer1 = self._make_layer(32, stride=1)
217
+ self.layer2 = self._make_layer(64, stride=2)
218
+ self.layer3 = self._make_layer(96, stride=2)
219
+
220
+ self.dropout = None
221
+ if dropout > 0:
222
+ self.dropout = nn.Dropout2d(p=dropout)
223
+
224
+ self.conv2 = nn.Conv2d(96, output_dim, kernel_size=1)
225
+
226
+ for m in self.modules():
227
+ if isinstance(m, nn.Conv2d):
228
+ nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
229
+ elif isinstance(m, (nn.BatchNorm2d, nn.InstanceNorm2d, nn.GroupNorm)):
230
+ if m.weight is not None:
231
+ nn.init.constant_(m.weight, 1)
232
+ if m.bias is not None:
233
+ nn.init.constant_(m.bias, 0)
234
+
235
+ def _make_layer(self, dim, stride=1):
236
+ layer1 = BottleneckBlock(self.in_planes, dim, self.norm_fn, stride=stride)
237
+ layer2 = BottleneckBlock(dim, dim, self.norm_fn, stride=1)
238
+ layers = (layer1, layer2)
239
+
240
+ self.in_planes = dim
241
+ return nn.Sequential(*layers)
242
+
243
+
244
+ def forward(self, x):
245
+
246
+ # if input is list, combine batch dimension
247
+ is_list = isinstance(x, tuple) or isinstance(x, list)
248
+ if is_list:
249
+ batch_dim = x[0].shape[0]
250
+ x = torch.cat(x, dim=0)
251
+
252
+ x = self.conv1(x)
253
+ x = self.norm1(x)
254
+ x = self.relu1(x)
255
+
256
+ x = self.layer1(x)
257
+ x = self.layer2(x)
258
+ x = self.layer3(x)
259
+ x = self.conv2(x)
260
+
261
+ if self.training and self.dropout is not None:
262
+ x = self.dropout(x)
263
+
264
+ if is_list:
265
+ x = torch.split(x, [batch_dim, batch_dim], dim=0)
266
+
267
+ return x
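
Note (not part of the commit): a quick shape check for the two encoders, assuming 384x384 inputs; both downsample by a factor of 8, and passing a list pushes both frames through one batched forward pass.

  import torch
  from RAFT.extractor import BasicEncoder, SmallEncoder

  image1 = torch.randn(1, 3, 384, 384)
  image2 = torch.randn(1, 3, 384, 384)

  fnet = BasicEncoder(output_dim=256, norm_fn='instance', dropout=0.0)
  fmap1, fmap2 = fnet([image1, image2])   # list input -> concatenated, then split back
  print(fmap1.shape)                      # torch.Size([1, 256, 48, 48])

  cnet = SmallEncoder(output_dim=96 + 64, norm_fn='none', dropout=0.0)
  print(cnet(image1).shape)               # torch.Size([1, 160, 48, 48])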
SD-CN-Animation/RAFT/raft.py ADDED
@@ -0,0 +1,144 @@
1
+ import numpy as np
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+
6
+ from RAFT.update import BasicUpdateBlock, SmallUpdateBlock
7
+ from RAFT.extractor import BasicEncoder, SmallEncoder
8
+ from RAFT.corr import CorrBlock, AlternateCorrBlock
9
+ from RAFT.utils.utils import bilinear_sampler, coords_grid, upflow8
10
+
11
+ try:
12
+ autocast = torch.cuda.amp.autocast
13
+ except:
14
+ # dummy autocast for PyTorch < 1.6
15
+ class autocast:
16
+ def __init__(self, enabled):
17
+ pass
18
+ def __enter__(self):
19
+ pass
20
+ def __exit__(self, *args):
21
+ pass
22
+
23
+
24
+ class RAFT(nn.Module):
25
+ def __init__(self, args):
26
+ super(RAFT, self).__init__()
27
+ self.args = args
28
+
29
+ if args.small:
30
+ self.hidden_dim = hdim = 96
31
+ self.context_dim = cdim = 64
32
+ args.corr_levels = 4
33
+ args.corr_radius = 3
34
+
35
+ else:
36
+ self.hidden_dim = hdim = 128
37
+ self.context_dim = cdim = 128
38
+ args.corr_levels = 4
39
+ args.corr_radius = 4
40
+
41
+ if 'dropout' not in self.args:
42
+ self.args.dropout = 0
43
+
44
+ if 'alternate_corr' not in self.args:
45
+ self.args.alternate_corr = False
46
+
47
+ # feature network, context network, and update block
48
+ if args.small:
49
+ self.fnet = SmallEncoder(output_dim=128, norm_fn='instance', dropout=args.dropout)
50
+ self.cnet = SmallEncoder(output_dim=hdim+cdim, norm_fn='none', dropout=args.dropout)
51
+ self.update_block = SmallUpdateBlock(self.args, hidden_dim=hdim)
52
+
53
+ else:
54
+ self.fnet = BasicEncoder(output_dim=256, norm_fn='instance', dropout=args.dropout)
55
+ self.cnet = BasicEncoder(output_dim=hdim+cdim, norm_fn='batch', dropout=args.dropout)
56
+ self.update_block = BasicUpdateBlock(self.args, hidden_dim=hdim)
57
+
58
+ def freeze_bn(self):
59
+ for m in self.modules():
60
+ if isinstance(m, nn.BatchNorm2d):
61
+ m.eval()
62
+
63
+ def initialize_flow(self, img):
64
+ """ Flow is represented as difference between two coordinate grids flow = coords1 - coords0"""
65
+ N, C, H, W = img.shape
66
+ coords0 = coords_grid(N, H//8, W//8, device=img.device)
67
+ coords1 = coords_grid(N, H//8, W//8, device=img.device)
68
+
69
+ # optical flow computed as difference: flow = coords1 - coords0
70
+ return coords0, coords1
71
+
72
+ def upsample_flow(self, flow, mask):
73
+ """ Upsample flow field [H/8, W/8, 2] -> [H, W, 2] using convex combination """
74
+ N, _, H, W = flow.shape
75
+ mask = mask.view(N, 1, 9, 8, 8, H, W)
76
+ mask = torch.softmax(mask, dim=2)
77
+
78
+ up_flow = F.unfold(8 * flow, [3,3], padding=1)
79
+ up_flow = up_flow.view(N, 2, 9, 1, 1, H, W)
80
+
81
+ up_flow = torch.sum(mask * up_flow, dim=2)
82
+ up_flow = up_flow.permute(0, 1, 4, 2, 5, 3)
83
+ return up_flow.reshape(N, 2, 8*H, 8*W)
84
+
85
+
86
+ def forward(self, image1, image2, iters=12, flow_init=None, upsample=True, test_mode=False):
87
+ """ Estimate optical flow between pair of frames """
88
+
89
+ image1 = 2 * (image1 / 255.0) - 1.0
90
+ image2 = 2 * (image2 / 255.0) - 1.0
91
+
92
+ image1 = image1.contiguous()
93
+ image2 = image2.contiguous()
94
+
95
+ hdim = self.hidden_dim
96
+ cdim = self.context_dim
97
+
98
+ # run the feature network
99
+ with autocast(enabled=self.args.mixed_precision):
100
+ fmap1, fmap2 = self.fnet([image1, image2])
101
+
102
+ fmap1 = fmap1.float()
103
+ fmap2 = fmap2.float()
104
+ if self.args.alternate_corr:
105
+ corr_fn = AlternateCorrBlock(fmap1, fmap2, radius=self.args.corr_radius)
106
+ else:
107
+ corr_fn = CorrBlock(fmap1, fmap2, radius=self.args.corr_radius)
108
+
109
+ # run the context network
110
+ with autocast(enabled=self.args.mixed_precision):
111
+ cnet = self.cnet(image1)
112
+ net, inp = torch.split(cnet, [hdim, cdim], dim=1)
113
+ net = torch.tanh(net)
114
+ inp = torch.relu(inp)
115
+
116
+ coords0, coords1 = self.initialize_flow(image1)
117
+
118
+ if flow_init is not None:
119
+ coords1 = coords1 + flow_init
120
+
121
+ flow_predictions = []
122
+ for itr in range(iters):
123
+ coords1 = coords1.detach()
124
+ corr = corr_fn(coords1) # index correlation volume
125
+
126
+ flow = coords1 - coords0
127
+ with autocast(enabled=self.args.mixed_precision):
128
+ net, up_mask, delta_flow = self.update_block(net, inp, corr, flow)
129
+
130
+ # F(t+1) = F(t) + \Delta(t)
131
+ coords1 = coords1 + delta_flow
132
+
133
+ # upsample predictions
134
+ if up_mask is None:
135
+ flow_up = upflow8(coords1 - coords0)
136
+ else:
137
+ flow_up = self.upsample_flow(coords1 - coords0, up_mask)
138
+
139
+ flow_predictions.append(flow_up)
140
+
141
+ if test_mode:
142
+ return coords1 - coords0, flow_up
143
+
144
+ return flow_predictions
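
Note (not part of the commit): a sketch of running this RAFT module on one frame pair, assuming RGB tensors in the [0, 255] range and a Namespace-style args object; the checkpoint lines are hypothetical and left commented out.

  import torch
  from argparse import Namespace
  from RAFT.raft import RAFT
  from RAFT.utils.utils import InputPadder

  args = Namespace(small=False, mixed_precision=False, alternate_corr=False)
  model = RAFT(args)
  # state = torch.load('raft_checkpoint.pth', map_location='cpu')  # hypothetical weights file
  # model.load_state_dict({k.replace('module.', ''): v for k, v in state.items()})
  model.eval()

  image1 = torch.randint(0, 256, (1, 3, 436, 1024)).float()  # values in [0, 255]
  image2 = torch.randint(0, 256, (1, 3, 436, 1024)).float()

  padder = InputPadder(image1.shape)            # pad H and W to multiples of 8
  image1, image2 = padder.pad(image1, image2)

  with torch.no_grad():
      flow_low, flow_up = model(image1, image2, iters=20, test_mode=True)
  flow = padder.unpad(flow_up)                  # (1, 2, 436, 1024), displacement in pixels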
SD-CN-Animation/RAFT/update.py ADDED
@@ -0,0 +1,139 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
+
6
+ class FlowHead(nn.Module):
7
+ def __init__(self, input_dim=128, hidden_dim=256):
8
+ super(FlowHead, self).__init__()
9
+ self.conv1 = nn.Conv2d(input_dim, hidden_dim, 3, padding=1)
10
+ self.conv2 = nn.Conv2d(hidden_dim, 2, 3, padding=1)
11
+ self.relu = nn.ReLU(inplace=True)
12
+
13
+ def forward(self, x):
14
+ return self.conv2(self.relu(self.conv1(x)))
15
+
16
+ class ConvGRU(nn.Module):
17
+ def __init__(self, hidden_dim=128, input_dim=192+128):
18
+ super(ConvGRU, self).__init__()
19
+ self.convz = nn.Conv2d(hidden_dim+input_dim, hidden_dim, 3, padding=1)
20
+ self.convr = nn.Conv2d(hidden_dim+input_dim, hidden_dim, 3, padding=1)
21
+ self.convq = nn.Conv2d(hidden_dim+input_dim, hidden_dim, 3, padding=1)
22
+
23
+ def forward(self, h, x):
24
+ hx = torch.cat([h, x], dim=1)
25
+
26
+ z = torch.sigmoid(self.convz(hx))
27
+ r = torch.sigmoid(self.convr(hx))
28
+ q = torch.tanh(self.convq(torch.cat([r*h, x], dim=1)))
29
+
30
+ h = (1-z) * h + z * q
31
+ return h
32
+
33
+ class SepConvGRU(nn.Module):
34
+ def __init__(self, hidden_dim=128, input_dim=192+128):
35
+ super(SepConvGRU, self).__init__()
36
+ self.convz1 = nn.Conv2d(hidden_dim+input_dim, hidden_dim, (1,5), padding=(0,2))
37
+ self.convr1 = nn.Conv2d(hidden_dim+input_dim, hidden_dim, (1,5), padding=(0,2))
38
+ self.convq1 = nn.Conv2d(hidden_dim+input_dim, hidden_dim, (1,5), padding=(0,2))
39
+
40
+ self.convz2 = nn.Conv2d(hidden_dim+input_dim, hidden_dim, (5,1), padding=(2,0))
41
+ self.convr2 = nn.Conv2d(hidden_dim+input_dim, hidden_dim, (5,1), padding=(2,0))
42
+ self.convq2 = nn.Conv2d(hidden_dim+input_dim, hidden_dim, (5,1), padding=(2,0))
43
+
44
+
45
+ def forward(self, h, x):
46
+ # horizontal
47
+ hx = torch.cat([h, x], dim=1)
48
+ z = torch.sigmoid(self.convz1(hx))
49
+ r = torch.sigmoid(self.convr1(hx))
50
+ q = torch.tanh(self.convq1(torch.cat([r*h, x], dim=1)))
51
+ h = (1-z) * h + z * q
52
+
53
+ # vertical
54
+ hx = torch.cat([h, x], dim=1)
55
+ z = torch.sigmoid(self.convz2(hx))
56
+ r = torch.sigmoid(self.convr2(hx))
57
+ q = torch.tanh(self.convq2(torch.cat([r*h, x], dim=1)))
58
+ h = (1-z) * h + z * q
59
+
60
+ return h
61
+
62
+ class SmallMotionEncoder(nn.Module):
63
+ def __init__(self, args):
64
+ super(SmallMotionEncoder, self).__init__()
65
+ cor_planes = args.corr_levels * (2*args.corr_radius + 1)**2
66
+ self.convc1 = nn.Conv2d(cor_planes, 96, 1, padding=0)
67
+ self.convf1 = nn.Conv2d(2, 64, 7, padding=3)
68
+ self.convf2 = nn.Conv2d(64, 32, 3, padding=1)
69
+ self.conv = nn.Conv2d(128, 80, 3, padding=1)
70
+
71
+ def forward(self, flow, corr):
72
+ cor = F.relu(self.convc1(corr))
73
+ flo = F.relu(self.convf1(flow))
74
+ flo = F.relu(self.convf2(flo))
75
+ cor_flo = torch.cat([cor, flo], dim=1)
76
+ out = F.relu(self.conv(cor_flo))
77
+ return torch.cat([out, flow], dim=1)
78
+
79
+ class BasicMotionEncoder(nn.Module):
80
+ def __init__(self, args):
81
+ super(BasicMotionEncoder, self).__init__()
82
+ cor_planes = args.corr_levels * (2*args.corr_radius + 1)**2
83
+ self.convc1 = nn.Conv2d(cor_planes, 256, 1, padding=0)
84
+ self.convc2 = nn.Conv2d(256, 192, 3, padding=1)
85
+ self.convf1 = nn.Conv2d(2, 128, 7, padding=3)
86
+ self.convf2 = nn.Conv2d(128, 64, 3, padding=1)
87
+ self.conv = nn.Conv2d(64+192, 128-2, 3, padding=1)
88
+
89
+ def forward(self, flow, corr):
90
+ cor = F.relu(self.convc1(corr))
91
+ cor = F.relu(self.convc2(cor))
92
+ flo = F.relu(self.convf1(flow))
93
+ flo = F.relu(self.convf2(flo))
94
+
95
+ cor_flo = torch.cat([cor, flo], dim=1)
96
+ out = F.relu(self.conv(cor_flo))
97
+ return torch.cat([out, flow], dim=1)
98
+
99
+ class SmallUpdateBlock(nn.Module):
100
+ def __init__(self, args, hidden_dim=96):
101
+ super(SmallUpdateBlock, self).__init__()
102
+ self.encoder = SmallMotionEncoder(args)
103
+ self.gru = ConvGRU(hidden_dim=hidden_dim, input_dim=82+64)
104
+ self.flow_head = FlowHead(hidden_dim, hidden_dim=128)
105
+
106
+ def forward(self, net, inp, corr, flow):
107
+ motion_features = self.encoder(flow, corr)
108
+ inp = torch.cat([inp, motion_features], dim=1)
109
+ net = self.gru(net, inp)
110
+ delta_flow = self.flow_head(net)
111
+
112
+ return net, None, delta_flow
113
+
114
+ class BasicUpdateBlock(nn.Module):
115
+ def __init__(self, args, hidden_dim=128, input_dim=128):
116
+ super(BasicUpdateBlock, self).__init__()
117
+ self.args = args
118
+ self.encoder = BasicMotionEncoder(args)
119
+ self.gru = SepConvGRU(hidden_dim=hidden_dim, input_dim=128+hidden_dim)
120
+ self.flow_head = FlowHead(hidden_dim, hidden_dim=256)
121
+
122
+ self.mask = nn.Sequential(
123
+ nn.Conv2d(128, 256, 3, padding=1),
124
+ nn.ReLU(inplace=True),
125
+ nn.Conv2d(256, 64*9, 1, padding=0))
126
+
127
+ def forward(self, net, inp, corr, flow, upsample=True):
128
+ motion_features = self.encoder(flow, corr)
129
+ inp = torch.cat([inp, motion_features], dim=1)
130
+
131
+ net = self.gru(net, inp)
132
+ delta_flow = self.flow_head(net)
133
+
134
+ # scale mask to balence gradients
135
+ mask = .25 * self.mask(net)
136
+ return net, mask, delta_flow
137
+
138
+
139
+
SD-CN-Animation/RAFT/utils/__init__.py ADDED
File without changes
SD-CN-Animation/RAFT/utils/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (166 Bytes).
 
SD-CN-Animation/RAFT/utils/__pycache__/utils.cpython-310.pyc ADDED
Binary file (3.12 kB).
 
SD-CN-Animation/RAFT/utils/augmentor.py ADDED
@@ -0,0 +1,246 @@
1
+ import numpy as np
2
+ import random
3
+ import math
4
+ from PIL import Image
5
+
6
+ import cv2
7
+ cv2.setNumThreads(0)
8
+ cv2.ocl.setUseOpenCL(False)
9
+
10
+ import torch
11
+ from torchvision.transforms import ColorJitter
12
+ import torch.nn.functional as F
13
+
14
+
15
+ class FlowAugmentor:
16
+ def __init__(self, crop_size, min_scale=-0.2, max_scale=0.5, do_flip=True):
17
+
18
+ # spatial augmentation params
19
+ self.crop_size = crop_size
20
+ self.min_scale = min_scale
21
+ self.max_scale = max_scale
22
+ self.spatial_aug_prob = 0.8
23
+ self.stretch_prob = 0.8
24
+ self.max_stretch = 0.2
25
+
26
+ # flip augmentation params
27
+ self.do_flip = do_flip
28
+ self.h_flip_prob = 0.5
29
+ self.v_flip_prob = 0.1
30
+
31
+ # photometric augmentation params
32
+ self.photo_aug = ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.5/3.14)
33
+ self.asymmetric_color_aug_prob = 0.2
34
+ self.eraser_aug_prob = 0.5
35
+
36
+ def color_transform(self, img1, img2):
37
+ """ Photometric augmentation """
38
+
39
+ # asymmetric
40
+ if np.random.rand() < self.asymmetric_color_aug_prob:
41
+ img1 = np.array(self.photo_aug(Image.fromarray(img1)), dtype=np.uint8)
42
+ img2 = np.array(self.photo_aug(Image.fromarray(img2)), dtype=np.uint8)
43
+
44
+ # symmetric
45
+ else:
46
+ image_stack = np.concatenate([img1, img2], axis=0)
47
+ image_stack = np.array(self.photo_aug(Image.fromarray(image_stack)), dtype=np.uint8)
48
+ img1, img2 = np.split(image_stack, 2, axis=0)
49
+
50
+ return img1, img2
51
+
52
+ def eraser_transform(self, img1, img2, bounds=[50, 100]):
53
+ """ Occlusion augmentation """
54
+
55
+ ht, wd = img1.shape[:2]
56
+ if np.random.rand() < self.eraser_aug_prob:
57
+ mean_color = np.mean(img2.reshape(-1, 3), axis=0)
58
+ for _ in range(np.random.randint(1, 3)):
59
+ x0 = np.random.randint(0, wd)
60
+ y0 = np.random.randint(0, ht)
61
+ dx = np.random.randint(bounds[0], bounds[1])
62
+ dy = np.random.randint(bounds[0], bounds[1])
63
+ img2[y0:y0+dy, x0:x0+dx, :] = mean_color
64
+
65
+ return img1, img2
66
+
67
+ def spatial_transform(self, img1, img2, flow):
68
+ # randomly sample scale
69
+ ht, wd = img1.shape[:2]
70
+ min_scale = np.maximum(
71
+ (self.crop_size[0] + 8) / float(ht),
72
+ (self.crop_size[1] + 8) / float(wd))
73
+
74
+ scale = 2 ** np.random.uniform(self.min_scale, self.max_scale)
75
+ scale_x = scale
76
+ scale_y = scale
77
+ if np.random.rand() < self.stretch_prob:
78
+ scale_x *= 2 ** np.random.uniform(-self.max_stretch, self.max_stretch)
79
+ scale_y *= 2 ** np.random.uniform(-self.max_stretch, self.max_stretch)
80
+
81
+ scale_x = np.clip(scale_x, min_scale, None)
82
+ scale_y = np.clip(scale_y, min_scale, None)
83
+
84
+ if np.random.rand() < self.spatial_aug_prob:
85
+ # rescale the images
86
+ img1 = cv2.resize(img1, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR)
87
+ img2 = cv2.resize(img2, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR)
88
+ flow = cv2.resize(flow, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR)
89
+ flow = flow * [scale_x, scale_y]
90
+
91
+ if self.do_flip:
92
+ if np.random.rand() < self.h_flip_prob: # h-flip
93
+ img1 = img1[:, ::-1]
94
+ img2 = img2[:, ::-1]
95
+ flow = flow[:, ::-1] * [-1.0, 1.0]
96
+
97
+ if np.random.rand() < self.v_flip_prob: # v-flip
98
+ img1 = img1[::-1, :]
99
+ img2 = img2[::-1, :]
100
+ flow = flow[::-1, :] * [1.0, -1.0]
101
+
102
+ y0 = np.random.randint(0, img1.shape[0] - self.crop_size[0])
103
+ x0 = np.random.randint(0, img1.shape[1] - self.crop_size[1])
104
+
105
+ img1 = img1[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
106
+ img2 = img2[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
107
+ flow = flow[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
108
+
109
+ return img1, img2, flow
110
+
111
+ def __call__(self, img1, img2, flow):
112
+ img1, img2 = self.color_transform(img1, img2)
113
+ img1, img2 = self.eraser_transform(img1, img2)
114
+ img1, img2, flow = self.spatial_transform(img1, img2, flow)
115
+
116
+ img1 = np.ascontiguousarray(img1)
117
+ img2 = np.ascontiguousarray(img2)
118
+ flow = np.ascontiguousarray(flow)
119
+
120
+ return img1, img2, flow
121
+
122
+ class SparseFlowAugmentor:
123
+ def __init__(self, crop_size, min_scale=-0.2, max_scale=0.5, do_flip=False):
124
+ # spatial augmentation params
125
+ self.crop_size = crop_size
126
+ self.min_scale = min_scale
127
+ self.max_scale = max_scale
128
+ self.spatial_aug_prob = 0.8
129
+ self.stretch_prob = 0.8
130
+ self.max_stretch = 0.2
131
+
132
+ # flip augmentation params
133
+ self.do_flip = do_flip
134
+ self.h_flip_prob = 0.5
135
+ self.v_flip_prob = 0.1
136
+
137
+ # photometric augmentation params
138
+ self.photo_aug = ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.3/3.14)
139
+ self.asymmetric_color_aug_prob = 0.2
140
+ self.eraser_aug_prob = 0.5
141
+
142
+ def color_transform(self, img1, img2):
143
+ image_stack = np.concatenate([img1, img2], axis=0)
144
+ image_stack = np.array(self.photo_aug(Image.fromarray(image_stack)), dtype=np.uint8)
145
+ img1, img2 = np.split(image_stack, 2, axis=0)
146
+ return img1, img2
147
+
148
+ def eraser_transform(self, img1, img2):
149
+ ht, wd = img1.shape[:2]
150
+ if np.random.rand() < self.eraser_aug_prob:
151
+ mean_color = np.mean(img2.reshape(-1, 3), axis=0)
152
+ for _ in range(np.random.randint(1, 3)):
153
+ x0 = np.random.randint(0, wd)
154
+ y0 = np.random.randint(0, ht)
155
+ dx = np.random.randint(50, 100)
156
+ dy = np.random.randint(50, 100)
157
+ img2[y0:y0+dy, x0:x0+dx, :] = mean_color
158
+
159
+ return img1, img2
160
+
161
+ def resize_sparse_flow_map(self, flow, valid, fx=1.0, fy=1.0):
162
+ ht, wd = flow.shape[:2]
163
+ coords = np.meshgrid(np.arange(wd), np.arange(ht))
164
+ coords = np.stack(coords, axis=-1)
165
+
166
+ coords = coords.reshape(-1, 2).astype(np.float32)
167
+ flow = flow.reshape(-1, 2).astype(np.float32)
168
+ valid = valid.reshape(-1).astype(np.float32)
169
+
170
+ coords0 = coords[valid>=1]
171
+ flow0 = flow[valid>=1]
172
+
173
+ ht1 = int(round(ht * fy))
174
+ wd1 = int(round(wd * fx))
175
+
176
+ coords1 = coords0 * [fx, fy]
177
+ flow1 = flow0 * [fx, fy]
178
+
179
+ xx = np.round(coords1[:,0]).astype(np.int32)
180
+ yy = np.round(coords1[:,1]).astype(np.int32)
181
+
182
+ v = (xx > 0) & (xx < wd1) & (yy > 0) & (yy < ht1)
183
+ xx = xx[v]
184
+ yy = yy[v]
185
+ flow1 = flow1[v]
186
+
187
+ flow_img = np.zeros([ht1, wd1, 2], dtype=np.float32)
188
+ valid_img = np.zeros([ht1, wd1], dtype=np.int32)
189
+
190
+ flow_img[yy, xx] = flow1
191
+ valid_img[yy, xx] = 1
192
+
193
+ return flow_img, valid_img
194
+
195
+ def spatial_transform(self, img1, img2, flow, valid):
196
+ # randomly sample scale
197
+
198
+ ht, wd = img1.shape[:2]
199
+ min_scale = np.maximum(
200
+ (self.crop_size[0] + 1) / float(ht),
201
+ (self.crop_size[1] + 1) / float(wd))
202
+
203
+ scale = 2 ** np.random.uniform(self.min_scale, self.max_scale)
204
+ scale_x = np.clip(scale, min_scale, None)
205
+ scale_y = np.clip(scale, min_scale, None)
206
+
207
+ if np.random.rand() < self.spatial_aug_prob:
208
+ # rescale the images
209
+ img1 = cv2.resize(img1, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR)
210
+ img2 = cv2.resize(img2, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR)
211
+ flow, valid = self.resize_sparse_flow_map(flow, valid, fx=scale_x, fy=scale_y)
212
+
213
+ if self.do_flip:
214
+ if np.random.rand() < 0.5: # h-flip
215
+ img1 = img1[:, ::-1]
216
+ img2 = img2[:, ::-1]
217
+ flow = flow[:, ::-1] * [-1.0, 1.0]
218
+ valid = valid[:, ::-1]
219
+
220
+ margin_y = 20
221
+ margin_x = 50
222
+
223
+ y0 = np.random.randint(0, img1.shape[0] - self.crop_size[0] + margin_y)
224
+ x0 = np.random.randint(-margin_x, img1.shape[1] - self.crop_size[1] + margin_x)
225
+
226
+ y0 = np.clip(y0, 0, img1.shape[0] - self.crop_size[0])
227
+ x0 = np.clip(x0, 0, img1.shape[1] - self.crop_size[1])
228
+
229
+ img1 = img1[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
230
+ img2 = img2[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
231
+ flow = flow[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
232
+ valid = valid[y0:y0+self.crop_size[0], x0:x0+self.crop_size[1]]
233
+ return img1, img2, flow, valid
234
+
235
+
236
+ def __call__(self, img1, img2, flow, valid):
237
+ img1, img2 = self.color_transform(img1, img2)
238
+ img1, img2 = self.eraser_transform(img1, img2)
239
+ img1, img2, flow, valid = self.spatial_transform(img1, img2, flow, valid)
240
+
241
+ img1 = np.ascontiguousarray(img1)
242
+ img2 = np.ascontiguousarray(img2)
243
+ flow = np.ascontiguousarray(flow)
244
+ valid = np.ascontiguousarray(valid)
245
+
246
+ return img1, img2, flow, valid
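
Note (not part of the commit): a sketch of FlowAugmentor applied to a single training triplet, assuming uint8 images and a dense float32 flow map; shapes are illustrative.

  import numpy as np
  from RAFT.utils.augmentor import FlowAugmentor

  augmentor = FlowAugmentor(crop_size=(368, 496), min_scale=-0.2, max_scale=0.5)

  img1 = np.random.randint(0, 256, (436, 1024, 3), dtype=np.uint8)
  img2 = np.random.randint(0, 256, (436, 1024, 3), dtype=np.uint8)
  flow = np.random.randn(436, 1024, 2).astype(np.float32)

  img1, img2, flow = augmentor(img1, img2, flow)   # color jitter, random erase, scale, flip, crop
  print(img1.shape, flow.shape)                    # (368, 496, 3) (368, 496, 2)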
SD-CN-Animation/RAFT/utils/flow_viz.py ADDED
@@ -0,0 +1,132 @@
1
+ # Flow visualization code used from https://github.com/tomrunia/OpticalFlow_Visualization
2
+
3
+
4
+ # MIT License
5
+ #
6
+ # Copyright (c) 2018 Tom Runia
7
+ #
8
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
9
+ # of this software and associated documentation files (the "Software"), to deal
10
+ # in the Software without restriction, including without limitation the rights
11
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12
+ # copies of the Software, and to permit persons to whom the Software is
13
+ # furnished to do so, subject to conditions.
14
+ #
15
+ # Author: Tom Runia
16
+ # Date Created: 2018-08-03
17
+
18
+ import numpy as np
19
+
20
+ def make_colorwheel():
21
+ """
22
+ Generates a color wheel for optical flow visualization as presented in:
23
+ Baker et al. "A Database and Evaluation Methodology for Optical Flow" (ICCV, 2007)
24
+ URL: http://vision.middlebury.edu/flow/flowEval-iccv07.pdf
25
+
26
+ Code follows the original C++ source code of Daniel Scharstein.
27
+ Code follows the the Matlab source code of Deqing Sun.
28
+
29
+ Returns:
30
+ np.ndarray: Color wheel
31
+ """
32
+
33
+ RY = 15
34
+ YG = 6
35
+ GC = 4
36
+ CB = 11
37
+ BM = 13
38
+ MR = 6
39
+
40
+ ncols = RY + YG + GC + CB + BM + MR
41
+ colorwheel = np.zeros((ncols, 3))
42
+ col = 0
43
+
44
+ # RY
45
+ colorwheel[0:RY, 0] = 255
46
+ colorwheel[0:RY, 1] = np.floor(255*np.arange(0,RY)/RY)
47
+ col = col+RY
48
+ # YG
49
+ colorwheel[col:col+YG, 0] = 255 - np.floor(255*np.arange(0,YG)/YG)
50
+ colorwheel[col:col+YG, 1] = 255
51
+ col = col+YG
52
+ # GC
53
+ colorwheel[col:col+GC, 1] = 255
54
+ colorwheel[col:col+GC, 2] = np.floor(255*np.arange(0,GC)/GC)
55
+ col = col+GC
56
+ # CB
57
+ colorwheel[col:col+CB, 1] = 255 - np.floor(255*np.arange(CB)/CB)
58
+ colorwheel[col:col+CB, 2] = 255
59
+ col = col+CB
60
+ # BM
61
+ colorwheel[col:col+BM, 2] = 255
62
+ colorwheel[col:col+BM, 0] = np.floor(255*np.arange(0,BM)/BM)
63
+ col = col+BM
64
+ # MR
65
+ colorwheel[col:col+MR, 2] = 255 - np.floor(255*np.arange(MR)/MR)
66
+ colorwheel[col:col+MR, 0] = 255
67
+ return colorwheel
68
+
69
+
70
+ def flow_uv_to_colors(u, v, convert_to_bgr=False):
71
+ """
72
+ Applies the flow color wheel to (possibly clipped) flow components u and v.
73
+
74
+ According to the C++ source code of Daniel Scharstein
75
+ According to the Matlab source code of Deqing Sun
76
+
77
+ Args:
78
+ u (np.ndarray): Input horizontal flow of shape [H,W]
79
+ v (np.ndarray): Input vertical flow of shape [H,W]
80
+ convert_to_bgr (bool, optional): Convert output image to BGR. Defaults to False.
81
+
82
+ Returns:
83
+ np.ndarray: Flow visualization image of shape [H,W,3]
84
+ """
85
+ flow_image = np.zeros((u.shape[0], u.shape[1], 3), np.uint8)
86
+ colorwheel = make_colorwheel() # shape [55x3]
87
+ ncols = colorwheel.shape[0]
88
+ rad = np.sqrt(np.square(u) + np.square(v))
89
+ a = np.arctan2(-v, -u)/np.pi
90
+ fk = (a+1) / 2*(ncols-1)
91
+ k0 = np.floor(fk).astype(np.int32)
92
+ k1 = k0 + 1
93
+ k1[k1 == ncols] = 0
94
+ f = fk - k0
95
+ for i in range(colorwheel.shape[1]):
96
+ tmp = colorwheel[:,i]
97
+ col0 = tmp[k0] / 255.0
98
+ col1 = tmp[k1] / 255.0
99
+ col = (1-f)*col0 + f*col1
100
+ idx = (rad <= 1)
101
+ col[idx] = 1 - rad[idx] * (1-col[idx])
102
+ col[~idx] = col[~idx] * 0.75 # out of range
103
+ # Note the 2-i => BGR instead of RGB
104
+ ch_idx = 2-i if convert_to_bgr else i
105
+ flow_image[:,:,ch_idx] = np.floor(255 * col)
106
+ return flow_image
107
+
108
+
109
+ def flow_to_image(flow_uv, clip_flow=None, convert_to_bgr=False):
110
+ """
111
+ Expects a two dimensional flow image of shape.
112
+
113
+ Args:
114
+ flow_uv (np.ndarray): Flow UV image of shape [H,W,2]
115
+ clip_flow (float, optional): Clip maximum of flow values. Defaults to None.
116
+ convert_to_bgr (bool, optional): Convert output image to BGR. Defaults to False.
117
+
118
+ Returns:
119
+ np.ndarray: Flow visualization image of shape [H,W,3]
120
+ """
121
+ assert flow_uv.ndim == 3, 'input flow must have three dimensions'
122
+ assert flow_uv.shape[2] == 2, 'input flow must have shape [H,W,2]'
123
+ if clip_flow is not None:
124
+ flow_uv = np.clip(flow_uv, 0, clip_flow)
125
+ u = flow_uv[:,:,0]
126
+ v = flow_uv[:,:,1]
127
+ rad = np.sqrt(np.square(u) + np.square(v))
128
+ rad_max = np.max(rad)
129
+ epsilon = 1e-5
130
+ u = u / (rad_max + epsilon)
131
+ v = v / (rad_max + epsilon)
132
+ return flow_uv_to_colors(u, v, convert_to_bgr)
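
Note (not part of the commit): a sketch of turning a flow field into a color image with flow_to_image; the output is RGB, so the channels are reversed before cv2.imwrite.

  import numpy as np
  import cv2
  from RAFT.utils import flow_viz

  h, w = 240, 320
  flow = np.zeros((h, w, 2), dtype=np.float32)
  flow[:, :, 0] = np.linspace(0, 20, w)[None, :]    # horizontal motion growing to the right

  color = flow_viz.flow_to_image(flow)              # uint8 RGB image, shape (240, 320, 3)
  cv2.imwrite('flow_preview.png', color[:, :, ::-1])
  # or directly: flow_viz.flow_to_image(flow, convert_to_bgr=True)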
SD-CN-Animation/RAFT/utils/frame_utils.py ADDED
@@ -0,0 +1,137 @@
1
+ import numpy as np
2
+ from PIL import Image
3
+ from os.path import *
4
+ import re
5
+
6
+ import cv2
7
+ cv2.setNumThreads(0)
8
+ cv2.ocl.setUseOpenCL(False)
9
+
10
+ TAG_CHAR = np.array([202021.25], np.float32)
11
+
12
+ def readFlow(fn):
13
+ """ Read .flo file in Middlebury format"""
14
+ # Code adapted from:
15
+ # http://stackoverflow.com/questions/28013200/reading-middlebury-flow-files-with-python-bytes-array-numpy
16
+
17
+ # WARNING: this will work on little-endian architectures (eg Intel x86) only!
18
+ # print 'fn = %s'%(fn)
19
+ with open(fn, 'rb') as f:
20
+ magic = np.fromfile(f, np.float32, count=1)
21
+ if 202021.25 != magic:
22
+ print('Magic number incorrect. Invalid .flo file')
23
+ return None
24
+ else:
25
+ w = np.fromfile(f, np.int32, count=1)
26
+ h = np.fromfile(f, np.int32, count=1)
27
+ # print 'Reading %d x %d flo file\n' % (w, h)
28
+ data = np.fromfile(f, np.float32, count=2*int(w)*int(h))
29
+ # Reshape data into 3D array (columns, rows, bands)
30
+ # The reshape here is for visualization, the original code is (w,h,2)
31
+ return np.resize(data, (int(h), int(w), 2))
32
+
33
+ def readPFM(file):
34
+ file = open(file, 'rb')
35
+
36
+ color = None
37
+ width = None
38
+ height = None
39
+ scale = None
40
+ endian = None
41
+
42
+ header = file.readline().rstrip()
43
+ if header == b'PF':
44
+ color = True
45
+ elif header == b'Pf':
46
+ color = False
47
+ else:
48
+ raise Exception('Not a PFM file.')
49
+
50
+ dim_match = re.match(rb'^(\d+)\s(\d+)\s$', file.readline())
51
+ if dim_match:
52
+ width, height = map(int, dim_match.groups())
53
+ else:
54
+ raise Exception('Malformed PFM header.')
55
+
56
+ scale = float(file.readline().rstrip())
57
+ if scale < 0: # little-endian
58
+ endian = '<'
59
+ scale = -scale
60
+ else:
61
+ endian = '>' # big-endian
62
+
63
+ data = np.fromfile(file, endian + 'f')
64
+ shape = (height, width, 3) if color else (height, width)
65
+
66
+ data = np.reshape(data, shape)
67
+ data = np.flipud(data)
68
+ return data
69
+
70
+ def writeFlow(filename,uv,v=None):
71
+ """ Write optical flow to file.
72
+
73
+ If v is None, uv is assumed to contain both u and v channels,
74
+ stacked in depth.
75
+ Original code by Deqing Sun, adapted from Daniel Scharstein.
76
+ """
77
+ nBands = 2
78
+
79
+ if v is None:
80
+ assert(uv.ndim == 3)
81
+ assert(uv.shape[2] == 2)
82
+ u = uv[:,:,0]
83
+ v = uv[:,:,1]
84
+ else:
85
+ u = uv
86
+
87
+ assert(u.shape == v.shape)
88
+ height,width = u.shape
89
+ f = open(filename,'wb')
90
+ # write the header
91
+ f.write(TAG_CHAR)
92
+ np.array(width).astype(np.int32).tofile(f)
93
+ np.array(height).astype(np.int32).tofile(f)
94
+ # arrange into matrix form
95
+ tmp = np.zeros((height, width*nBands))
96
+ tmp[:,np.arange(width)*2] = u
97
+ tmp[:,np.arange(width)*2 + 1] = v
98
+ tmp.astype(np.float32).tofile(f)
99
+ f.close()
100
+
101
+
102
+ def readFlowKITTI(filename):
103
+ flow = cv2.imread(filename, cv2.IMREAD_ANYDEPTH|cv2.IMREAD_COLOR)
104
+ flow = flow[:,:,::-1].astype(np.float32)
105
+ flow, valid = flow[:, :, :2], flow[:, :, 2]
106
+ flow = (flow - 2**15) / 64.0
107
+ return flow, valid
108
+
109
+ def readDispKITTI(filename):
110
+ disp = cv2.imread(filename, cv2.IMREAD_ANYDEPTH) / 256.0
111
+ valid = disp > 0.0
112
+ flow = np.stack([-disp, np.zeros_like(disp)], -1)
113
+ return flow, valid
114
+
115
+
116
+ def writeFlowKITTI(filename, uv):
117
+ uv = 64.0 * uv + 2**15
118
+ valid = np.ones([uv.shape[0], uv.shape[1], 1])
119
+ uv = np.concatenate([uv, valid], axis=-1).astype(np.uint16)
120
+ cv2.imwrite(filename, uv[..., ::-1])
121
+
122
+
123
+ def read_gen(file_name, pil=False):
124
+ ext = splitext(file_name)[-1]
125
+ if ext == '.png' or ext == '.jpeg' or ext == '.ppm' or ext == '.jpg':
126
+ return Image.open(file_name)
127
+ elif ext == '.bin' or ext == '.raw':
128
+ return np.load(file_name)
129
+ elif ext == '.flo':
130
+ return readFlow(file_name).astype(np.float32)
131
+ elif ext == '.pfm':
132
+ flow = readPFM(file_name).astype(np.float32)
133
+ if len(flow.shape) == 2:
134
+ return flow
135
+ else:
136
+ return flow[:, :, :-1]
137
+ return []
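
Note (not part of the commit): a round-trip sketch for the Middlebury .flo helpers above; the file name is arbitrary.

  import numpy as np
  from RAFT.utils import frame_utils

  flow = np.random.randn(120, 160, 2).astype(np.float32)
  frame_utils.writeFlow('sample.flo', flow)         # tag, width, height, then interleaved u/v

  restored = frame_utils.readFlow('sample.flo')
  print(restored.shape)                             # (120, 160, 2)
  print(np.allclose(flow, restored))                # True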
SD-CN-Animation/RAFT/utils/utils.py ADDED
@@ -0,0 +1,82 @@
1
+ import torch
2
+ import torch.nn.functional as F
3
+ import numpy as np
4
+ from scipy import interpolate
5
+
6
+
7
+ class InputPadder:
8
+ """ Pads images such that dimensions are divisible by 8 """
9
+ def __init__(self, dims, mode='sintel'):
10
+ self.ht, self.wd = dims[-2:]
11
+ pad_ht = (((self.ht // 8) + 1) * 8 - self.ht) % 8
12
+ pad_wd = (((self.wd // 8) + 1) * 8 - self.wd) % 8
13
+ if mode == 'sintel':
14
+ self._pad = [pad_wd//2, pad_wd - pad_wd//2, pad_ht//2, pad_ht - pad_ht//2]
15
+ else:
16
+ self._pad = [pad_wd//2, pad_wd - pad_wd//2, 0, pad_ht]
17
+
18
+ def pad(self, *inputs):
19
+ return [F.pad(x, self._pad, mode='replicate') for x in inputs]
20
+
21
+ def unpad(self,x):
22
+ ht, wd = x.shape[-2:]
23
+ c = [self._pad[2], ht-self._pad[3], self._pad[0], wd-self._pad[1]]
24
+ return x[..., c[0]:c[1], c[2]:c[3]]
25
+
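+ # Usage sketch (mirrors how RAFT_estimate_flow in old_scripts/flow_utils.py uses this class):
+ #   padder = InputPadder(frame1_torch.shape)
+ #   image1, image2 = padder.pad(frame1_torch, frame2_torch)
+ #   ...run the model on the padded tensors, then crop predictions back with padder.unpad(flow)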
26
+ def forward_interpolate(flow):
27
+ flow = flow.detach().cpu().numpy()
28
+ dx, dy = flow[0], flow[1]
29
+
30
+ ht, wd = dx.shape
31
+ x0, y0 = np.meshgrid(np.arange(wd), np.arange(ht))
32
+
33
+ x1 = x0 + dx
34
+ y1 = y0 + dy
35
+
36
+ x1 = x1.reshape(-1)
37
+ y1 = y1.reshape(-1)
38
+ dx = dx.reshape(-1)
39
+ dy = dy.reshape(-1)
40
+
41
+ valid = (x1 > 0) & (x1 < wd) & (y1 > 0) & (y1 < ht)
42
+ x1 = x1[valid]
43
+ y1 = y1[valid]
44
+ dx = dx[valid]
45
+ dy = dy[valid]
46
+
47
+ flow_x = interpolate.griddata(
48
+ (x1, y1), dx, (x0, y0), method='nearest', fill_value=0)
49
+
50
+ flow_y = interpolate.griddata(
51
+ (x1, y1), dy, (x0, y0), method='nearest', fill_value=0)
52
+
53
+ flow = np.stack([flow_x, flow_y], axis=0)
54
+ return torch.from_numpy(flow).float()
55
+
56
+
57
+ def bilinear_sampler(img, coords, mode='bilinear', mask=False):
58
+ """ Wrapper for grid_sample, uses pixel coordinates """
59
+ H, W = img.shape[-2:]
60
+ xgrid, ygrid = coords.split([1,1], dim=-1)
61
+ xgrid = 2*xgrid/(W-1) - 1
62
+ ygrid = 2*ygrid/(H-1) - 1
63
+
64
+ grid = torch.cat([xgrid, ygrid], dim=-1)
65
+ img = F.grid_sample(img, grid, align_corners=True)
66
+
67
+ if mask:
68
+ mask = (xgrid > -1) & (ygrid > -1) & (xgrid < 1) & (ygrid < 1)
69
+ return img, mask.float()
70
+
71
+ return img
72
+
73
+
74
+ def coords_grid(batch, ht, wd, device):
75
+ coords = torch.meshgrid(torch.arange(ht, device=device), torch.arange(wd, device=device))
76
+ coords = torch.stack(coords[::-1], dim=0).float()
77
+ return coords[None].repeat(batch, 1, 1, 1)
78
+
79
+
80
+ def upflow8(flow, mode='bilinear'):
81
+ new_size = (8 * flow.shape[2], 8 * flow.shape[3])
82
+ return 8 * F.interpolate(flow, size=new_size, mode=mode, align_corners=True)
SD-CN-Animation/examples/bonefire_1.mp4 ADDED
Binary file (840 kB). View file
 
SD-CN-Animation/examples/bonfire_1.gif ADDED
SD-CN-Animation/examples/cn_settings.png ADDED
SD-CN-Animation/examples/diamond_4.gif ADDED
SD-CN-Animation/examples/diamond_4.mp4 ADDED
Binary file (353 kB). View file
 
SD-CN-Animation/examples/flower_1.gif ADDED

Git LFS Details

  • SHA256: c4fa97e65ea048e27472fa6b7d151ac66074c1ac3e5c5b4cfa333321d97b0bb9
  • Pointer size: 132 Bytes
  • Size of remote file: 1.68 MB
SD-CN-Animation/examples/flower_1.mp4 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8db0719d9f215b775ae1b5dae912a425bc010f0586b41894c14bb8ad042711e
3
+ size 1259280
SD-CN-Animation/examples/flower_11.mp4 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7499401998e41c65471963d6cbd70568908dd83a8c957a43940df99be7c52026
3
+ size 1328049
SD-CN-Animation/examples/girl_org.gif ADDED

Git LFS Details

  • SHA256: 059307d932d818247672b8de1c8550a67d891ad9c2c32494e30b424abe643480
  • Pointer size: 132 Bytes
  • Size of remote file: 3.15 MB
SD-CN-Animation/examples/girl_to_jc.gif ADDED

Git LFS Details

  • SHA256: 604f24c3072ac1e17f87c0664894e93059e6741d9f17d0f03f33214549edc967
  • Pointer size: 132 Bytes
  • Size of remote file: 3.39 MB
SD-CN-Animation/examples/girl_to_jc.mp4 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d09ded8b44f7e30d55d5d6245d9ec7fa3b95e970a8c29d2c544b6c288341e39
3
+ size 5274033
SD-CN-Animation/examples/girl_to_wc.gif ADDED

Git LFS Details

  • SHA256: 9d978c31d2d58f408fa40b186f080fbcdff89fb6e1d8a7cf2a0c81276735bd0c
  • Pointer size: 132 Bytes
  • Size of remote file: 3.32 MB
SD-CN-Animation/examples/girl_to_wc.mp4 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd730de667b8e7ea5af2dddcf129095694349f126e06291c9b1c2bb7d49843a8
3
+ size 5630710
SD-CN-Animation/examples/gold_1.gif ADDED

Git LFS Details

  • SHA256: 20d4676372b63cef565f614676660b37226c5fbf7825ba2add15a6262eff1bed
  • Pointer size: 132 Bytes
  • Size of remote file: 1.31 MB
SD-CN-Animation/examples/gold_1.mp4 ADDED
Binary file (636 kB). View file
 
SD-CN-Animation/examples/macaroni_1.gif ADDED

Git LFS Details

  • SHA256: e7e43cd36aa70deac9ebc3fd26615183f9b13ffafd195bb0db2c0a4eb834dba3
  • Pointer size: 132 Bytes
  • Size of remote file: 1.16 MB
SD-CN-Animation/examples/macaroni_1.mp4 ADDED
Binary file (731 kB). View file
 
SD-CN-Animation/examples/tree_2.gif ADDED

Git LFS Details

  • SHA256: 6949d2fe4cd7902d7f339ec04b2d2ea520545128f28476b78f72df3b75f0924d
  • Pointer size: 132 Bytes
  • Size of remote file: 1.6 MB
SD-CN-Animation/examples/tree_2.mp4 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2bdab6694727e6dab21e49311efbc21296d445bf11473de6867848b305d3775
3
+ size 1333426
SD-CN-Animation/examples/ui_preview.png ADDED
SD-CN-Animation/install.py ADDED
@@ -0,0 +1,20 @@
1
+ import launch
2
+ import os
3
+ import pkg_resources
4
+
5
+ req_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), "requirements.txt")
6
+
7
+ with open(req_file) as file:
8
+ for package in file:
9
+ try:
10
+ package = package.strip()
11
+ if '==' in package:
12
+ package_name, package_version = package.split('==')
13
+ installed_version = pkg_resources.get_distribution(package_name).version
14
+ if installed_version != package_version:
15
+ launch.run_pip(f"install {package}", f"SD-CN-Animation requirement: changing {package_name} version from {installed_version} to {package_version}")
16
+ elif not launch.is_installed(package):
17
+ launch.run_pip(f"install {package}", f"SD-CN-Animation requirement: {package}")
18
+ except Exception as e:
19
+ print(e)
20
+ print(f'Warning: Failed to install {package}.')
SD-CN-Animation/old_scripts/compute_flow.py ADDED
@@ -0,0 +1,75 @@
1
+ import cv2
2
+ import base64
3
+ import numpy as np
4
+ from tqdm import tqdm
5
+ import os
6
+
7
+ from flow_utils import RAFT_estimate_flow
8
+ import h5py
9
+
10
+ import argparse
11
+
12
+ def main(args):
13
+ W, H = args.width, args.height
14
+ # Open the input video file
15
+ input_video = cv2.VideoCapture(args.input_video)
16
+
17
+ # Get useful info from the source video
18
+ fps = int(input_video.get(cv2.CAP_PROP_FPS))
19
+ total_frames = int(input_video.get(cv2.CAP_PROP_FRAME_COUNT))
20
+
21
+ prev_frame = None
22
+
23
+ # create an empty HDF5 file
24
+ with h5py.File(args.output_file, 'w') as f: pass
25
+
26
+ # open the file for writing flow maps into it
27
+ with h5py.File(args.output_file, 'a') as f:
28
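+ # Dataset layout (illustrative note): flow_maps[i] holds the flow between frame i and i+1;
+ # index 0 of the second axis is the forward flow, index 1 the backward flow, stored as float16 to keep the file small.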
+ flow_maps = f.create_dataset('flow_maps', shape=(0, 2, H, W, 2), maxshape=(None, 2, H, W, 2), dtype=np.float16)
29
+
30
+ for ind in tqdm(range(total_frames)):
31
+ # Read the next frame from the input video
32
+ if not input_video.isOpened(): break
33
+ ret, cur_frame = input_video.read()
34
+ if not ret: break
35
+
36
+ cur_frame = cv2.resize(cur_frame, (W, H))
37
+
38
+ if prev_frame is not None:
39
+ next_flow, prev_flow, occlusion_mask, frame1_bg_removed, frame2_bg_removed = RAFT_estimate_flow(prev_frame, cur_frame, subtract_background=args.remove_background)
40
+
41
+ # write data into a file
42
+ flow_maps.resize(ind, axis=0)
43
+ flow_maps[ind-1, 0] = next_flow
44
+ flow_maps[ind-1, 1] = prev_flow
45
+
46
+ occlusion_mask = np.clip(occlusion_mask * 0.2 * 255, 0, 255).astype(np.uint8)
47
+
48
+ if args.visualize:
49
+ # show the last written frame - useful to catch any issue with the process
50
+ if args.remove_background:
51
+ img_show = cv2.hconcat([cur_frame, frame2_bg_removed, occlusion_mask])
52
+ else:
53
+ img_show = cv2.hconcat([cur_frame, occlusion_mask])
54
+ cv2.imshow('Out img', img_show)
55
+ if cv2.waitKey(1) & 0xFF == ord('q'): exit() # press Q to close the script while processing
56
+
57
+ prev_frame = cur_frame.copy()
58
+
59
+ # Release the input and output video files
60
+ input_video.release()
61
+
62
+ # Close all windows
63
+ if args.visualize: cv2.destroyAllWindows()
64
+
65
+ if __name__ == '__main__':
66
+ parser = argparse.ArgumentParser()
67
+ parser.add_argument('-i', '--input_video', help="Path to input video file", required=True)
68
+ parser.add_argument('-o', '--output_file', help="Path to output flow file. Stored in *.h5 format", required=True)
69
+ parser.add_argument('-W', '--width', help='Width of the generated flow maps', default=1024, type=int)
70
+ parser.add_argument('-H', '--height', help='Height of the generated flow maps', default=576, type=int)
71
+ parser.add_argument('-v', '--visualize', action='store_true', help='Show processed images and occlusion maps')
72
+ parser.add_argument('-rb', '--remove_background', action='store_true', help='Remove background of the image')
73
+ args = parser.parse_args()
74
+
75
+ main(args)
SD-CN-Animation/old_scripts/flow_utils.py ADDED
@@ -0,0 +1,139 @@
1
+ import numpy as np
2
+ import cv2
3
+
4
+ # RAFT dependencies
5
+ import sys
6
+ sys.path.append('RAFT/core')
7
+
8
+ from collections import namedtuple
9
+ import torch
10
+ import argparse
11
+ from raft import RAFT
12
+ from utils.utils import InputPadder
13
+
14
+ RAFT_model = None
15
+ fgbg = cv2.createBackgroundSubtractorMOG2(history=500, varThreshold=16, detectShadows=True)
16
+
17
+ def background_subtractor(frame, fgbg):
18
+ fgmask = fgbg.apply(frame)
19
+ return cv2.bitwise_and(frame, frame, mask=fgmask)
20
+
21
+ def RAFT_estimate_flow(frame1, frame2, device='cuda', subtract_background=True):
22
+ global RAFT_model
23
+ if RAFT_model is None:
24
+ args = argparse.Namespace(**{
25
+ 'model': 'RAFT/models/raft-things.pth',
26
+ 'mixed_precision': True,
27
+ 'small': False,
28
+ 'alternate_corr': False,
29
+ 'path': ""
30
+ })
31
+
32
+ RAFT_model = torch.nn.DataParallel(RAFT(args))
33
+ RAFT_model.load_state_dict(torch.load(args.model))
34
+
35
+ RAFT_model = RAFT_model.module
36
+ RAFT_model.to(device)
37
+ RAFT_model.eval()
38
+
39
+ if subtract_background:
40
+ frame1 = background_subtractor(frame1, fgbg)
41
+ frame2 = background_subtractor(frame2, fgbg)
42
+
43
+ with torch.no_grad():
44
+ frame1_torch = torch.from_numpy(frame1).permute(2, 0, 1).float()[None].to(device)
45
+ frame2_torch = torch.from_numpy(frame2).permute(2, 0, 1).float()[None].to(device)
46
+
47
+ padder = InputPadder(frame1_torch.shape)
48
+ image1, image2 = padder.pad(frame1_torch, frame2_torch)
49
+
50
+ # estimate optical flow
51
+ _, next_flow = RAFT_model(image1, image2, iters=20, test_mode=True)
52
+ _, prev_flow = RAFT_model(image2, image1, iters=20, test_mode=True)
53
+
54
+ next_flow = next_flow[0].permute(1, 2, 0).cpu().numpy()
55
+ prev_flow = prev_flow[0].permute(1, 2, 0).cpu().numpy()
56
+
57
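+ # forward-backward consistency: where the sum of forward and backward flow is far from zero,
+ # the pixel is likely occluded, so its norm is used as the occlusion estimate below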
+ fb_flow = next_flow + prev_flow
58
+ fb_norm = np.linalg.norm(fb_flow, axis=2)
59
+
60
+ occlusion_mask = fb_norm[..., None].repeat(3, axis=-1)
61
+
62
+ return next_flow, prev_flow, occlusion_mask, frame1, frame2
63
+
64
+ # ... rest of the file ...
65
+
66
+
67
+ def compute_diff_map(next_flow, prev_flow, prev_frame, cur_frame, prev_frame_styled):
68
+ h, w = cur_frame.shape[:2]
69
+
70
+ #print(np.amin(next_flow), np.amax(next_flow))
71
+ #exit()
72
+
73
+
74
+ fl_w, fl_h = next_flow.shape[:2]
75
+
76
+ # normalize flow
77
+ next_flow = next_flow / np.array([fl_h,fl_w])
78
+ prev_flow = prev_flow / np.array([fl_h,fl_w])
79
+
80
+ # remove low value noise (@alexfredo suggestion)
81
+ next_flow[np.abs(next_flow) < 0.05] = 0
82
+ prev_flow[np.abs(prev_flow) < 0.05] = 0
83
+
84
+ # resize flow
85
+ next_flow = cv2.resize(next_flow, (w, h))
86
+ next_flow = (next_flow * np.array([h,w])).astype(np.float32)
87
+ prev_flow = cv2.resize(prev_flow, (w, h))
88
+ prev_flow = (prev_flow * np.array([h,w])).astype(np.float32)
89
+
90
+ # Generate sampling grids
91
+ grid_y, grid_x = torch.meshgrid(torch.arange(0, h), torch.arange(0, w))
92
+ flow_grid = torch.stack((grid_x, grid_y), dim=0).float()
93
+ flow_grid += torch.from_numpy(prev_flow).permute(2, 0, 1)
94
+ flow_grid = flow_grid.unsqueeze(0)
95
+ flow_grid[:, 0, :, :] = 2 * flow_grid[:, 0, :, :] / (w - 1) - 1
96
+ flow_grid[:, 1, :, :] = 2 * flow_grid[:, 1, :, :] / (h - 1) - 1
97
+ flow_grid = flow_grid.permute(0, 2, 3, 1)
98
+
99
+
100
+ prev_frame_torch = torch.from_numpy(prev_frame).float().unsqueeze(0).permute(0, 3, 1, 2) #N, C, H, W
101
+ prev_frame_styled_torch = torch.from_numpy(prev_frame_styled).float().unsqueeze(0).permute(0, 3, 1, 2) #N, C, H, W
102
+
103
+ warped_frame = torch.nn.functional.grid_sample(prev_frame_torch, flow_grid, padding_mode="reflection").permute(0, 2, 3, 1)[0].numpy()
104
+ warped_frame_styled = torch.nn.functional.grid_sample(prev_frame_styled_torch, flow_grid, padding_mode="reflection").permute(0, 2, 3, 1)[0].numpy()
105
+
106
+ #warped_frame = cv2.remap(prev_frame, flow_map, None, cv2.INTER_NEAREST, borderMode = cv2.BORDER_REFLECT)
107
+ #warped_frame_styled = cv2.remap(prev_frame_styled, flow_map, None, cv2.INTER_NEAREST, borderMode = cv2.BORDER_REFLECT)
108
+
109
+ # compute occlusion mask
110
+ fb_flow = next_flow + prev_flow
111
+ fb_norm = np.linalg.norm(fb_flow, axis=2)
112
+
113
+ occlusion_mask = fb_norm[..., None]
114
+
115
+ diff_mask_org = np.abs(warped_frame.astype(np.float32) - cur_frame.astype(np.float32)) / 255
116
+ diff_mask_org = diff_mask_org.max(axis = -1, keepdims=True)
117
+
118
+ diff_mask_stl = np.abs(warped_frame_styled.astype(np.float32) - cur_frame.astype(np.float32)) / 255
119
+ diff_mask_stl = diff_mask_stl.max(axis = -1, keepdims=True)
120
+
121
+ alpha_mask = np.maximum(np.maximum(occlusion_mask * 0.3, diff_mask_org * 4), diff_mask_stl * 2) # element-wise max of the three maps (passing all three as positional args would treat the third as the 'out' array)
122
+ alpha_mask = alpha_mask.repeat(3, axis = -1)
123
+
124
+ #alpha_mask_blured = cv2.dilate(alpha_mask, np.ones((5, 5), np.float32))
125
+ alpha_mask = cv2.GaussianBlur(alpha_mask, (51,51), 5, cv2.BORDER_REFLECT)
126
+
127
+ alpha_mask = np.clip(alpha_mask, 0, 1)
128
+
129
+ return alpha_mask, warped_frame_styled
130
+
131
+ def frames_norm(occl): return occl / 127.5 - 1
132
+
133
+ def flow_norm(flow): return flow / 255
134
+
135
+ def occl_norm(occl): return occl / 127.5 - 1
136
+
137
+ def flow_renorm(flow): return flow * 255
138
+
139
+ def occl_renorm(occl): return (occl + 1) * 127.5
SD-CN-Animation/old_scripts/readme.md ADDED
@@ -0,0 +1,133 @@
1
+ # SD-CN-Animation
2
+ This project allows you to automate the video stylization task using StableDiffusion and ControlNet. It also allows you to generate completely new videos from text, at any resolution and length, in contrast to other current text2video methods, and it can use any Stable Diffusion model as a backbone, including custom ones. It uses the '[RAFT](https://github.com/princeton-vl/RAFT)' optical flow estimation algorithm to keep the animation stable and to create an inpainting mask that is used to generate the next frame. In text to video mode it relies on the 'FloweR' method (work in progress) that predicts optical flow from the previous frames.
3
+
4
+
5
+ ### Video to Video Examples:
6
+ <!--
7
+ [![IMAGE_ALT](https://img.youtube.com/vi/j-0niEMm6DU/0.jpg)](https://youtu.be/j-0niEMm6DU)
8
+ This script can also be used to swap the person in the video, like in this example: https://youtube.com/shorts/be93_dIeZWU
9
+ -->
10
+ </table>
11
+ <table class="center">
12
+ <tr>
13
+ <td><img src="examples/girl_org.gif" raw=true></td>
14
+ <td><img src="examples/girl_to_jc.gif" raw=true></td>
15
+ <td><img src="examples/girl_to_wc.gif" raw=true></td>
16
+ </tr>
17
+ <tr>
18
+ <td width=33% align="center">Original video</td>
19
+ <td width=33% align="center">"Jessica Chastain"</td>
20
+ <td width=33% align="center">"Watercolor painting"</td>
21
+ </tr>
22
+ </table>
23
+
24
+ Examples presented are generated at 1024x576 resolution using the 'realisticVisionV13_v13' model as a base. They were cropped, downsized and compressed for better loading speed. You can see them in their original quality in the 'examples' folder.
25
+
26
+ ### Text to Video Examples:
27
+ </table>
28
+ <table class="center">
29
+ <tr>
30
+ <td><img src="examples/flower_1.gif" raw=true></td>
31
+ <td><img src="examples/bonfire_1.gif" raw=true></td>
32
+ <td><img src="examples/diamond_4.gif" raw=true></td>
33
+ </tr>
34
+ <tr>
35
+ <td width=33% align="center">"close up of a flower"</td>
36
+ <td width=33% align="center">"bonfire near the camp in the mountains at night"</td>
37
+ <td width=33% align="center">"close up of a diamond laying on the table"</td>
38
+ </tr>
39
+ <tr>
40
+ <td><img src="examples/macaroni_1.gif" raw=true></td>
41
+ <td><img src="examples/gold_1.gif" raw=true></td>
42
+ <td><img src="examples/tree_2.gif" raw=true></td>
43
+ </tr>
44
+ <tr>
45
+ <td width=33% align="center">"close up of macaroni on the plate"</td>
46
+ <td width=33% align="center">"close up of golden sphere"</td>
47
+ <td width=33% align="center">"a tree standing in the winter forest"</td>
48
+ </tr>
49
+ </table>
50
+
51
+ All examples you can see here are originally generated at 512x512 resolution using the 'sd-v1-5-inpainting' model as a base. They were downsized and compressed for better loading speed. You can see them in their original quality in the 'examples' folder. Actual prompts used were stated in the following format: "RAW photo, {subject}, 8k uhd, dslr, soft lighting, high quality, film grain, Fujifilm XT3", only the 'subject' part is described in the table above.
52
+
53
+
54
+
55
+ ## Dependencies
56
+ To install all the necessary dependencies, run this command:
57
+ ```
58
+ pip install opencv-python opencv-contrib-python numpy tqdm h5py scikit-image
59
+ ```
60
+ You have to set up the RAFT repository as it is described here: https://github.com/princeton-vl/RAFT . Basically it just comes down to running "./download_models.sh" in the RAFT folder to download the models.
61
+
62
+
63
+ ## Running the scripts
64
+ This script works on top of the [Automatic1111/web-ui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) interface via its API, so you have to set it up first. You should also have the [sd-webui-controlnet](https://github.com/Mikubill/sd-webui-controlnet) extension installed, together with the control_hed-fp16 model. If you have web-ui with ControlNet working correctly, you also have to allow the API to work with ControlNet. To do so, go to the web-ui settings -> ControlNet tab -> set the "Allow other script to control this extension" checkbox to active and set "Multi ControlNet: Max models amount (requires restart)" to more than 2 -> press "Apply settings". A minimal check that the API is reachable is sketched below.
65
+
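+ The scripts talk to web-ui through its local HTTP API (http://localhost:7860 by default, as used in vid2vid.py and txt2vid.py). A minimal sketch to verify that the API is reachable before starting a long run (assumes the default address and that web-ui was launched with the '--api' flag):
+ ```
+ import requests
+ 
+ resp = requests.get('http://localhost:7860/sdapi/v1/sd-models')
+ print(resp.status_code)  # 200 means the API is up; the response lists the available checkpoints
+ ```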
66
+
67
+ ### Video To Video
68
+ #### Step 1.
69
+ To process the video, first of all you would need to precompute optical flow data before running web-ui with this command:
70
+ ```
71
+ python3 compute_flow.py -i "path to your video" -o "path to output file with *.h5 format" -v -W width_of_the_flow_map -H height_of_the_flow_map
72
+ ```
73
+ The main reason to do this step separately is to save precious GPU memory that will be useful to generate better quality images. Choose the W and H parameters as high as your GPU can handle, keeping the proportions of the original video resolution. Do not worry if they are higher or lower than the processing resolution; flow maps will be scaled accordingly at the processing stage. This will generate quite a large file that may take up to several gigabytes on the drive even for a minute-long video. If you want to process a long video, consider splitting it into several parts beforehand. A minimal sketch of how the resulting flow file can be read back is shown below.
74
+
75
+
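+ For reference, here is a minimal sketch of how the generated flow file can be read back (the dataset name and layout follow compute_flow.py: a 'flow_maps' dataset of shape (frames-1, 2, H, W, 2) stored as float16, where index 0 of the second axis is the forward flow and index 1 is the backward flow; the file path is a placeholder):
+ ```
+ import h5py
+ import numpy as np
+ 
+ with h5py.File('path_to_your_flow_file.h5', 'r') as f:
+     flow_maps = f['flow_maps']
+     print(flow_maps.shape)  # (N-1, 2, H, W, 2)
+     next_flow = flow_maps[0, 0].astype(np.float32)  # flow from frame 0 to frame 1
+     prev_flow = flow_maps[0, 1].astype(np.float32)  # flow from frame 1 back to frame 0
+ ```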
76
+ #### Step 2.
77
+ Run web-ui with the '--api' flag. It is also better to use the '--xformers' flag, as you will want the highest resolution possible and the xformers memory optimization will greatly help.
78
+ ```
79
+ bash webui.sh --xformers --api
80
+ ```
81
+
82
+
83
+ #### Step 3.
84
+ Go to the **vid2vid.py** file and change the main parameters (INPUT_VIDEO, FLOW_MAPS, OUTPUT_VIDEO, PROMPT, N_PROMPT, W, H) to the ones you need for your project. The FLOW_MAPS parameter should contain the path to the flow file that you generated in the first step. The script is pretty simple, so you may change other parameters as well, although I would recommend leaving them as is for the first time. Finally, run the script with the command:
85
+ ```
86
+ python3 vid2vid.py
87
+ ```
88
+
89
+
90
+ ### Text To Video
91
+ This method is still in development and works on top of 'Stable Diffusion' and 'FloweR' - an optical flow reconstruction method that is also at an early development stage. Do not expect much from it, as it is more of a proof of concept than a complete solution.
92
+
93
+ #### Step 1.
94
+ Download 'FloweR_0.1.pth' model from here: [Google drive link](https://drive.google.com/file/d/1WhzoVIw6Kdg4EjfK9LaTLqFm5dF-IJ7F/view?usp=share_link) and place it in the 'FloweR' folder.
95
+
96
+ #### Step 2.
97
+ Same as in the vid2vid case, run web-ui with the '--api' flag. It is also better to use the '--xformers' flag, as you will want the highest resolution possible and the xformers memory optimization will greatly help.
98
+ ```
99
+ bash webui.sh --xformers --api
100
+ ```
101
+
102
+ #### Step 3.
103
+ Go to the **txt2vid.py** file and change the main parameters (OUTPUT_VIDEO, PROMPT, N_PROMPT, W, H) to the ones you need for your project. Again, the script is simple, so you may change other parameters if you want to. Finally, run the script with the command:
104
+ ```
105
+ python3 txt2vid.py
106
+ ```
107
+
108
+ ## Last version changes: v0.5
109
+ * Fixed an issue with the wrong direction of an optical flow applied to an image.
110
+ * Added text to video mode within txt2vid.py script. Make sure to update new dependencies for this script to work!
111
+ * Added a threshold for an optical flow before processing the frame to remove white noise that might appear, as it was suggested by [@alexfredo](https://github.com/alexfredo).
112
+ * Background removal at the flow computation stage implemented by [@CaptnSeraph](https://github.com/CaptnSeraph); it should reduce the ghosting effect in most of the videos processed with the vid2vid script.
113
+
114
+ <!--
115
+ ## Last version changes: v0.6
116
+ * Added separate flag '-rb' for background removal process at the flow computation stage in the compute_flow.py script.
117
+ * Added flow normalization before rescaling it, so the magnitude of the flow computed correctly at the different resolution.
118
+ * Less ghosting and color change in vid2vid mode
119
+ -->
120
+
121
+ <!--
122
+ ## Potential improvements
123
+ There are several ways overall quality of animation may be improved:
124
+ * You may use a separate processing for each camera position to get a more consistent style of the characters and less ghosting.
125
+ * Because the quality of the video depends on how good optical flow was estimated it might be beneficial to use high frame rate video as a source, so it would be easier to guess the flow properly.
126
+ * The quality of flow estimation might be greatly improved with a proper flow estimation model like this one: https://github.com/autonomousvision/unimatch .
127
+ -->
128
+ ## Licence
129
+ This repository can only be used for personal/research/non-commercial purposes. However, for commercial requests, please contact me directly at borsky.alexey@gmail.com
130
+
131
+
132
+
133
+
SD-CN-Animation/old_scripts/txt2vid.py ADDED
@@ -0,0 +1,208 @@
1
+ import requests
2
+ import cv2
3
+ import base64
4
+ import numpy as np
5
+ from tqdm import tqdm
6
+ import os
7
+
8
+ import sys
9
+ sys.path.append('FloweR/')
10
+ sys.path.append('RAFT/core')
11
+
12
+ import torch
13
+ from model import FloweR
14
+ from utils import flow_viz
15
+
16
+ from flow_utils import *
17
+ import skimage
18
+ import datetime
19
+
20
+
21
+ OUTPUT_VIDEO = f'videos/result_{datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S")}.mp4'
22
+
23
+ PROMPT = "people looking at flying robots. Future. People looking to the sky. Stars in the background. Dramatic light, Cinematic light. Soft lighting, high quality, film grain."
24
+ N_PROMPT = "(deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime:1.4), text, letters, logo, brand, close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck"
25
+ w,h = 768, 512 # Width and height of the processed image. Note that the actual image processed will be at W x H resolution.
26
+
27
+ SAVE_FRAMES = True # saves individual frames into 'out' folder if set True. Again might be helpful with long animations
28
+
29
+ PROCESSING_STRENGTH = 0.85
30
+ FIX_STRENGTH = 0.35
31
+
32
+ CFG_SCALE = 5.5
33
+
34
+ APPLY_TEMPORALNET = False
35
+ APPLY_COLOR = False
36
+
37
+ VISUALIZE = True
38
+ DEVICE = 'cuda'
39
+
40
+ def to_b64(img):
41
+ img_cliped = np.clip(img, 0, 255).astype(np.uint8)
42
+ _, buffer = cv2.imencode('.png', img_cliped)
43
+ b64img = base64.b64encode(buffer).decode("utf-8")
44
+ return b64img
45
+
46
+ class controlnetRequest():
47
+ def __init__(self, b64_init_img = None, b64_prev_img = None, b64_color_img = None, ds = 0.35, w=w, h=h, mask = None, seed=-1, mode='img2img'):
48
+ self.url = f"http://localhost:7860/sdapi/v1/{mode}"
49
+ self.body = {
50
+ "init_images": [b64_init_img],
51
+ "mask": mask,
52
+ "mask_blur": 0,
53
+ "inpainting_fill": 1,
54
+ "inpainting_mask_invert": 0,
55
+ "prompt": PROMPT,
56
+ "negative_prompt": N_PROMPT,
57
+ "seed": seed,
58
+ "subseed": -1,
59
+ "subseed_strength": 0,
60
+ "batch_size": 1,
61
+ "n_iter": 1,
62
+ "steps": 15,
63
+ "cfg_scale": CFG_SCALE,
64
+ "denoising_strength": ds,
65
+ "width": w,
66
+ "height": h,
67
+ "restore_faces": False,
68
+ "eta": 0,
69
+ "sampler_index": "DPM++ 2S a",
70
+ "control_net_enabled": True,
71
+ "alwayson_scripts": {
72
+ "ControlNet":{"args": []}
73
+ },
74
+ }
75
+
76
+ if APPLY_TEMPORALNET:
77
+ self.body["alwayson_scripts"]["ControlNet"]["args"].append({
78
+ "input_image": b64_prev_img,
79
+ "module": "none",
80
+ "model": "diff_control_sd15_temporalnet_fp16 [adc6bd97]",
81
+ "weight": 0.65,
82
+ "resize_mode": "Just Resize",
83
+ "lowvram": False,
84
+ "processor_res": 512,
85
+ "guidance_start": 0,
86
+ "guidance_end": 0.65,
87
+ "guessmode": False
88
+ })
89
+
90
+ if APPLY_COLOR:
91
+ self.body["alwayson_scripts"]["ControlNet"]["args"].append({
92
+ "input_image": b64_prev_img,
93
+ "module": "color",
94
+ "model": "t2iadapter_color_sd14v1 [8522029d]",
95
+ "weight": 0.65,
96
+ "resize_mode": "Just Resize",
97
+ "lowvram": False,
98
+ "processor_res": 512,
99
+ "guidance_start": 0,
100
+ "guidance_end": 0.65,
101
+ "guessmode": False
102
+ })
103
+
104
+
105
+ def sendRequest(self):
106
+ # Request to web-ui
107
+ data_js = requests.post(self.url, json=self.body).json()
108
+
109
+ # Convert the byte array to a NumPy array
110
+ image_bytes = base64.b64decode(data_js["images"][0])
111
+ np_array = np.frombuffer(image_bytes, dtype=np.uint8)
112
+
113
+ # Convert the NumPy array to a cv2 image
114
+ out_image = cv2.imdecode(np_array, cv2.IMREAD_COLOR)
115
+ return out_image
116
+
117
+
118
+
119
+ if VISUALIZE: cv2.namedWindow('Out img')
120
+
121
+
122
+ # Create an output video file with the same fps, width, and height as the input video
123
+ output_video = cv2.VideoWriter(OUTPUT_VIDEO, cv2.VideoWriter_fourcc(*'mp4v'), 15, (w, h))
124
+
125
+ prev_frame = None
126
+ prev_frame_styled = None
127
+
128
+
129
+ # Instantiate the model
130
+ model = FloweR(input_size = (h, w))
131
+ model.load_state_dict(torch.load('FloweR/FloweR_0.1.1.pth'))
132
+ # Move the model to the device
133
+ model = model.to(DEVICE)
134
+
135
+
136
+ init_frame = controlnetRequest(mode='txt2img', ds=PROCESSING_STRENGTH, w=w, h=h).sendRequest()
137
+
138
+ output_video.write(init_frame)
139
+ prev_frame = init_frame
140
+
141
+ clip_frames = np.zeros((4, h, w, 3), dtype=np.uint8)
142
+
143
+ color_shift = np.zeros((0, 3))
144
+ color_scale = np.zeros((0, 3))
145
+ for ind in tqdm(range(450)):
146
+ clip_frames = np.roll(clip_frames, -1, axis=0)
147
+ clip_frames[-1] = prev_frame
148
+
149
+ clip_frames_torch = frames_norm(torch.from_numpy(clip_frames).to(DEVICE, dtype=torch.float32))
150
+
151
+ with torch.no_grad():
152
+ pred_data = model(clip_frames_torch.unsqueeze(0))[0]
153
+
154
+ pred_flow = flow_renorm(pred_data[...,:2]).cpu().numpy()
155
+ pred_occl = occl_renorm(pred_data[...,2:3]).cpu().numpy().repeat(3, axis = -1)
156
+
157
+ pred_flow = pred_flow / (1 + np.linalg.norm(pred_flow, axis=-1, keepdims=True) * 0.05)
158
+ pred_flow = cv2.GaussianBlur(pred_flow, (31,31), 1, cv2.BORDER_REFLECT_101)
159
+
160
+
161
+ pred_occl = cv2.GaussianBlur(pred_occl, (21,21), 2, cv2.BORDER_REFLECT_101)
162
+ pred_occl = (np.abs(pred_occl / 255) ** 1.5) * 255
163
+ pred_occl = np.clip(pred_occl * 25, 0, 255).astype(np.uint8)
164
+
165
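+ # convert the predicted relative flow into an absolute sampling map (x + dx, y + dy) for cv2.remap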
+ flow_map = pred_flow.copy()
166
+ flow_map[:,:,0] += np.arange(w)
167
+ flow_map[:,:,1] += np.arange(h)[:,np.newaxis]
168
+
169
+ warped_frame = cv2.remap(prev_frame, flow_map, None, cv2.INTER_CUBIC, borderMode = cv2.BORDER_REFLECT_101)
170
+
171
+ out_image = warped_frame.copy()
172
+
173
+ out_image = controlnetRequest(
174
+ b64_init_img = to_b64(out_image),
175
+ b64_prev_img = to_b64(prev_frame),
176
+ b64_color_img = to_b64(warped_frame),
177
+ mask = to_b64(pred_occl),
178
+ ds=PROCESSING_STRENGTH, w=w, h=h).sendRequest()
179
+
180
+ out_image = controlnetRequest(
181
+ b64_init_img = to_b64(out_image),
182
+ b64_prev_img = to_b64(prev_frame),
183
+ b64_color_img = to_b64(warped_frame),
184
+ mask = None,
185
+ ds=FIX_STRENGTH, w=w, h=h).sendRequest()
186
+
187
+ # This step is necessary to reduce the color drift of the image that some models may cause
188
+ out_image = skimage.exposure.match_histograms(out_image, init_frame, multichannel=True, channel_axis=-1)
189
+
190
+ output_video.write(out_image)
191
+ if SAVE_FRAMES:
192
+ if not os.path.isdir('out'): os.makedirs('out')
193
+ cv2.imwrite(f'out/{ind+1:05d}.png', out_image)
194
+
195
+ pred_flow_img = flow_viz.flow_to_image(pred_flow)
196
+ frames_img = cv2.hconcat(list(clip_frames))
197
+ data_img = cv2.hconcat([pred_flow_img, pred_occl, warped_frame, out_image])
198
+
199
+ cv2.imshow('Out img', cv2.vconcat([frames_img, data_img]))
200
+ if cv2.waitKey(1) & 0xFF == ord('q'): exit() # press Q to close the script while processing
201
+
202
+ prev_frame = out_image.copy()
203
+
204
+ # Release the input and output video files
205
+ output_video.release()
206
+
207
+ # Close all windows
208
+ if VISUALIZE: cv2.destroyAllWindows()
SD-CN-Animation/old_scripts/vid2vid.py ADDED
@@ -0,0 +1,237 @@
1
+ import requests
2
+ import cv2
3
+ import base64
4
+ import numpy as np
5
+ from tqdm import tqdm
6
+ import os
7
+
8
+ import h5py
9
+ from flow_utils import compute_diff_map
10
+
11
+ import skimage
12
+ import datetime
13
+
14
+ INPUT_VIDEO = "/media/alex/ded3efe6-5825-429d-ac89-7ded676a2b6d/media/Peter_Gabriel/pexels-monstera-5302599-4096x2160-30fps.mp4"
15
+ FLOW_MAPS = "/media/alex/ded3efe6-5825-429d-ac89-7ded676a2b6d/media/Peter_Gabriel/pexels-monstera-5302599-4096x2160-30fps.h5"
16
+ OUTPUT_VIDEO = f'videos/result_{datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S")}.mp4'
17
+
18
+ PROMPT = "Underwater shot Peter Gabriel with closed eyes in Peter Gabriel's music video. 80's music video. VHS style. Dramatic light, Cinematic light. RAW photo, 8k uhd, dslr, soft lighting, high quality, film grain."
19
+ N_PROMPT = "(deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime:1.4), text, close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck"
20
+ w,h = 1088, 576 # Width and height of the processed image. Note that the actual image processed will be at W x H resolution.
21
+
22
+ START_FROM_IND = 0 # index of a frame to start a processing from. Might be helpful with long animations where you need to restart the script multiple times
23
+ SAVE_FRAMES = True # saves individual frames into 'out' folder if set True. Again might be helpful with long animations
24
+
25
+ PROCESSING_STRENGTH = 0.95
26
+ BLUR_FIX_STRENGTH = 0.15
27
+
28
+ APPLY_HED = True
29
+ APPLY_CANNY = False
30
+ APPLY_DEPTH = False
31
+ GUESSMODE = False
32
+
33
+ CFG_SCALE = 5.5
34
+
35
+ VISUALIZE = True
36
+
37
+ def to_b64(img):
38
+ img_cliped = np.clip(img, 0, 255).astype(np.uint8)
39
+ _, buffer = cv2.imencode('.png', img_cliped)
40
+ b64img = base64.b64encode(buffer).decode("utf-8")
41
+ return b64img
42
+
43
+ class controlnetRequest():
44
+ def __init__(self, b64_cur_img, b64_hed_img, ds = 0.35, w=w, h=h, mask = None, seed=-1):
45
+ self.url = "http://localhost:7860/sdapi/v1/img2img"
46
+ self.body = {
47
+ "init_images": [b64_cur_img],
48
+ "mask": mask,
49
+ "mask_blur": 0,
50
+ "inpainting_fill": 1,
51
+ "inpainting_mask_invert": 0,
52
+ "prompt": PROMPT,
53
+ "negative_prompt": N_PROMPT,
54
+ "seed": seed,
55
+ "subseed": -1,
56
+ "subseed_strength": 0,
57
+ "batch_size": 1,
58
+ "n_iter": 1,
59
+ "steps": 15,
60
+ "cfg_scale": CFG_SCALE,
61
+ "denoising_strength": ds,
62
+ "width": w,
63
+ "height": h,
64
+ "restore_faces": False,
65
+ "eta": 0,
66
+ "sampler_index": "DPM++ 2S a",
67
+ "control_net_enabled": True,
68
+ "alwayson_scripts": {
69
+ "ControlNet":{"args": []}
70
+ },
71
+ }
72
+
73
+ if APPLY_HED:
74
+ self.body["alwayson_scripts"]["ControlNet"]["args"].append({
75
+ "input_image": b64_hed_img,
76
+ "module": "hed",
77
+ "model": "control_hed-fp16 [13fee50b]",
78
+ "weight": 0.65,
79
+ "resize_mode": "Just Resize",
80
+ "lowvram": False,
81
+ "processor_res": 512,
82
+ "guidance_start": 0,
83
+ "guidance_end": 0.65,
84
+ "guessmode": GUESSMODE
85
+ })
86
+
87
+ if APPLY_CANNY:
88
+ self.body["alwayson_scripts"]["ControlNet"]["args"].append({
89
+ "input_image": b64_hed_img,
90
+ "module": "canny",
91
+ "model": "control_canny-fp16 [e3fe7712]",
92
+ "weight": 0.85,
93
+ "resize_mode": "Just Resize",
94
+ "lowvram": False,
95
+ "threshold_a": 35,
96
+ "threshold_b": 35,
97
+ "processor_res": 512,
98
+ "guidance_start": 0,
99
+ "guidance_end": 0.85,
100
+ "guessmode": GUESSMODE
101
+ })
102
+
103
+ if APPLY_DEPTH:
104
+ self.body["alwayson_scripts"]["ControlNet"]["args"].append({
105
+ "input_image": b64_hed_img,
106
+ "module": "depth",
107
+ "model": "control_depth-fp16 [400750f6]",
108
+ "weight": 0.85,
109
+ "resize_mode": "Just Resize",
110
+ "lowvram": False,
111
+ "processor_res": 512,
112
+ "guidance_start": 0,
113
+ "guidance_end": 0.85,
114
+ "guessmode": GUESSMODE
115
+ })
116
+
117
+
118
+ def sendRequest(self):
119
+ # Request to web-ui
120
+ data_js = requests.post(self.url, json=self.body).json()
121
+
122
+ # Convert the byte array to a NumPy array
123
+ image_bytes = base64.b64decode(data_js["images"][0])
124
+ np_array = np.frombuffer(image_bytes, dtype=np.uint8)
125
+
126
+ # Convert the NumPy array to a cv2 image
127
+ out_image = cv2.imdecode(np_array, cv2.IMREAD_COLOR)
128
+ return out_image
129
+
130
+
131
+
132
+ if VISUALIZE: cv2.namedWindow('Out img')
133
+
134
+ # Open the input video file
135
+ input_video = cv2.VideoCapture(INPUT_VIDEO)
136
+
137
+ # Get useful info from the source video
138
+ fps = int(input_video.get(cv2.CAP_PROP_FPS))
139
+ total_frames = int(input_video.get(cv2.CAP_PROP_FRAME_COUNT))
140
+
141
+ # Create an output video file with the same fps, width, and height as the input video
142
+ output_video = cv2.VideoWriter(OUTPUT_VIDEO, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
143
+
144
+ prev_frame = None
145
+ prev_frame_styled = None
146
+ #init_image = None
147
+
148
+ # reading flow maps in a stream manner
149
+ with h5py.File(FLOW_MAPS, 'r') as f:
150
+ flow_maps = f['flow_maps']
151
+
152
+ for ind in tqdm(range(total_frames)):
153
+ # Read the next frame from the input video
154
+ if not input_video.isOpened(): break
155
+ ret, cur_frame = input_video.read()
156
+ if not ret: break
157
+
158
+ if ind+1 < START_FROM_IND: continue
159
+
160
+ is_keyframe = True
161
+ if prev_frame is not None:
162
+ # Compute absolute difference between current and previous frame
163
+ frames_diff = cv2.absdiff(cur_frame, prev_frame)
164
+ # Compute mean of absolute difference
165
+ mean_diff = cv2.mean(frames_diff)[0]
166
+ # Check if mean difference is above threshold
167
+ is_keyframe = mean_diff > 30
168
+
169
+ # Generate a coarse version of the current frame with the previous stylized frame as a reference image
170
+ if is_keyframe:
171
+ # Resize the frame to proper resolution
172
+ frame = cv2.resize(cur_frame, (w, h))
173
+
174
+ # Processing current frame with current frame as a mask without any inpainting
175
+ out_image = controlnetRequest(to_b64(frame), to_b64(frame), PROCESSING_STRENGTH, w, h, mask = None).sendRequest()
176
+
177
+ alpha_img = out_image.copy()
178
+ out_image_ = out_image.copy()
179
+ warped_styled = out_image.copy()
180
+ #init_image = out_image.copy()
181
+ else:
182
+ # Resize the frame to proper resolution
183
+ frame = cv2.resize(cur_frame, (w, h))
184
+ prev_frame = cv2.resize(prev_frame, (w, h))
185
+
186
+ # Processing current frame with current frame as a mask without any inpainting
187
+ out_image = controlnetRequest(to_b64(frame), to_b64(frame), PROCESSING_STRENGTH, w, h, mask = None).sendRequest()
188
+
189
+ next_flow, prev_flow = flow_maps[ind-1].astype(np.float32)
190
+ alpha_mask, warped_styled = compute_diff_map(next_flow, prev_flow, prev_frame, frame, prev_frame_styled)
191
+
192
+ # Clipping at the lower side is required to fix small trailing artifacts that are for some reason left outside of the bright part of the mask;
193
+ # clipping at the higher side keeps strongly changed parts from being replaced completely, which reduces flickering.
194
+ alpha_mask = np.clip(alpha_mask + 0.05, 0.05, 0.95)
195
+ alpha_img = np.clip(alpha_mask * 255, 0, 255).astype(np.uint8)
196
+
197
+ # normalizing the colors
198
+ out_image = skimage.exposure.match_histograms(out_image, frame, multichannel=False, channel_axis=-1)
199
+
200
+ out_image = out_image.astype(float) * alpha_mask + warped_styled.astype(float) * (1 - alpha_mask)
201
+
202
+ #out_image = skimage.exposure.match_histograms(out_image, prev_frame, multichannel=True, channel_axis=-1)
203
+ #out_image_ = (out_image * 0.65 + warped_styled * 0.35)
204
+
205
+
206
+ # Blurring issue fix via additional processing
207
+ out_image_fixed = controlnetRequest(to_b64(out_image), to_b64(frame), BLUR_FIX_STRENGTH, w, h, mask = None, seed=8888).sendRequest()
208
+
209
+
210
+ # Write the frame to the output video
211
+ frame_out = np.clip(out_image_fixed, 0, 255).astype(np.uint8)
212
+ output_video.write(frame_out)
213
+
214
+ if VISUALIZE:
215
+ # show the last written frame - useful to catch any issue with the process
216
+ warped_styled = np.clip(warped_styled, 0, 255).astype(np.uint8)
217
+
218
+ img_show_top = cv2.hconcat([frame, warped_styled])
219
+ img_show_bot = cv2.hconcat([frame_out, alpha_img])
220
+ cv2.imshow('Out img', cv2.vconcat([img_show_top, img_show_bot]))
221
+ cv2.setWindowTitle("Out img", str(ind+1))
222
+ if cv2.waitKey(1) & 0xFF == ord('q'): exit() # press Q to close the script while processing
223
+
224
+ if SAVE_FRAMES:
225
+ if not os.path.isdir('out'): os.makedirs('out')
226
+ cv2.imwrite(f'out/{ind+1:05d}.png', frame_out)
227
+
228
+ prev_frame = cur_frame.copy()
229
+ prev_frame_styled = out_image.copy()
230
+
231
+
232
+ # Release the input and output video files
233
+ input_video.release()
234
+ output_video.release()
235
+
236
+ # Close all windows
237
+ if VISUALIZE: cv2.destroyAllWindows()
SD-CN-Animation/readme.md ADDED
@@ -0,0 +1,89 @@
1
+ # SD-CN-Animation
2
+ This project allows you to automate the video stylization task using StableDiffusion and ControlNet. It also allows you to generate completely new videos from text, at any resolution and length, in contrast to other current text2video methods, and it can use any Stable Diffusion model as a backbone, including custom ones. It uses the '[RAFT](https://github.com/princeton-vl/RAFT)' optical flow estimation algorithm to keep the animation stable and to create an occlusion mask that is used to generate the next frame. In text to video mode it relies on the 'FloweR' method (work in progress) that predicts optical flow from the previous frames.
3
+
4
+ ![sd-cn-animation ui preview](examples/ui_preview.png)
5
+ sd-cn-animation ui preview
6
+
7
+ **In vid2vid mode do not forget to activate a ControlNet model to achieve better results. Without it the resulting video might be quite choppy. Do not put any images into CN, as the frames are passed to it automatically from the video.**
8
+ Here are CN parameters that seem to give the best results so far:
9
+ ![sd-cn-animation cn params](examples/cn_settings.png)
10
+
11
+
12
+ ### Video to Video Examples:
13
+ </table>
14
+ <table class="center">
15
+ <tr>
16
+ <td><img src="examples/girl_org.gif" raw=true></td>
17
+ <td><img src="examples/girl_to_jc.gif" raw=true></td>
18
+ <td><img src="examples/girl_to_wc.gif" raw=true></td>
19
+ </tr>
20
+ <tr>
21
+ <td width=33% align="center">Original video</td>
22
+ <td width=33% align="center">"Jessica Chastain"</td>
23
+ <td width=33% align="center">"Watercolor painting"</td>
24
+ </tr>
25
+ </table>
26
+
27
+ Examples presented are generated at 1024x576 resolution using the 'realisticVisionV13_v13' model as a base. They were cropped, downsized and compressed for better loading speed. You can see them in their original quality in the 'examples' folder.
28
+
29
+ ### Text to Video Examples:
30
+ </table>
31
+ <table class="center">
32
+ <tr>
33
+ <td><img src="examples/flower_1.gif" raw=true></td>
34
+ <td><img src="examples/bonfire_1.gif" raw=true></td>
35
+ <td><img src="examples/diamond_4.gif" raw=true></td>
36
+ </tr>
37
+ <tr>
38
+ <td width=33% align="center">"close up of a flower"</td>
39
+ <td width=33% align="center">"bonfire near the camp in the mountains at night"</td>
40
+ <td width=33% align="center">"close up of a diamond laying on the table"</td>
41
+ </tr>
42
+ <tr>
43
+ <td><img src="examples/macaroni_1.gif" raw=true></td>
44
+ <td><img src="examples/gold_1.gif" raw=true></td>
45
+ <td><img src="examples/tree_2.gif" raw=true></td>
46
+ </tr>
47
+ <tr>
48
+ <td width=33% align="center">"close up of macaroni on the plate"</td>
49
+ <td width=33% align="center">"close up of golden sphere"</td>
50
+ <td width=33% align="center">"a tree standing in the winter forest"</td>
51
+ </tr>
52
+ </table>
53
+
54
+ All examples you can see here are originally generated at 512x512 resolution using the 'sd-v1-5-inpainting' model as a base. They were downsized and compressed for better loading speed. You can see them in their original quality in the 'examples' folder. Actual prompts used were stated in the following format: "RAW photo, {subject}, 8k uhd, dslr, soft lighting, high quality, film grain, Fujifilm XT3", only the 'subject' part is described in the table above.
55
+
56
+ ## Installing the extension
57
+ To install the extension go to the 'Extensions' tab in [Automatic1111 web-ui](https://github.com/AUTOMATIC1111/stable-diffusion-webui), then go to the 'Install from URL' tab. In the 'URL for extension's git repository' field enter the path to this repository, i.e. 'https://github.com/volotat/SD-CN-Animation.git'. Leave the 'Local directory name' field empty. Then just press the 'Install' button. Restart web-ui and a new 'SD-CN-Animation' tab should appear. All generated videos will be saved into the 'stable-diffusion-webui/outputs/sd-cn-animation' folder.
58
+
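+ Alternatively, the extension can be installed manually by cloning the repository into the web-ui 'extensions' folder (a sketch assuming a standard web-ui layout):
+ ```
+ cd stable-diffusion-webui/extensions
+ git clone https://github.com/volotat/SD-CN-Animation.git
+ ```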
59
+ ## Known issues
60
+ * If you see an error like ```IndexError: list index out of range```, try to restart webui; it should fix it. If the issue is still present, try to uninstall and reinstall scikit-image==0.19.2 with the --no-cache-dir flag, like this:
61
+ ```
62
+ pip uninstall scikit-image
63
+ pip install scikit-image==0.19.2 --no-cache-dir
64
+ ```
65
+ * The extension might work incorrectly if 'Apply color correction to img2img results to match original colors.' option is enabled. Make sure to disable it in 'Settings' tab -> 'Stable Diffusion' section.
66
+ * If you have an error like 'Need to enable queue to use generators.', please update webui to the latest version. Beware that only [Automatic1111 web-ui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) is fully supported.
67
+ * The extension is not compatible with Macs. If the extension does work for you on a Mac, or you know how to make it compatible, please open a new discussion.
68
+
69
+ ## Last version changes: v0.9
70
+ * Fixed issues #69, #76, #91, #92.
71
+ * Fixed an issue in vid2vid mode where an occlusion mask computed from the optical flow could include unnecessary parts (wherever the flow is non-zero).
72
+ * Added 'Extra params' in vid2vid mode for more fine-grained control of the processing pipeline.
73
+ * Better default parameters set for vid2vid pipeline.
74
+ * In txt2vid mode after the first frame is generated the seed is now automatically set to -1 to prevent blurring issues.
75
+ * Added an option to save resulting frames into a folder alongside the video.
76
+ * Added ability to export current parameters in a human readable form as a json.
77
+ * Interpolation mode in the flow-applying stage is set to ‘nearest’ to reduce image blurring over time.
78
+ * Added ControlNet to txt2vid mode as well as fixing #86 issue, thanks to [@mariaWitch](https://github.com/mariaWitch)
79
+ * Fixed a major issue where ControlNet used wrong input images. Because of this, vid2vid results were way worse than they should have been.
80
+ * Text to video mode now supports video as a guidance for ControlNet. This allows for much stronger video stylization.
81
+
82
+ <!--
83
+ * ControlNet with preprocessors like "reference_only", "reference_adain", "reference_adain+attn" is not reset with video frames, to keep the ability to control the style of the video.
84
+ * Fixed an issue because of which the 'processing_strength' UI parameter did not actually affect denoising strength at the first processing step.
85
+ * Fixed issue #112. It will not try to reinstall requirements at every start of webui.
86
+ * Some improvements in text 2 video method.
87
+ * Parameters used to generated a video now automatically saved in video's folder.
88
+ * Added ability to control what frame will be send to CN in text to video mode.
89
+ -->
SD-CN-Animation/requirements.txt ADDED
@@ -0,0 +1 @@
1
+ scikit-image
SD-CN-Animation/scripts/__pycache__/base_ui.cpython-310.pyc ADDED
Binary file (11.6 kB). View file
 
SD-CN-Animation/scripts/base_ui.py ADDED
@@ -0,0 +1,252 @@
1
+ import sys, os
2
+
3
+ import gradio as gr
4
+ import modules
5
+ from types import SimpleNamespace
6
+
7
+ from modules import script_callbacks, shared
8
+ from modules.shared import cmd_opts, opts
9
+ from webui import wrap_gradio_gpu_call
10
+
11
+ from modules.ui_components import ToolButton, FormRow, FormGroup
12
+ from modules.ui import create_override_settings_dropdown
13
+ import modules.scripts as scripts
14
+
15
+ from modules.sd_samplers import samplers_for_img2img
16
+ from modules.ui import setup_progressbar, create_sampler_and_steps_selection, ordered_ui_categories, create_output_panel
17
+
18
+ from scripts.core import vid2vid, txt2vid, utils
19
+ import traceback
20
+
21
+ def V2VArgs():
22
+ seed = -1
23
+ width = 1024
24
+ height = 576
25
+ cfg_scale = 5.5
26
+ steps = 15
27
+ prompt = ""
28
+ n_prompt = "text, letters, logo, brand, close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck"
29
+ processing_strength = 0.85
30
+ fix_frame_strength = 0.15
31
+ return locals()
32
+
33
+ def T2VArgs():
34
+ seed = -1
35
+ width = 768
36
+ height = 512
37
+ cfg_scale = 5.5
38
+ steps = 15
39
+ prompt = ""
40
+ n_prompt = "((blur, blurr, blurred, blurry, fuzzy, unclear, unfocus, bocca effect)), text, letters, logo, brand, close up, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck"
41
+ processing_strength = 0.75
42
+ fix_frame_strength = 0.35
43
+ return locals()
44
+
45
+ def setup_common_values(mode, d):
46
+ with gr.Row():
47
+ width = gr.Slider(label='Width', minimum=64, maximum=2048, step=64, value=d.width, interactive=True)
48
+ height = gr.Slider(label='Height', minimum=64, maximum=2048, step=64, value=d.height, interactive=True)
49
+ with gr.Row(elem_id=f'{mode}_prompt_toprow'):
50
+ prompt = gr.Textbox(label='Prompt', lines=3, interactive=True, elem_id=f"{mode}_prompt", placeholder="Enter your prompt here...")
51
+ with gr.Row(elem_id=f'{mode}_n_prompt_toprow'):
52
+ n_prompt = gr.Textbox(label='Negative prompt', lines=3, interactive=True, elem_id=f"{mode}_n_prompt", value=d.n_prompt)
53
+ with gr.Row():
54
+ cfg_scale = gr.Slider(label='CFG scale', minimum=1, maximum=100, step=1, value=d.cfg_scale, interactive=True)
55
+ with gr.Row():
56
+ seed = gr.Number(label='Seed (this parameter controls how the first frame looks and the color distribution of the consecutive frames, as they are dependent on the first one)', value = d.seed, interactive = True, precision=0)
57
+ with gr.Row():
58
+ processing_strength = gr.Slider(label="Processing strength (Step 1)", value=d.processing_strength, minimum=0, maximum=1, step=0.05, interactive=True)
59
+ fix_frame_strength = gr.Slider(label="Fix frame strength (Step 2)", value=d.fix_frame_strength, minimum=0, maximum=1, step=0.05, interactive=True)
60
+ with gr.Row():
61
+ sampler_index = gr.Dropdown(label='Sampling method', elem_id=f"{mode}_sampling", choices=[x.name for x in samplers_for_img2img], value=samplers_for_img2img[0].name, type="index", interactive=True)
62
+ steps = gr.Slider(label="Sampling steps", minimum=1, maximum=150, step=1, elem_id=f"{mode}_steps", value=d.steps, interactive=True)
63
+
64
+ return width, height, prompt, n_prompt, cfg_scale, seed, processing_strength, fix_frame_strength, sampler_index, steps
65
+
66
+ def inputs_ui():
67
+ v2v_args = SimpleNamespace(**V2VArgs())
68
+ t2v_args = SimpleNamespace(**T2VArgs())
69
+ with gr.Tabs():
70
+ glo_sdcn_process_mode = gr.State(value='vid2vid')
71
+
72
+ with gr.Tab('vid2vid') as tab_vid2vid:
73
+ with gr.Row():
74
+ gr.HTML('Input video (each frame will be used as initial image for SD and as input image to CN): *REQUIRED')
75
+ with gr.Row():
76
+ v2v_file = gr.File(label="Input video", interactive=True, file_count="single", file_types=["video"], elem_id="vid_to_vid_chosen_file")
77
+
78
+ v2v_width, v2v_height, v2v_prompt, v2v_n_prompt, v2v_cfg_scale, v2v_seed, v2v_processing_strength, v2v_fix_frame_strength, v2v_sampler_index, v2v_steps = setup_common_values('vid2vid', v2v_args)
79
+
80
+ with gr.Accordion("Extra settings",open=False):
81
+ gr.HTML('# Occlusion mask params:')
82
+ with gr.Row():
83
+ with gr.Column(scale=1, variant='compact'):
84
+ v2v_occlusion_mask_blur = gr.Slider(label='Occlusion blur strength', minimum=0, maximum=10, step=0.1, value=3, interactive=True)
85
+ gr.HTML('')
86
+ v2v_occlusion_mask_trailing = gr.Checkbox(label="Occlusion trailing", info="Reduce ghosting but adds more flickering to the video", value=True, interactive=True)
87
+ with gr.Column(scale=1, variant='compact'):
88
+ v2v_occlusion_mask_flow_multiplier = gr.Slider(label='Occlusion flow multiplier', minimum=0, maximum=10, step=0.1, value=5, interactive=True)
89
+ v2v_occlusion_mask_difo_multiplier = gr.Slider(label='Occlusion diff origin multiplier', minimum=0, maximum=10, step=0.1, value=2, interactive=True)
90
+ v2v_occlusion_mask_difs_multiplier = gr.Slider(label='Occlusion diff styled multiplier', minimum=0, maximum=10, step=0.1, value=0, interactive=True)
91
+
92
+ with gr.Row():
93
+ with gr.Column(scale=1, variant='compact'):
94
+ gr.HTML('# Step 1 params:')
95
+ v2v_step_1_seed = gr.Number(label='Seed', value = -1, interactive = True, precision=0)
96
+ gr.HTML('<br>')
97
+ v2v_step_1_blend_alpha = gr.Slider(label='Warped prev frame vs Current frame blend alpha', minimum=0, maximum=1, step=0.1, value=1, interactive=True)
98
+ v2v_step_1_processing_mode = gr.Radio(["Process full image then blend in occlusions", "Inpaint occlusions"], type="index", \
99
+ label="Processing mode", value="Process full image then blend in occlusions", interactive=True)
100
+
101
+
102
+ with gr.Column(scale=1, variant='compact'):
103
+ gr.HTML('# Step 2 params:')
104
+ v2v_step_2_seed = gr.Number(label='Seed', value = 8888, interactive = True, precision=0)
105
+
106
+ with FormRow(elem_id="vid2vid_override_settings_row") as row:
107
+ v2v_override_settings = create_override_settings_dropdown("vid2vid", row)
108
+
109
+ with FormGroup(elem_id=f"script_container"):
110
+ v2v_custom_inputs = scripts.scripts_img2img.setup_ui()
111
+
112
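+ # txt2vid tab: same common controls, but the control video is optional and is only fed to ControlNet.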
+ with gr.Tab('txt2vid') as tab_txt2vid:
+ with gr.Row():
+ gr.HTML('Control video (each frame will be used as input image to CN): *NOT REQUIRED')
+ with gr.Row():
+ t2v_file = gr.File(label="Input video", interactive=True, file_count="single", file_types=["video"], elem_id="tex_to_vid_chosen_file")
+ t2v_init_image = gr.Image(label="Input image", interactive=True, elem_id="tex_to_vid_init_image")
+
+ t2v_width, t2v_height, t2v_prompt, t2v_n_prompt, t2v_cfg_scale, t2v_seed, t2v_processing_strength, t2v_fix_frame_strength, t2v_sampler_index, t2v_steps = setup_common_values('txt2vid', t2v_args)
+
+ with gr.Row():
+ t2v_length = gr.Slider(label='Length (in frames)', minimum=10, maximum=2048, step=10, value=40, interactive=True)
+ t2v_fps = gr.Slider(label='Video FPS', minimum=4, maximum=64, step=4, value=12, interactive=True)
+
+ gr.HTML('<br>')
+ t2v_cn_frame_send = gr.Radio(["None", "Current generated frame", "Previous generated frame", "Current reference video frame"], type="index", label="Which frame should be sent to CN?", value="None", interactive=True)
+
+ with FormRow(elem_id="txt2vid_override_settings_row") as row:
+ t2v_override_settings = create_override_settings_dropdown("txt2vid", row)
+
+ with FormGroup(elem_id="script_container"):
+ t2v_custom_inputs = scripts.scripts_txt2img.setup_ui()
+
+ tab_vid2vid.select(fn=lambda: 'vid2vid', inputs=[], outputs=[glo_sdcn_process_mode])
+ tab_txt2vid.select(fn=lambda: 'txt2vid', inputs=[], outputs=[glo_sdcn_process_mode])
+
+ return locals()
+
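+ # process() dispatches on the processing mode (passed first in *args) and re-yields the UI updates streamed by the selected pipeline.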
+ def process(*args):
+ msg = 'Done'
+ try:
+ if args[0] == 'vid2vid':
+ yield from vid2vid.start_process(*args)
+ elif args[0] == 'txt2vid':
+ yield from txt2vid.start_process(*args)
+ else:
+ msg = f"Unsupported processing mode: '{args[0]}'"
+ raise Exception(msg)
+ except Exception as error:
+ # handle the exception
+ msg = f"An exception occurred while trying to process the frame: {error}"
+ print(msg)
+ traceback.print_exc()
+
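+ # Final update: report the message, leave the preview images and video unchanged, re-enable Generate and disable Interrupt.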
+ yield msg, gr.Image.update(), gr.Image.update(), gr.Image.update(), gr.Image.update(), gr.Video.update(), gr.Button.update(interactive=True), gr.Button.update(interactive=False)
+
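+ # stop_process() only raises the shared interrupt flag (which the running pipeline is expected to check) and greys out the Interrupt button.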
+ def stop_process(*args):
+ utils.shared.is_interrupted = True
+ return gr.Button.update(interactive=False)
+
+
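+ # on_ui_tabs() assembles the whole extension tab; the img2img script runner is initialized first so that custom script UIs (such as ControlNet) can be embedded below.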
+ def on_ui_tabs():
+ modules.scripts.scripts_current = modules.scripts.scripts_img2img
+ modules.scripts.scripts_img2img.initialize_scripts(is_img2img=True)
+
+ with gr.Blocks(analytics_enabled=False) as sdcnanim_interface:
+ components = {}
+
+ #dv = SimpleNamespace(**T2VOutputArgs())
+ with gr.Row(elem_id='sdcn-core').style(equal_height=False, variant='compact'):
+ with gr.Column(scale=1, variant='panel'):
+ #with gr.Tabs():
+ components = inputs_ui()
+
+ with gr.Accordion("Export settings", open=False):
+ export_settings_button = gr.Button('Export', elem_id="sdcn_export_settings_button")
+ export_setting_json = gr.Code(value='')
+
+ with gr.Column(scale=1, variant='compact'):
+ with gr.Row(variant='compact'):
+ run_button = gr.Button('Generate', elem_id="sdcn_anim_generate", variant='primary')
+ stop_button = gr.Button('Interrupt', elem_id="sdcn_anim_interrupt", variant='primary', interactive=False)
+
+ save_frames_check = gr.Checkbox(label="Save frames into a folder next to the video (check this before starting the generation if you also want the individual frames saved)", value=True, interactive=True)
+ gr.HTML('<br>')
+
+ with gr.Column(variant="panel"):
+ sp_progress = gr.HTML(elem_id="sp_progress", value="")
+
+ with gr.Row(variant='compact'):
+ img_preview_curr_frame = gr.Image(label='Current frame', elem_id="img_preview_curr_frame", type='pil').style(height=240)
+ img_preview_curr_occl = gr.Image(label='Current occlusion', elem_id="img_preview_curr_occl", type='pil').style(height=240)
+ with gr.Row(variant='compact'):
+ img_preview_prev_warp = gr.Image(label='Previous frame warped', elem_id="img_preview_prev_warp", type='pil').style(height=240)
+ img_preview_processed = gr.Image(label='Processed', elem_id="img_preview_processed", type='pil').style(height=240)
+
+ video_preview = gr.Video(interactive=False)
+
+ with gr.Row(variant='compact'):
+ dummy_component = gr.Label(visible=False)
+
+ components['glo_save_frames_check'] = save_frames_check
+
+ # Define parameters for the action methods.
+ utils.shared.v2v_custom_inputs_size = len(components['v2v_custom_inputs'])
+ utils.shared.t2v_custom_inputs_size = len(components['t2v_custom_inputs'])
+ #print('v2v_custom_inputs', len(components['v2v_custom_inputs']), components['v2v_custom_inputs'])
+ #print('t2v_custom_inputs', len(components['t2v_custom_inputs']), components['t2v_custom_inputs'])
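+ # Note: the order of method_inputs has to match utils.get_component_names(); process() reads the mode from the first element, so glo_sdcn_process_mode presumably comes first, with the per-script custom inputs appended after the named components.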
+ method_inputs = [components[name] for name in utils.get_component_names()] + components['v2v_custom_inputs'] + components['t2v_custom_inputs']
+
+ method_outputs = [
+ sp_progress,
+ img_preview_curr_frame,
+ img_preview_curr_occl,
+ img_preview_prev_warp,
+ img_preview_processed,
+ video_preview,
+ run_button,
+ stop_button,
+ ]
+
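+ # Wire up the buttons: Generate streams results through process(), Interrupt flips the shared flag, Export writes the current settings into the code box.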
+ run_button.click(
+ fn=process, #wrap_gradio_gpu_call(start_process, extra_outputs=[None, '', '']),
+ inputs=method_inputs,
+ outputs=method_outputs,
+ show_progress=True,
+ )
+
+ stop_button.click(
+ fn=stop_process,
+ outputs=[stop_button],
+ show_progress=False
+ )
+
+ export_settings_button.click(
+ fn=utils.export_settings,
+ inputs=method_inputs,
+ outputs=[export_setting_json],
+ show_progress=False
+ )
+
+ modules.scripts.scripts_current = None
+
+ # Define the queue - required because process() is a generator that streams updates.
+ sdcnanim_interface.queue(concurrency_count=1)
+ return [(sdcnanim_interface, "SD-CN-Animation", "sd_cn_animation_interface")]
+
+
+ script_callbacks.on_ui_tabs(on_ui_tabs)