Koi953215 committed
Commit f9cbc98
1 Parent(s): e1c55e6

init commit

This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. NaRCan_model.py +221 -0
  2. app.py +362 -0
  3. canonical/bear.png +0 -0
  4. canonical/boat.png +0 -0
  5. canonical/cactus.png +0 -0
  6. canonical/corgi.png +0 -0
  7. canonical/gold-fish.png +0 -0
  8. canonical/koolshooters.png +0 -0
  9. canonical/overlook-the-ocean.png +0 -0
  10. canonical/rotate.png +0 -0
  11. canonical/shark-ocean.png +0 -0
  12. canonical/surf.png +0 -0
  13. canonical/woman-drink.png +0 -0
  14. canonical/yacht.png +0 -0
  15. examples/bear.mp4 +0 -0
  16. examples/boat.mp4 +0 -0
  17. examples/cactus.mp4 +0 -0
  18. examples/corgi.mp4 +0 -0
  19. examples/gold-fish.mp4 +0 -0
  20. examples/koolshooters.mp4 +0 -0
  21. examples/overlook-the-ocean.mp4 +0 -0
  22. examples/rotate.mp4 +0 -0
  23. examples/shark-ocean.mp4 +0 -0
  24. examples/surf.mp4 +0 -0
  25. examples/woman-drink.mp4 +0 -0
  26. examples/yacht.mp4 +0 -0
  27. examples_frames/bear/00040.jpg +0 -0
  28. examples_frames/bear/00041.jpg +0 -0
  29. examples_frames/bear/00042.jpg +0 -0
  30. examples_frames/bear/00043.jpg +0 -0
  31. examples_frames/bear/00044.jpg +0 -0
  32. examples_frames/bear/00045.jpg +0 -0
  33. examples_frames/bear/00046.jpg +0 -0
  34. examples_frames/bear/00047.jpg +0 -0
  35. examples_frames/bear/00048.jpg +0 -0
  36. examples_frames/bear/00049.jpg +0 -0
  37. examples_frames/bear/00050.jpg +0 -0
  38. examples_frames/bear/00051.jpg +0 -0
  39. examples_frames/bear/00052.jpg +0 -0
  40. examples_frames/bear/00053.jpg +0 -0
  41. examples_frames/bear/00054.jpg +0 -0
  42. examples_frames/bear/00055.jpg +0 -0
  43. examples_frames/bear/00056.jpg +0 -0
  44. examples_frames/bear/00057.jpg +0 -0
  45. examples_frames/bear/00058.jpg +0 -0
  46. examples_frames/bear/00059.jpg +0 -0
  47. examples_frames/bear/00060.jpg +0 -0
  48. examples_frames/bear/00061.jpg +0 -0
  49. examples_frames/bear/00062.jpg +0 -0
  50. examples_frames/bear/00063.jpg +0 -0
NaRCan_model.py ADDED
@@ -0,0 +1,221 @@
import torch
from torch import nn
import numpy as np
import math
# import tinycudann as tcnn


class SineLayer(nn.Module):
    # See paper sec. 3.2, final paragraph, and supplement Sec. 1.5 for discussion of omega_0.

    # If is_first=True, omega_0 is a frequency factor which simply multiplies the activations before the
    # nonlinearity. Different signals may require different omega_0 in the first layer - this is a
    # hyperparameter.

    # If is_first=False, then the weights will be divided by omega_0 so as to keep the magnitude of
    # activations constant, but boost gradients to the weight matrix (see supplement Sec. 1.5)

    def __init__(self, in_features, out_features, bias=True,
                 is_first=False, omega_0=30):
        super().__init__()
        self.omega_0 = omega_0
        self.is_first = is_first

        self.in_features = in_features
        self.linear = nn.Linear(in_features, out_features, bias=bias)

        self.init_weights()

    def init_weights(self):
        with torch.no_grad():
            if self.is_first:
                self.linear.weight.uniform_(-1 / self.in_features,
                                            1 / self.in_features)
            else:
                self.linear.weight.uniform_(-np.sqrt(6 / self.in_features) / self.omega_0,
                                            np.sqrt(6 / self.in_features) / self.omega_0)

    def forward(self, input):
        return torch.sin(self.omega_0 * self.linear(input))

    def forward_with_intermediate(self, input):
        # For visualization of activation distributions
        intermediate = self.omega_0 * self.linear(input)
        return torch.sin(intermediate), intermediate

class Siren(nn.Module):
    def __init__(self, in_features, hidden_features, hidden_layers, out_features, outermost_linear=False,
                 first_omega_0=30, hidden_omega_0=30.):
        super().__init__()

        self.net = []
        self.net.append(SineLayer(in_features, hidden_features,
                                  is_first=True, omega_0=first_omega_0))

        for i in range(hidden_layers):
            self.net.append(SineLayer(hidden_features, hidden_features,
                                      is_first=False, omega_0=hidden_omega_0))

        if outermost_linear:
            final_linear = nn.Linear(hidden_features, out_features)

            with torch.no_grad():
                final_linear.weight.uniform_(-np.sqrt(6 / hidden_features) / hidden_omega_0,
                                             np.sqrt(6 / hidden_features) / hidden_omega_0)

            self.net.append(final_linear)
        else:
            self.net.append(SineLayer(hidden_features, out_features,
                                      is_first=False, omega_0=hidden_omega_0))

        self.net = nn.Sequential(*self.net)

    def forward(self, coords):
        output = self.net(coords)
        return output

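
# Usage sketch: app.py (in NaRCan_make_video) instantiates this network as the residual
# deformation field g that refines the homography warp:
#
#     g = Siren(in_features=3, out_features=2, hidden_features=256,
#               hidden_layers=5, outermost_linear=True)
#     offset = g(xyt)    # xyt: (N, 3) pixel-and-time coordinates, offset: (N, 2)
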
class Homography(nn.Module):
    def __init__(self, in_features=1, hidden_features=256, hidden_layers=1):
        super().__init__()
        out_features = 8

        self.net = []
        self.net.append(nn.Linear(in_features, hidden_features))
        self.net.append(nn.ReLU(inplace=True))
        for i in range(hidden_layers):
            self.net.append(nn.Linear(hidden_features, hidden_features))
            self.net.append(nn.ReLU(inplace=True))
        self.net.append(nn.Linear(hidden_features, out_features))
        self.net = nn.Sequential(*self.net)

        self.init_weights()

    def init_weights(self):
        with torch.no_grad():
            self.net[-1].bias.copy_(torch.Tensor([1., 0., 0., 0., 1., 0., 0., 0.]))

    def forward(self, coords):
        output = self.net(coords)
        return output

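
# Note: the 8 outputs parameterize a 3x3 homography with the bottom-right entry fixed
# to 1, so the bias init [1, 0, 0, 0, 1, 0, 0, 0] corresponds to the identity warp.
# A common row-major reading (the exact convention is defined by util.apply_homography,
# which is not shown in this diff) would be:
#
#     H = [[h1, h2, h3],
#          [h4, h5, h6],
#          [h7, h8, 1]]
#     x' = (h1*x + h2*y + h3) / (h7*x + h8*y + 1)
#     y' = (h4*x + h5*y + h6) / (h7*x + h8*y + 1)
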
class Annealed(nn.Module):
    def __init__(self, in_channels, annealed_step, annealed_begin_step=0, identity=True):
        """
        Defines a function that embeds x to (x, sin(2^k x), cos(2^k x), ...)
        in_channels: number of input channels (3 for both xyz and direction)
        """
        super(Annealed, self).__init__()
        self.N_freqs = 16
        self.in_channels = in_channels
        self.annealed = True
        self.annealed_step = annealed_step
        self.annealed_begin_step = annealed_begin_step

        self.index = torch.linspace(0, self.N_freqs - 1, self.N_freqs)
        self.identity = identity

        self.index_2 = self.index.view(-1, 1).repeat(1, 2).view(-1)

    def forward(self, x_embed, step):
        """
        Embeds x to (x, sin(2^k x), cos(2^k x), ...)
        Different from the paper, "x" is also in the output
        See https://github.com/bmild/nerf/issues/12

        Inputs:
            x: (B, self.in_channels)

        Outputs:
            out: (B, self.out_channels)
        """
        use_PE = False

        if self.annealed_begin_step == 0:
            # calculate the weight for each frequency band
            alpha = self.N_freqs * step / float(self.annealed_step)
        else:
            if step <= self.annealed_begin_step:
                alpha = 0
            else:
                alpha = self.N_freqs * (step - self.annealed_begin_step) / float(
                    self.annealed_step)

        w = (1 - torch.cos(math.pi * torch.clamp(alpha * torch.ones_like(self.index_2) - self.index_2, 0, 1))) / 2

        if use_PE:
            w[16:] = w[:16]

        out = x_embed * w.to(x_embed.device)

        return out

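
# Coarse-to-fine sketch: with N_freqs = 16, frequency band k receives the weight
#     w_k = (1 - cos(pi * clamp(alpha - k, 0, 1))) / 2,  where  alpha = N_freqs * step / annealed_step,
# so low-frequency bands switch on first and higher bands fade in as training proceeds.
# For example, at alpha = 4.5 bands 0-3 are fully on (w = 1), band 4 is half on (w = 0.5),
# and bands 5-15 are still off (w = 0); index_2 simply duplicates each band for its
# sin and cos channels.
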
# NOTE: the classes below require tinycudann (tcnn), whose import is commented out at
# the top of this file, so instantiating them as-is raises a NameError. The Gradio demo
# in app.py only imports Homography and Siren.
class BARF_PE(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.encoder = tcnn.Encoding(n_input_dims=2,
                                     encoding_config=config["positional encoding"])
        self.decoder = tcnn.Network(n_input_dims=self.encoder.n_output_dims + 2,
                                    n_output_dims=3,
                                    network_config=config["BARF network"])

    def forward(self, x, step=0, aneal_func=None):
        input = x
        input = self.encoder(input)
        if aneal_func is not None:
            input = torch.cat([x, aneal_func(input, step)], dim=-1)
        else:
            input = torch.cat([x, input], dim=-1)

        weight = torch.ones(input.shape[-1], device=input.device).cuda()
        x = self.decoder(weight * input)
        return x


class Deform_Hash3d(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.encoder = tcnn.Encoding(n_input_dims=3,
                                     encoding_config=config["encoding_deform3d"])
        self.decoder = nn.Sequential(nn.Linear(self.encoder.n_output_dims + 3, 256),
                                     nn.ReLU(),
                                     nn.Linear(256, 256),
                                     nn.ReLU(),
                                     nn.Linear(256, 256),
                                     nn.ReLU(),
                                     nn.Linear(256, 256),
                                     nn.ReLU(),
                                     nn.Linear(256, 256),
                                     nn.ReLU(),
                                     nn.Linear(256, 256),
                                     nn.ReLU(),
                                     nn.Linear(256, 2))

    def forward(self, x, step=0, aneal_func=None):
        input = x
        input = self.encoder(input)
        if aneal_func is not None:
            input = torch.cat([x, aneal_func(input, step)], dim=-1)
        else:
            input = torch.cat([x, input], dim=-1)

        weight = torch.ones(input.shape[-1], device=input.device).cuda()
        x = self.decoder(weight * input) / 5

        return x


class Deform_Hash3d_Warp(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.Deform_Hash3d = Deform_Hash3d(config)

    def forward(self, xyt_norm, step=0, aneal_func=None):
        x = self.Deform_Hash3d(xyt_norm, step=step, aneal_func=aneal_func)

        return x
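
The three tcnn-based classes above (BARF_PE, Deform_Hash3d, Deform_Hash3d_Warp) read their encoder and decoder settings from a config dict keyed by "positional encoding", "BARF network", and "encoding_deform3d". That dict is not part of this commit, so the sketch below only illustrates the shape a tinycudann-style config could take; the values are placeholders, not the settings used for the released weights.

    # Hypothetical config sketch: the key names match what NaRCan_model.py indexes,
    # but the values are illustrative only.
    config = {
        "positional encoding": {           # tcnn.Encoding for BARF_PE (2-D input)
            "otype": "Frequency",
            "n_frequencies": 16,
        },
        "BARF network": {                  # tcnn.Network decoder for BARF_PE
            "otype": "FullyFusedMLP",
            "activation": "ReLU",
            "output_activation": "None",
            "n_neurons": 64,
            "n_hidden_layers": 2,
        },
        "encoding_deform3d": {             # tcnn.Encoding for Deform_Hash3d ((x, y, t) input)
            "otype": "HashGrid",
            "n_levels": 16,
            "n_features_per_level": 2,
            "log2_hashmap_size": 19,
            "base_resolution": 16,
            "per_level_scale": 1.5,
        },
    }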
app.py ADDED
@@ -0,0 +1,362 @@
import gradio as gr
import numpy as np
import torch
import cv2
import os
import imageio
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
from controlnet_aux import LineartDetector
from functools import partial
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import Compose, ToTensor, Normalize, Resize

from NaRCan_model import Homography, Siren
from util import get_mgrid, apply_homography, jacobian, VideoFitting, TestVideoFitting


def get_example():
    case = [
        [
            'examples/bear.mp4',
        ],
        [
            'examples/boat.mp4',
        ],
        [
            'examples/woman-drink.mp4',
        ],
        [
            'examples/corgi.mp4',
        ],
        [
            'examples/yacht.mp4',
        ],
        [
            'examples/koolshooters.mp4',
        ],
        [
            'examples/overlook-the-ocean.mp4',
        ],
        [
            'examples/rotate.mp4',
        ],
        [
            'examples/shark-ocean.mp4',
        ],
        [
            'examples/surf.mp4',
        ],
        [
            'examples/cactus.mp4',
        ],
        [
            'examples/gold-fish.mp4',
        ]
    ]
    return case


def set_default_prompt(video_name):
    video_to_prompt = {
        'bear.mp4': 'bear, Van Gogh Style',
        'boat.mp4': 'a burning boat sails on lava',
        'cactus.mp4': 'cactus, made of paper',
        'corgi.mp4': 'a hellhound',
        'gold-fish.mp4': 'Goldfish in the Milky Way',
        'koolshooters.mp4': 'Avatar',
        'overlook-the-ocean.mp4': 'ocean, pixel style',
        'rotate.mp4': 'turbine engine',
        'shark-ocean.mp4': 'A sleek shark, cartoon style',
        'surf.mp4': 'Sailing, The background is a large white cloud, sketch style',
        'woman-drink.mp4': 'a drinking zombie',
        'yacht.mp4': 'yacht, cyberpunk style',
    }
    return video_to_prompt.get(video_name, '')


def update_prompt(input_video):
    video_name = input_video.split('/')[-1]
    return set_default_prompt(video_name)


# Map videos to corresponding images
video_to_image = {
    'bear.mp4': ['canonical/bear.png', 'pth_file/bear', 'examples_frames/bear'],
    'boat.mp4': ['canonical/boat.png', 'pth_file/boat', 'examples_frames/boat'],
    'cactus.mp4': ['canonical/cactus.png', 'pth_file/cactus', 'examples_frames/cactus'],
    'corgi.mp4': ['canonical/corgi.png', 'pth_file/corgi', 'examples_frames/corgi'],
    'gold-fish.mp4': ['canonical/gold-fish.png', 'pth_file/gold-fish', 'examples_frames/gold-fish'],
    'koolshooters.mp4': ['canonical/koolshooters.png', 'pth_file/koolshooters', 'examples_frames/koolshooters'],
    'overlook-the-ocean.mp4': ['canonical/overlook-the-ocean.png', 'pth_file/overlook-the-ocean', 'examples_frames/overlook-the-ocean'],
    'rotate.mp4': ['canonical/rotate.png', 'pth_file/rotate', 'examples_frames/rotate'],
    'shark-ocean.mp4': ['canonical/shark-ocean.png', 'pth_file/shark-ocean', 'examples_frames/shark-ocean'],
    'surf.mp4': ['canonical/surf.png', 'pth_file/surf', 'examples_frames/surf'],
    'woman-drink.mp4': ['canonical/woman-drink.png', 'pth_file/woman-drink', 'examples_frames/woman-drink'],
    'yacht.mp4': ['canonical/yacht.png', 'pth_file/yacht', 'examples_frames/yacht'],
}

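
# Asset layout note: each entry above points to three per-video resources this demo
# expects -- the refined canonical image (canonical/<name>.png), a directory of
# pretrained NaRCan weights (pth_file/<name>/ with homography_g.pth and mlp_g.pth,
# loaded in NaRCan_make_video below), and the extracted input frames
# (examples_frames/<name>/).
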
def images_to_video(image_list, output_path, fps=10):
    # Convert PIL Images to numpy arrays
    frames = [np.array(img).astype(np.uint8) for img in image_list]
    frames = frames[:20]

    # Create video writer
    writer = imageio.get_writer(output_path, fps=fps, codec='libx264')

    for frame in frames:
        writer.append_data(frame)

    writer.close()


def NaRCan_make_video(edit_canonical, pth_path, frames_path):
    # load NaRCan model
    checkpoint_g_old = torch.load(os.path.join(pth_path, "homography_g.pth"))
    checkpoint_g = torch.load(os.path.join(pth_path, "mlp_g.pth"))
    g_old = Homography(hidden_features=256, hidden_layers=2).cuda()
    g = Siren(in_features=3, out_features=2, hidden_features=256,
              hidden_layers=5, outermost_linear=True).cuda()

    g_old.load_state_dict(checkpoint_g_old)
    g.load_state_dict(checkpoint_g)

    g_old.eval()
    g.eval()

    transform = Compose([
        Resize(512),
        ToTensor(),
        Normalize(torch.Tensor([0.5, 0.5, 0.5]), torch.Tensor([0.5, 0.5, 0.5]))
    ])
    v = TestVideoFitting(frames_path, transform)
    videoloader = DataLoader(v, batch_size=1, pin_memory=True, num_workers=0)

    model_input, ground_truth = next(iter(videoloader))
    model_input, ground_truth = model_input[0].cuda(), ground_truth[0].cuda()

    myoutput = None
    data_len = len(os.listdir(frames_path))

    with torch.no_grad():
        batch_size = (v.H * v.W)
        for step in range(data_len):
            start = (step * batch_size) % len(model_input)
            end = min(start + batch_size, len(model_input))

            # get the deformation
            xy, t = model_input[start:end, :-1], model_input[start:end, [-1]]
            xyt = model_input[start:end]
            h_old = apply_homography(xy, g_old(t))
            h = g(xyt)
            xy_ = h_old + h

            # use canonical to reconstruct
            w, h = v.W, v.H
            canonical_img = np.array(edit_canonical.convert('RGB'))
            canonical_img = torch.from_numpy(canonical_img).float().cuda()
            h_c, w_c = canonical_img.shape[:2]
            grid_new = xy_.clone()
            grid_new[..., 1] = xy_[..., 0] / 1.5
            grid_new[..., 0] = xy_[..., 1] / 2.0

            if len(canonical_img.shape) == 3:
                canonical_img = canonical_img.unsqueeze(0)
            results = torch.nn.functional.grid_sample(
                canonical_img.permute(0, 3, 1, 2),
                grid_new.unsqueeze(1).unsqueeze(0),
                mode='bilinear',
                padding_mode='border')
            o = results.squeeze().permute(1, 0)

            if step == 0:
                myoutput = o
            else:
                myoutput = torch.cat([myoutput, o])

    myoutput = myoutput.reshape(512, 512, data_len, 3).permute(2, 0, 1, 3).clone().detach().cpu().numpy().astype(np.float32)
    # myoutput = np.clip(myoutput, -1, 1) * 0.5 + 0.5

    for i in range(len(myoutput)):
        myoutput[i] = Image.fromarray(np.uint8(myoutput[i])).resize((512, 512))  # 854, 480

    edit_video_path = f'NaRCan_fps_10.mp4'
    images_to_video(myoutput, edit_video_path)

    return edit_video_path

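
# Reconstruction sketch: for each frame t, every pixel's (x, y) is mapped into canonical
# space as xy_ = apply_homography(xy, g_old(t)) + g(x, y, t), and grid_sample pulls the
# corresponding color from the edited canonical image -- so a single edit of the
# canonical image propagates consistently to all frames.
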
def edit_with_pnp(input_video, prompt, num_steps, guidance_scale, seed, n_prompt, control_type="Lineart"):
    video_name = input_video.split('/')[-1]
    if video_name in video_to_image:
        image_path = video_to_image[video_name][0]
        pth_path = video_to_image[video_name][1]
        frames_path = video_to_image[video_name][2]
    else:
        return None

    if control_type == "Lineart":
        # Load the ControlNet model for lineart
        controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_lineart", torch_dtype=torch.float16)
        pipe = StableDiffusionControlNetPipeline.from_pretrained(
            "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
        )
        pipe.to("cuda")
        # lineart
        processor = LineartDetector.from_pretrained("lllyasviel/Annotators")
        processor_partial = partial(processor, coarse=False)
        size_ = 768
        canonical_image = Image.open(image_path)
        ori_size = canonical_image.size
        image = processor_partial(canonical_image.resize((size_, size_)), detect_resolution=size_, image_resolution=size_)
        image = image.resize(ori_size, resample=Image.BILINEAR)

        generator = torch.manual_seed(seed) if seed != -1 else None
        output_images = pipe(
            prompt=prompt,
            image=image,
            num_inference_steps=num_steps,
            guidance_scale=guidance_scale,
            negative_prompt=n_prompt,
            generator=generator
        ).images
        # output_images[0] = output_images[0].resize(ori_size, resample=Image.BILINEAR)

    else:
        # Load the ControlNet model for canny
        controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny", torch_dtype=torch.float16)
        pipe = StableDiffusionControlNetPipeline.from_pretrained(
            "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
        )
        pipe.to("cuda")
        # canny
        canonical_image = cv2.imread(image_path)
        canonical_image = cv2.cvtColor(canonical_image, cv2.COLOR_BGR2RGB)
        image = cv2.cvtColor(canonical_image, cv2.COLOR_RGB2GRAY)
        image = cv2.Canny(image, 100, 200)
        image = image[:, :, None]
        image = np.concatenate([image, image, image], axis=2)
        image = Image.fromarray(image)

        generator = torch.manual_seed(seed) if seed != -1 else None
        output_images = pipe(
            prompt=prompt,
            image=image,
            num_inference_steps=num_steps,
            guidance_scale=guidance_scale,
            negative_prompt=n_prompt,
            generator=generator
        ).images

    edit_video_path = NaRCan_make_video(output_images[0], pth_path, frames_path)

    # Return the path to the video rendered from the edited canonical image
    return edit_video_path

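
# Direct-call sketch (bypassing the Gradio UI); the arguments mirror the demo's defaults,
# and the seed value here is arbitrary:
#
#     edit_with_pnp('examples/bear.mp4', 'bear, Van Gogh Style',
#                   num_steps=20, guidance_scale=9.0, seed=0, n_prompt='',
#                   control_type='Lineart')
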
########
# demo #
########


intro = """
<div style="text-align:center">
<h1 style="font-weight: 1400; text-align: center; margin-bottom: 7px;">
NaRCan - <small>Natural Refined Canonical Image</small>
</h1>
<span>[<a target="_blank" href="https://koi953215.github.io/NaRCan_page/">Project page</a>], [<a target="_blank" href="https://huggingface.co/papers/2406.06523">Paper</a>]</span>
<div style="display:flex; justify-content: center;margin-top: 0.5em">Each edit takes ~10 sec </div>
</div>
"""


with gr.Blocks(css="style.css") as demo:

    gr.HTML(intro)
    frames = gr.State()
    inverted_latents = gr.State()
    latents = gr.State()
    zs = gr.State()
    do_inversion = gr.State(value=True)

    with gr.Row():
        input_video = gr.Video(label="Input Video", interactive=False, elem_id="input_video", value='examples/bear.mp4')
        output_video = gr.Video(label="Edited Video", interactive=False, elem_id="output_video")
    input_video.style(height=365, width=365)
    output_video.style(height=365, width=365)

    with gr.Row():
        prompt = gr.Textbox(
            label="Describe your edited video",
            max_lines=1,
            value="bear, Van Gogh Style"
            # placeholder="bear, Van Gogh Style"
        )

    with gr.Row():
        run_button = gr.Button("Edit your video!", visible=True)

    max_images = 12
    default_num_images = 3
    with gr.Accordion('Advanced options', open=False):
        control_type = gr.Dropdown(
            ["Canny", "Lineart"],
            label="Control Type",
            info="Canny or Lineart",
            value="Lineart"
        )
        num_steps = gr.Slider(label='Steps',
                              minimum=1,
                              maximum=100,
                              value=20,
                              step=1)
        guidance_scale = gr.Slider(label='Guidance Scale',
                                   minimum=0.1,
                                   maximum=30.0,
                                   value=9.0,
                                   step=0.1)
        seed = gr.Slider(label='Seed',
                         minimum=-1,
                         maximum=2147483647,
                         step=1,
                         randomize=True)
        n_prompt = gr.Textbox(
            label='Negative Prompt',
            value=""
        )

    input_video.change(
        fn=update_prompt,
        inputs=[input_video],
        outputs=[prompt],
        queue=False)

    run_button.click(fn=edit_with_pnp,
                     inputs=[input_video,
                             prompt,
                             num_steps,
                             guidance_scale,
                             seed,
                             n_prompt,
                             control_type,
                             ],
                     outputs=[output_video]
                     )

    gr.Examples(
        examples=get_example(),
        label='Examples',
        inputs=[input_video],
        outputs=[output_video],
        examples_per_page=8
    )

demo.queue()

demo.launch(share=True)
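
The app's imports pull in gradio, torch, torchvision, numpy, opencv-python (cv2), imageio (plus an ffmpeg backend for the libx264 writer), diffusers, controlnet_aux, and Pillow. A hedged install sketch, since this part of the commit does not pin versions or include a requirements file:

    # Sketch only: package names inferred from app.py's imports; versions unpinned.
    # pip install gradio torch torchvision numpy opencv-python imageio imageio-ffmpeg \
    #             diffusers transformers controlnet-aux pillow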
canonical/bear.png ADDED
canonical/boat.png ADDED
canonical/cactus.png ADDED
canonical/corgi.png ADDED
canonical/gold-fish.png ADDED
canonical/koolshooters.png ADDED
canonical/overlook-the-ocean.png ADDED
canonical/rotate.png ADDED
canonical/shark-ocean.png ADDED
canonical/surf.png ADDED
canonical/woman-drink.png ADDED
canonical/yacht.png ADDED
Binary file (830 kB)
examples/boat.mp4 ADDED
Binary file (520 kB)
examples/cactus.mp4 ADDED
Binary file (293 kB)
examples/corgi.mp4 ADDED
Binary file (170 kB)
examples/gold-fish.mp4 ADDED
Binary file (402 kB)
examples/koolshooters.mp4 ADDED
Binary file (375 kB)
examples/overlook-the-ocean.mp4 ADDED
Binary file (993 kB)
examples/rotate.mp4 ADDED
Binary file (719 kB)
examples/shark-ocean.mp4 ADDED
Binary file (442 kB)
examples/surf.mp4 ADDED
Binary file (463 kB)
examples/woman-drink.mp4 ADDED
Binary file (495 kB)
examples/yacht.mp4 ADDED
Binary file (743 kB)
examples_frames/bear/00040.jpg ADDED
examples_frames/bear/00041.jpg ADDED
examples_frames/bear/00042.jpg ADDED
examples_frames/bear/00043.jpg ADDED
examples_frames/bear/00044.jpg ADDED
examples_frames/bear/00045.jpg ADDED
examples_frames/bear/00046.jpg ADDED
examples_frames/bear/00047.jpg ADDED
examples_frames/bear/00048.jpg ADDED
examples_frames/bear/00049.jpg ADDED
examples_frames/bear/00050.jpg ADDED
examples_frames/bear/00051.jpg ADDED
examples_frames/bear/00052.jpg ADDED
examples_frames/bear/00053.jpg ADDED
examples_frames/bear/00054.jpg ADDED
examples_frames/bear/00055.jpg ADDED
examples_frames/bear/00056.jpg ADDED
examples_frames/bear/00057.jpg ADDED
examples_frames/bear/00058.jpg ADDED
examples_frames/bear/00059.jpg ADDED
examples_frames/bear/00060.jpg ADDED
examples_frames/bear/00061.jpg ADDED
examples_frames/bear/00062.jpg ADDED
examples_frames/bear/00063.jpg ADDED