kadirnar committed on
Commit
debc5f3
1 Parent(s): 62f6167
app.py CHANGED
@@ -1,5 +1,5 @@
 from inpaint_zoom.app.zoom_out_app import stable_diffusion_zoom_out_app
-from inpaint_zoom.app.zoom_in_app import stable_diffusion_zoom_in_app
+from inpaint_zoom.app.zoom_in_app import StableDiffusionZoomIn
 
 import gradio as gr
 
@@ -23,7 +23,7 @@ with app:
     with gr.Row():
         with gr.Column():
             with gr.Tab('Zoom In'):
-                stable_diffusion_zoom_in_app()
+                StableDiffusionZoomIn.app()
            with gr.Tab('Zoom Out'):
                 stable_diffusion_zoom_out_app()
 
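Note on the new wiring: in the refactored zoom_in_app.py below, `app` is defined without a `self` parameter, so `StableDiffusionZoomIn.app()` resolves to a plain function on the class and runs without an instance under Python 3. A minimal sketch of that pattern (the `Demo` class is hypothetical, for illustration only):

class Demo:
    def app():          # no self: looked up on the class, this is a plain function
        print("build UI here")

Demo.app()              # fine in Python 3
# Demo().app()          # would raise TypeError: app() takes 0 positional arguments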
inpaint_zoom/app/zoom_in_app.py CHANGED
@@ -22,174 +22,187 @@ stable_paint_negative_prompt_list = [
     "lurry, bad art, blurred, text, watermark",
 ]
 
-
-def stable_diffusion_zoom_in(
-    model_id,
-    prompt,
-    negative_prompt,
-    guidance_scale,
-    num_inference_steps,
-):
-
-    pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16, revision="fp16")
-    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
-    pipe = pipe.to("cuda")
-
-    pipe.safety_checker = dummy
-    pipe.enable_attention_slicing()
-    g_cuda = torch.Generator(device='cuda')
-
-    num_init_images = 2
-    seed = 9999
-    height = 512
-    width = height
-
-    current_image = Image.new(mode="RGBA", size=(height, width))
-    mask_image = np.array(current_image)[:,:,3]
-    mask_image = Image.fromarray(255-mask_image).convert("RGB")
-    current_image = current_image.convert("RGB")
-
-    init_images = pipe(prompt=[prompt]*num_init_images,
-                       negative_prompt=[negative_prompt]*num_init_images,
-                       image=current_image,
-                       guidance_scale = guidance_scale,
-                       height = height,
-                       width = width,
-                       generator = g_cuda.manual_seed(seed),
-                       mask_image=mask_image,
-                       num_inference_steps=num_inference_steps)[0]
-
-
-    image_grid(init_images, rows=1, cols=num_init_images)
-
-
-    init_image_selected = 1 #@param
-    if num_init_images == 1:
-        init_image_selected = 0
-    else:
-        init_image_selected = init_image_selected - 1
-
-    num_outpainting_steps = 20 #@param
-    mask_width = 128 #@param
-    num_interpol_frames = 30 #@param
-
-    current_image = init_images[init_image_selected]
-    all_frames = []
-    all_frames.append(current_image)
-
-    for i in range(num_outpainting_steps):
-        print('Generating image: ' + str(i+1) + ' / ' + str(num_outpainting_steps))
-
-        prev_image_fix = current_image
-
-        prev_image = shrink_and_paste_on_blank(current_image, mask_width)
-
-        current_image = prev_image
-
-        #create mask (black image with white mask_width width edges)
-        mask_image = np.array(current_image)[:,:,3]
-        mask_image = Image.fromarray(255-mask_image).convert("RGB")
-
-        #inpainting step
-        current_image = current_image.convert("RGB")
-        images = pipe(prompt=prompt,
-                      negative_prompt=negative_prompt,
-                      image=current_image,
-                      guidance_scale = guidance_scale,
-                      height = height,
-                      width = width,
-                      #this can make the whole thing deterministic but the output less exciting
-                      #generator = g_cuda.manual_seed(seed),
-                      mask_image=mask_image,
-                      num_inference_steps=num_inference_steps)[0]
-        current_image = images[0]
-        current_image.paste(prev_image, mask=prev_image)
-
-        #interpolation steps bewteen 2 inpainted images (=sequential zoom and crop)
-        for j in range(num_interpol_frames - 1):
-            interpol_image = current_image
-            interpol_width = round(
-                (1- ( 1-2*mask_width/height )**( 1-(j+1)/num_interpol_frames ) )*height/2
-            )
-            interpol_image = interpol_image.crop((interpol_width,
-                                                  interpol_width,
-                                                  width - interpol_width,
-                                                  height - interpol_width))
-
-            interpol_image = interpol_image.resize((height, width))
-
-            #paste the higher resolution previous image in the middle to avoid drop in quality caused by zooming
-            interpol_width2 = round(
-                ( 1 - (height-2*mask_width) / (height-2*interpol_width) ) / 2*height
-            )
-            prev_image_fix_crop = shrink_and_paste_on_blank(prev_image_fix, interpol_width2)
-            interpol_image.paste(prev_image_fix_crop, mask = prev_image_fix_crop)
-
-            all_frames.append(interpol_image)
-
-    all_frames.append(current_image)
-
-    video_file_name = "infinite_zoom_out"
-    fps = 30
-    save_path = video_file_name + ".mp4"
-    write_video(save_path, all_frames, fps)
-    return save_path
-
-
-def stable_diffusion_zoom_in_app():
-    with gr.Blocks():
-        with gr.Row():
-            with gr.Column():
-                text2image_in_model_path = gr.Dropdown(
-                    choices=stable_paint_model_list,
-                    value=stable_paint_model_list[0],
-                    label='Text-Image Model Id'
-                )
-
-                text2image_in_prompt = gr.Textbox(
-                    lines=1,
-                    value=stable_paint_prompt_list[0],
-                    label='Prompt'
-                )
-
-                text2image_in_negative_prompt = gr.Textbox(
-                    lines=1,
-                    value=stable_paint_negative_prompt_list[0],
-                    label='Negative Prompt'
-                )
-
-                with gr.Accordion("Advanced Options", open=False):
-                    text2image_in_guidance_scale = gr.Slider(
-                        minimum=0.1,
-                        maximum=15,
-                        step=0.1,
-                        value=7.5,
-                        label='Guidance Scale'
-                    )
-
-                    text2image_in_num_inference_step = gr.Slider(
-                        minimum=1,
-                        maximum=100,
-                        step=1,
-                        value=50,
-                        label='Num Inference Step'
-                    )
-
-                text2image_in_predict = gr.Button(value='Generator')
-
-            with gr.Column():
-                output_image = gr.Video(label='Output')
-
-
-        text2image_in_predict.click(
-            fn=stable_diffusion_zoom_in,
-            inputs=[
-                text2image_in_model_path,
-                text2image_in_prompt,
-                text2image_in_negative_prompt,
-                text2image_in_guidance_scale,
-                text2image_in_num_inference_step,
-            ],
-            outputs=output_image
-        )
+class StableDiffusionZoomIn:
+    def __init__(self):
+        self.pipe = None
+
+    def load_model(self, model_id):
+        if self.pipe is None:
+            self.pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16, revision="fp16")
+
+        self.pipe.scheduler = DPMSolverMultistepScheduler.from_config(self.pipe.scheduler.config)
+        self.pipe = self.pipe.to("cuda")
+
+        self.pipe.safety_checker = dummy
+        self.pipe.enable_attention_slicing()
+        self.pipe.enable_xformers_memory_efficient_attention()
+        self.g_cuda = torch.Generator(device='cuda')
+
+        return self.pipe
+
+    def generate_video(
+        self,
+        model_id,
+        prompt,
+        negative_prompt,
+        guidance_scale,
+        num_inference_steps,
+    ):
+
+        pipe = self.load_model(model_id)
+
+        num_init_images = 2
+        seed = 9999
+        height = 512
+        width = height
+
+        current_image = Image.new(mode="RGBA", size=(height, width))
+        mask_image = np.array(current_image)[:,:,3]
+        mask_image = Image.fromarray(255-mask_image).convert("RGB")
+        current_image = current_image.convert("RGB")
+
+        init_images = pipe(prompt=[prompt]*num_init_images,
+                           negative_prompt=[negative_prompt]*num_init_images,
+                           image=current_image,
+                           guidance_scale = guidance_scale,
+                           height = height,
+                           width = width,
+                           generator = self.g_cuda.manual_seed(seed),
+                           mask_image=mask_image,
+                           num_inference_steps=num_inference_steps)[0]
+
+
+        image_grid(init_images, rows=1, cols=num_init_images)
+
+        init_image_selected = 1 #@param
+        if num_init_images == 1:
+            init_image_selected = 0
+        else:
+            init_image_selected = init_image_selected - 1
+
+        num_outpainting_steps = 20 #@param
+        mask_width = 128 #@param
+        num_interpol_frames = 30 #@param
+
+        current_image = init_images[init_image_selected]
+        all_frames = []
+        all_frames.append(current_image)
+
+        for i in range(num_outpainting_steps):
+            print('Generating image: ' + str(i+1) + ' / ' + str(num_outpainting_steps))
+
+            prev_image_fix = current_image
+
+            prev_image = shrink_and_paste_on_blank(current_image, mask_width)
+
+            current_image = prev_image
+
+            #create mask (black image with white mask_width width edges)
+            mask_image = np.array(current_image)[:,:,3]
+            mask_image = Image.fromarray(255-mask_image).convert("RGB")
+
+            #inpainting step
+            current_image = current_image.convert("RGB")
+            images = pipe(prompt=prompt,
+                          negative_prompt=negative_prompt,
+                          image=current_image,
+                          guidance_scale = guidance_scale,
+                          height = height,
+                          width = width,
+                          #this can make the whole thing deterministic but the output less exciting
+                          #generator = g_cuda.manual_seed(seed),
+                          mask_image=mask_image,
+                          num_inference_steps=num_inference_steps)[0]
+            current_image = images[0]
+            current_image.paste(prev_image, mask=prev_image)
+
+            #interpolation steps bewteen 2 inpainted images (=sequential zoom and crop)
+            for j in range(num_interpol_frames - 1):
+                interpol_image = current_image
+                interpol_width = round(
+                    (1- ( 1-2*mask_width/height )**( 1-(j+1)/num_interpol_frames ) )*height/2
+                )
+                interpol_image = interpol_image.crop((interpol_width,
+                                                      interpol_width,
+                                                      width - interpol_width,
+                                                      height - interpol_width))
+
+                interpol_image = interpol_image.resize((height, width))
+
+                #paste the higher resolution previous image in the middle to avoid drop in quality caused by zooming
+                interpol_width2 = round(
+                    ( 1 - (height-2*mask_width) / (height-2*interpol_width) ) / 2*height
+                )
+                prev_image_fix_crop = shrink_and_paste_on_blank(prev_image_fix, interpol_width2)
+                interpol_image.paste(prev_image_fix_crop, mask = prev_image_fix_crop)
+
+                all_frames.append(interpol_image)
+
+        all_frames.append(current_image)
+
+        video_file_name = "infinite_zoom_out"
+        fps = 30
+        save_path = video_file_name + ".mp4"
+        write_video(save_path, all_frames, fps)
+        return save_path
+
+
+    def app():
+        with gr.Blocks():
+            with gr.Row():
+                with gr.Column():
+                    text2image_in_model_path = gr.Dropdown(
+                        choices=stable_paint_model_list,
+                        value=stable_paint_model_list[0],
+                        label='Text-Image Model Id'
+                    )
+
+                    text2image_in_prompt = gr.Textbox(
+                        lines=1,
+                        value=stable_paint_prompt_list[0],
+                        label='Prompt'
+                    )
+
+                    text2image_in_negative_prompt = gr.Textbox(
+                        lines=1,
+                        value=stable_paint_negative_prompt_list[0],
+                        label='Negative Prompt'
+                    )
+
+                    with gr.Row():
+                        with gr.Column():
+                            text2image_in_guidance_scale = gr.Slider(
+                                minimum=0.1,
+                                maximum=15,
+                                step=0.1,
+                                value=7.5,
+                                label='Guidance Scale'
+                            )
+
+                            text2image_in_num_inference_step = gr.Slider(
+                                minimum=1,
+                                maximum=100,
+                                step=1,
+                                value=50,
+                                label='Num Inference Step'
+                            )
+
+                    text2image_in_predict = gr.Button(value='Generator')
+
+                with gr.Column():
+                    output_image = gr.Video(label='Output')
+
+
+            text2image_in_predict.click(
+                fn=StableDiffusionZoomIn().generate_video,
+                inputs=[
+                    text2image_in_model_path,
+                    text2image_in_prompt,
+                    text2image_in_negative_prompt,
+                    text2image_in_guidance_scale,
+                    text2image_in_num_inference_step,
+                ],
+                outputs=output_image
+            )
inpaint_zoom/app/zoom_out_app.py CHANGED
@@ -79,7 +79,7 @@ def stable_diffusion_zoom_out_app():
                 )
 
                 text2image_out_prompt = gr.Textbox(
-                    lines=1,
+                    lines=2,
                     value=stable_paint_prompt_list[0],
                     label='Prompt'
                 )
@@ -89,39 +89,41 @@ def stable_diffusion_zoom_out_app():
                     value=stable_paint_negative_prompt_list[0],
                     label='Negative Prompt'
                 )
 
-                with gr.Accordion("Advanced Options", open=False):
-                    text2image_out_guidance_scale = gr.Slider(
-                        minimum=0.1,
-                        maximum=15,
-                        step=0.1,
-                        value=7.5,
-                        label='Guidance Scale'
-                    )
-
-                    text2image_out_num_inference_step = gr.Slider(
-                        minimum=1,
-                        maximum=100,
-                        step=1,
-                        value=50,
-                        label='Num Inference Step'
-                    )
-
-                    text2image_out_step_size = gr.Slider(
-                        minimum=1,
-                        maximum=100,
-                        step=1,
-                        value=10,
-                        label='Step Size'
-                    )
-
-                    text2image_out_num_frames = gr.Slider(
-                        minimum=1,
-                        maximum=100,
-                        step=1,
-                        value=10,
-                        label='Frames'
-                    )
+                with gr.Row():
+                    with gr.Column():
+                        text2image_out_guidance_scale = gr.Slider(
+                            minimum=0.1,
+                            maximum=15,
+                            step=0.1,
+                            value=7.5,
+                            label='Guidance Scale'
+                        )
+
+                        text2image_out_num_inference_step = gr.Slider(
+                            minimum=1,
+                            maximum=100,
+                            step=1,
+                            value=50,
+                            label='Num Inference Step'
+                        )
+                    with gr.Row():
+                        with gr.Column():
+                            text2image_out_step_size = gr.Slider(
+                                minimum=1,
+                                maximum=100,
+                                step=1,
+                                value=10,
+                                label='Step Size'
+                            )
+
+                            text2image_out_num_frames = gr.Slider(
+                                minimum=1,
+                                maximum=100,
+                                step=1,
+                                value=10,
+                                label='Frames'
+                            )
 
                 text2image_out_predict = gr.Button(value='Generator')
 
inpaint_zoom/zoom_out_app.py DELETED
@@ -1,154 +0,0 @@
-from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
-from inpaint_zoom.zoom_out_utils import preprocess_image, preprocess_mask_image, write_video, dummy
-from PIL import Image
-import gradio as gr
-import torch
-import os
-os.environ["CUDA_VISIBLE_DEVICES"]="0"
-
-
-stable_paint_model_list = [
-    "stabilityai/stable-diffusion-2-inpainting",
-    "runwayml/stable-diffusion-inpainting"
-]
-
-stable_paint_prompt_list = [
-    "Ancient underground architectural ruins of Hong Kong in a flooded apocalypse landscape of dead skyscrapers",
-    "A beautiful landscape of a mountain range with a lake in the foreground",
-]
-
-stable_paint_negative_prompt_list = [
-    "lurry, bad art, blurred, text, watermark",
-]
-
-
-def stable_diffusion_zoom_out(
-    model_id,
-    original_prompt,
-    negative_prompt,
-    guidance_scale,
-    num_inference_steps,
-    step_size,
-    num_frames,
-    fps,
-):
-
-    pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
-    pipe.set_use_memory_efficient_attention_xformers(True)
-    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
-    pipe = pipe.to("cuda")
-    pipe.safety_checker = dummy
-
-    new_image = Image.new(mode="RGBA", size=(512,512))
-    current_image, mask_image = preprocess_mask_image(new_image)
-
-    current_image = pipe(
-        prompt=[original_prompt],
-        negative_prompt=[negative_prompt],
-        image=current_image,
-        mask_image=mask_image,
-        num_inference_steps=num_inference_steps,
-        guidance_scale=guidance_scale
-    ).images[0]
-
-
-    all_frames = []
-    all_frames.append(current_image)
-
-    for i in range(num_frames):
-        prev_image = preprocess_image(current_image, step_size, 512)
-        current_image = prev_image
-        current_image, mask_image = preprocess_mask_image(current_image)
-        current_image = pipe(prompt=[original_prompt], negative_prompt=[negative_prompt], image=current_image, mask_image=mask_image, num_inference_steps=num_inference_steps).images[0]
-
-        current_image.paste(prev_image, mask=prev_image)
-        all_frames.append(current_image)
-
-    save_path = "output.mp4"
-    write_video(save_path, all_frames, fps=fps)
-    return save_path
-
-
-def stable_diffusion_text2img_app():
-    with gr.Blocks():
-        with gr.Row():
-            with gr.Column():
-                text2image_out_model_path = gr.Dropdown(
-                    choices=stable_paint_model_list,
-                    value=stable_paint_model_list[0],
-                    label='Text-Image Model Id'
-                )
-
-                text2image_out_prompt = gr.Textbox(
-                    lines=1,
-                    value=stable_paint_prompt_list[0],
-                    label='Prompt'
-                )
-
-                text2image_out_negative_prompt = gr.Textbox(
-                    lines=1,
-                    value=stable_paint_negative_prompt_list[0],
-                    label='Negative Prompt'
-                )
-
-                with gr.Accordion("Advanced Options", open=False):
-                    text2image_out_guidance_scale = gr.Slider(
-                        minimum=0.1,
-                        maximum=15,
-                        step=0.1,
-                        value=7.5,
-                        label='Guidance Scale'
-                    )
-
-                    text2image_out_num_inference_step = gr.Slider(
-                        minimum=1,
-                        maximum=100,
-                        step=1,
-                        value=50,
-                        label='Num Inference Step'
-                    )
-
-                    text2image_out_step_size = gr.Slider(
-                        minimum=1,
-                        maximum=100,
-                        step=1,
-                        value=10,
-                        label='Step Size'
-                    )
-
-                    text2image_out_num_frames = gr.Slider(
-                        minimum=1,
-                        maximum=100,
-                        step=1,
-                        value=10,
-                        label='Frames'
-                    )
-
-                    text2image_out_fps = gr.Slider(
-                        minimum=1,
-                        maximum=100,
-                        step=1,
-                        value=30,
-                        label='FPS'
-                    )
-
-                text2image_out_predict = gr.Button(value='Generator')
-
-            with gr.Column():
-                output_image = gr.Video(label='Output')
-
-
-        text2image_out_predict.click(
-            fn=stable_diffusion_zoom_out,
-            inputs=[
-                text2image_out_model_path,
-                text2image_out_prompt,
-                text2image_out_negative_prompt,
-                text2image_out_guidance_scale,
-                text2image_out_num_inference_step,
-                text2image_out_step_size,
-                text2image_out_num_frames,
-                text2image_out_fps
-            ],
-            outputs=output_image
-        )
inpaint_zoom/zoom_out_utils.py DELETED
@@ -1,45 +0,0 @@
-import numpy as np
-import cv2
-from PIL import Image
-
-def write_video(file_path, frames, fps):
-    """
-    Writes frames to an mp4 video file
-    :param file_path: Path to output video, must end with .mp4
-    :param frames: List of PIL.Image objects
-    :param fps: Desired frame rate
-    """
-
-    w, h = frames[0].size
-    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
-    writer = cv2.VideoWriter(file_path, fourcc, fps, (w, h))
-
-    for frame in frames:
-        np_frame = np.array(frame.convert('RGB'))
-        cv_frame = cv2.cvtColor(np_frame, cv2.COLOR_RGB2BGR)
-        writer.write(cv_frame)
-
-    writer.release()
-
-
-def dummy(images, **kwargs):
-    return images, False
-
-def preprocess_image(current_image, steps, image_size):
-    next_image = np.array(current_image.convert("RGBA"))*0
-    prev_image = current_image.resize((image_size-2*steps,image_size-2*steps))
-    prev_image = prev_image.convert("RGBA")
-    prev_image = np.array(prev_image)
-    next_image[:, :, 3] = 1
-    next_image[steps:image_size-steps,steps:image_size-steps,:] = prev_image
-    prev_image = Image.fromarray(next_image)
-
-    return prev_image
-
-
-def preprocess_mask_image(current_image):
-    mask_image = np.array(current_image)[:,:,3] # assume image has alpha mask (use .mode to check for "RGBA")
-    mask_image = Image.fromarray(255-mask_image).convert("RGB")
-    current_image = current_image.convert("RGB")
-
-    return current_image, mask_image
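For reference, a minimal sketch of how these deleted helpers composed in the old zoom-out loop (assuming the three functions above are in scope; the 512 canvas size and 10 px step mirror the deleted app code):

from PIL import Image

# a fresh RGBA canvas has zero alpha everywhere, so the derived mask
# (255 - alpha) is all white: the first inpainting call fills the whole frame
canvas = Image.new(mode="RGBA", size=(512, 512))
current_image, mask_image = preprocess_mask_image(canvas)

# shrink a finished frame onto a blank canvas, leaving a border with
# near-zero alpha; the next mask is then white only at the edges,
# so the model outpaints just the border
prev_image = preprocess_image(current_image, 10, 512)

# after collecting the PIL frames:
# write_video("output.mp4", all_frames, fps=30)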
utils.py DELETED
@@ -1,45 +0,0 @@
-import numpy as np
-import cv2
-from PIL import Image
-
-def write_video(file_path, frames, fps):
-    """
-    Writes frames to an mp4 video file
-    :param file_path: Path to output video, must end with .mp4
-    :param frames: List of PIL.Image objects
-    :param fps: Desired frame rate
-    """
-
-    w, h = frames[0].size
-    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
-    writer = cv2.VideoWriter(file_path, fourcc, fps, (w, h))
-
-    for frame in frames:
-        np_frame = np.array(frame.convert('RGB'))
-        cv_frame = cv2.cvtColor(np_frame, cv2.COLOR_RGB2BGR)
-        writer.write(cv_frame)
-
-    writer.release()
-
-
-def dummy(images, **kwargs):
-    return images, False
-
-def preprocess_image(current_image, steps, image_size):
-    next_image = np.array(current_image.convert("RGBA"))*0
-    prev_image = current_image.resize((image_size-2*steps,image_size-2*steps))
-    prev_image = prev_image.convert("RGBA")
-    prev_image = np.array(prev_image)
-    next_image[:, :, 3] = 1
-    next_image[steps:image_size-steps,steps:image_size-steps,:] = prev_image
-    prev_image = Image.fromarray(next_image)
-
-    return prev_image
-
-
-def preprocess_mask_image(current_image):
-    mask_image = np.array(current_image)[:,:,3] # assume image has alpha mask (use .mode to check for "RGBA")
-    mask_image = Image.fromarray(255-mask_image).convert("RGB")
-    current_image = current_image.convert("RGB")
-
-    return current_image, mask_image