lemonaddie committed
Commit 381efd0 · verified · 1 Parent(s): f5889f1

Delete run

run/run_inference_wild_clip.py DELETED
@@ -1,273 +0,0 @@
- # A reimplemented version in public environments by Xiao Fu and Mu Hu
-
- import argparse
- import os
- import logging
-
- import numpy as np
- import torch
- from PIL import Image
- from tqdm.auto import tqdm
- import glob
- import json
- import cv2
-
- import sys
- sys.path.append("../")
- from models.depth_normal_pipeline_clip import DepthNormalEstimationPipeline
- from utils.seed_all import seed_all
- import matplotlib.pyplot as plt
- from dataloader.file_io import read_hdf5, align_normal, creat_uv_mesh
- from utils.de_normalized import align_scale_shift
- from utils.depth2normal import *
-
- from diffusers import DiffusionPipeline, DDIMScheduler, AutoencoderKL
- from models.unet_2d_condition import UNet2DConditionModel
-
- from transformers import CLIPTextModel, CLIPTokenizer
- from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection
- import torchvision.transforms.functional as TF
- from torchvision.transforms import InterpolationMode
-
- def add_margin(pil_img, top, right, bottom, left, color):
-     """Pad a PIL image with the given margins and fill color."""
-     width, height = pil_img.size
-     new_width = width + right + left
-     new_height = height + top + bottom
-     result = Image.new(pil_img.mode, (new_width, new_height), color)
-     result.paste(pil_img, (left, top))
-     return result
-
- if __name__ == "__main__":
-
-     use_separate = True
-
-     logging.basicConfig(level=logging.INFO)
-
-     # -------------------- Arguments --------------------
-     parser = argparse.ArgumentParser(
-         description="Run MonoDepthNormal Estimation using Stable Diffusion."
-     )
-     parser.add_argument(
-         "--pretrained_model_path",
-         type=str,
-         default='None',
-         help="Pretrained model path, from the Hugging Face Hub or a local directory.",
-     )
-     parser.add_argument(
-         "--input_dir", type=str, required=True, help="Input directory."
-     )
-     parser.add_argument(
-         "--output_dir", type=str, required=True, help="Output directory."
-     )
-     parser.add_argument(
-         "--domain",
-         type=str,
-         default='indoor',
-         required=True,
-         help="Domain of the prediction (e.g. 'indoor' or 'outdoor').",
-     )
-
-     # inference settings
-     parser.add_argument(
-         "--denoise_steps",
-         type=int,
-         default=10,
-         help="Number of diffusion denoising steps; more steps give higher accuracy but slower inference.",
-     )
-     parser.add_argument(
-         "--ensemble_size",
-         type=int,
-         default=10,
-         help="Number of predictions to ensemble; more predictions give better results but run slower.",
-     )
-     parser.add_argument(
-         "--half_precision",
-         action="store_true",
-         help="Run with half precision (16-bit float); may lead to suboptimal results.",
-     )
-
-     # resolution settings
-     parser.add_argument(
-         "--processing_res",
-         type=int,
-         default=768,
-         help="Maximum processing resolution. 0 uses the input image resolution. Default: 768.",
-     )
-     parser.add_argument(
-         "--output_processing_res",
-         action="store_true",
-         help="When the input is resized, output the depth at the resized processing resolution. Default: False.",
-     )
-
-     # depth map colormap
-     parser.add_argument(
-         "--color_map",
-         type=str,
-         default="Spectral",
-         help="Colormap used to render depth predictions.",
-     )
-     # other settings
-     parser.add_argument("--seed", type=int, default=None, help="Random seed.")
-     parser.add_argument(
-         "--batch_size",
-         type=int,
-         default=0,
-         help="Inference batch size. Default: 0 (set automatically).",
-     )
-
-     args = parser.parse_args()
-
-     checkpoint_path = args.pretrained_model_path
-     output_dir = args.output_dir
-     denoise_steps = args.denoise_steps
-     ensemble_size = args.ensemble_size
-
-     if ensemble_size > 15:
-         logging.warning("Large ensemble size; inference will be slow.")
-
-     half_precision = args.half_precision
-
-     processing_res = args.processing_res
-     match_input_res = not args.output_processing_res
-     domain = args.domain
-
-     color_map = args.color_map
-     seed = args.seed
-     batch_size = args.batch_size
-
-     if batch_size == 0:
-         batch_size = 1  # default batch size
-
-     # -------------------- Preparation --------------------
-     # Random seed
-     if seed is None:
-         import time
-
-         seed = int(time.time())
-     seed_all(seed)
-
-     # Output directories
-     output_dir_color = os.path.join(output_dir, "depth_colored")
-     output_dir_npy = os.path.join(output_dir, "depth_npy")
-     output_dir_normal_npy = os.path.join(output_dir, "normal_npy")
-     output_dir_normal_color = os.path.join(output_dir, "normal_colored")
-     os.makedirs(output_dir, exist_ok=True)
-     os.makedirs(output_dir_color, exist_ok=True)
-     os.makedirs(output_dir_npy, exist_ok=True)
-     os.makedirs(output_dir_normal_npy, exist_ok=True)
-     os.makedirs(output_dir_normal_color, exist_ok=True)
-     logging.info(f"output dir = {output_dir}")
-
-     # -------------------- Device --------------------
-     if torch.cuda.is_available():
-         device = torch.device("cuda")
-     else:
-         device = torch.device("cpu")
-         logging.warning("CUDA is not available. Running on CPU will be slow.")
-     logging.info(f"device = {device}")
-
-     # -------------------- Data --------------------
-     input_dir = args.input_dir
-     test_files = sorted(os.listdir(input_dir))
-     n_images = len(test_files)
-     if n_images > 0:
-         logging.info(f"Found {n_images} images")
-     else:
-         logging.error(f"No image found in '{input_dir}'")
-         exit(1)
-
-     # -------------------- Model --------------------
-     if half_precision:
-         dtype = torch.float16
-         logging.info(f"Running with half precision ({dtype}).")
-     else:
-         dtype = torch.float32
-
-     # Declare the pipeline: load it as a whole, or assemble it from
-     # separately downloaded components.
-     if not use_separate:
-         pipe = DepthNormalEstimationPipeline.from_pretrained(checkpoint_path, torch_dtype=dtype)
-         logging.info("Using the complete pipeline.")
-     else:
-         stable_diffusion_repo_path = "Bingxin/Marigold"  # same components as the cfg variant
-         vae = AutoencoderKL.from_pretrained(stable_diffusion_repo_path, subfolder='vae')
-         scheduler = DDIMScheduler.from_pretrained(stable_diffusion_repo_path, subfolder='scheduler')
-         sd_image_variations_diffusers_path = "lambdalabs/sd-image-variations-diffusers"
-         image_encoder = CLIPVisionModelWithProjection.from_pretrained(sd_image_variations_diffusers_path, subfolder="image_encoder")
-         feature_extractor = CLIPImageProcessor.from_pretrained(sd_image_variations_diffusers_path, subfolder="feature_extractor")
-
-         # https://huggingface.co/docs/diffusers/training/adapt_a_model
-         unet = UNet2DConditionModel.from_pretrained(checkpoint_path)
-
-         pipe = DepthNormalEstimationPipeline(vae=vae,
-                                              image_encoder=image_encoder,
-                                              feature_extractor=feature_extractor,
-                                              unet=unet,
-                                              scheduler=scheduler)
-         logging.info("Using separately loaded modules.")
-
-     logging.info("Pipeline loaded successfully.")
-
-     try:
-         pipe.enable_xformers_memory_efficient_attention()
-     except Exception:
-         pass  # run without xformers
-
-     pipe = pipe.to(device)
-
-     # -------------------- Inference and saving --------------------
-     with torch.no_grad():
-         os.makedirs(output_dir, exist_ok=True)
-
-         for test_file in tqdm(test_files, desc="Estimating depth", leave=True):
-             rgb_path = os.path.join(input_dir, test_file)
-
-             # Read input image
-             input_image = Image.open(rgb_path)
-
-             # Predict depth and normals
-             pipe_out = pipe(input_image,
-                             denosing_steps=denoise_steps,  # keyword spelling follows the pipeline's signature
-                             ensemble_size=ensemble_size,
-                             processing_res=processing_res,
-                             match_input_res=match_input_res,
-                             domain=domain,
-                             color_map=color_map,
-                             show_progress_bar=True,
-                             )
-
-             depth_pred: np.ndarray = pipe_out.depth_np
-             depth_colored: Image.Image = pipe_out.depth_colored
-             normal_pred: np.ndarray = pipe_out.normal_np
-             normal_colored: Image.Image = pipe_out.normal_colored
-
-             # Save as npy
-             rgb_name_base = os.path.splitext(os.path.basename(rgb_path))[0]
-             pred_name_base = rgb_name_base + "_pred"
-             npy_save_path = os.path.join(output_dir_npy, f"{pred_name_base}.npy")
-             if os.path.exists(npy_save_path):
-                 logging.warning(f"Existing file: '{npy_save_path}' will be overwritten")
-             np.save(npy_save_path, depth_pred)
-
-             normal_npy_save_path = os.path.join(output_dir_normal_npy, f"{pred_name_base}.npy")
-             if os.path.exists(normal_npy_save_path):
-                 logging.warning(f"Existing file: '{normal_npy_save_path}' will be overwritten")
-             np.save(normal_npy_save_path, normal_pred)
-
-             # Save colorized maps
-             depth_colored_save_path = os.path.join(output_dir_color, f"{pred_name_base}_colored.png")
-             if os.path.exists(depth_colored_save_path):
-                 logging.warning(
-                     f"Existing file: '{depth_colored_save_path}' will be overwritten"
-                 )
-             depth_colored.save(depth_colored_save_path)
-
-             normal_colored_save_path = os.path.join(output_dir_normal_color, f"{pred_name_base}_colored.png")
-             if os.path.exists(normal_colored_save_path):
-                 logging.warning(
-                     f"Existing file: '{normal_colored_save_path}' will be overwritten"
-                 )
-             normal_colored.save(normal_colored_save_path)
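For reference, the core of the deleted script reduces to the following sketch (assumptions: the checkpoint and image paths are placeholders, and the `denosing_steps` keyword spelling is taken from the pipeline's signature):

```python
# Minimal sketch of what run/run_inference_wild_clip.py did; paths are placeholders.
import torch
from PIL import Image
from models.depth_normal_pipeline_clip import DepthNormalEstimationPipeline

pipe = DepthNormalEstimationPipeline.from_pretrained("path/to/checkpoint")
pipe = pipe.to("cuda" if torch.cuda.is_available() else "cpu")

with torch.no_grad():
    out = pipe(Image.open("example.png"),
               denosing_steps=10,        # keyword spelling follows the pipeline
               ensemble_size=10,
               processing_res=768,
               match_input_res=True,
               domain="indoor",
               color_map="Spectral",
               show_progress_bar=True)

out.depth_colored.save("example_depth.png")    # colorized depth map
out.normal_colored.save("example_normal.png")  # colorized normal map
```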
run/run_inference_wild_clip_cfg.py DELETED
@@ -1,273 +0,0 @@
- # A reimplemented version in public environments by Xiao Fu and Mu Hu
-
- import argparse
- import os
- import logging
-
- import numpy as np
- import torch
- from PIL import Image
- from tqdm.auto import tqdm
- import glob
- import json
- import cv2
-
- import sys
- sys.path.append("../")
- from models.depth_normal_pipeline_clip_cfg import DepthNormalEstimationPipeline
- from utils.seed_all import seed_all
- import matplotlib.pyplot as plt
- from dataloader.file_io import read_hdf5, align_normal, creat_uv_mesh
- from utils.de_normalized import align_scale_shift
- from utils.depth2normal import *
-
- from diffusers import DiffusionPipeline, DDIMScheduler, AutoencoderKL
- from models.unet_2d_condition import UNet2DConditionModel
-
- from transformers import CLIPTextModel, CLIPTokenizer
- from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection
- import torchvision.transforms.functional as TF
- from torchvision.transforms import InterpolationMode
-
- if __name__ == "__main__":
-
-     use_separate = True
-
-     logging.basicConfig(level=logging.INFO)
-
-     # -------------------- Arguments --------------------
-     parser = argparse.ArgumentParser(
-         description="Run MonoDepthNormal Estimation using Stable Diffusion."
-     )
-     parser.add_argument(
-         "--pretrained_model_path",
-         type=str,
-         default='None',
-         help="Pretrained model path, from the Hugging Face Hub or a local directory.",
-     )
-     parser.add_argument(
-         "--input_dir", type=str, required=True, help="Input directory."
-     )
-     parser.add_argument(
-         "--output_dir", type=str, required=True, help="Output directory."
-     )
-     parser.add_argument(
-         "--domain",
-         type=str,
-         default='indoor',
-         required=True,
-         help="Domain of the prediction (e.g. 'indoor' or 'outdoor').",
-     )
-
-     # inference settings
-     parser.add_argument(
-         "--denoise_steps",
-         type=int,
-         default=10,
-         help="Number of diffusion denoising steps; more steps give higher accuracy but slower inference.",
-     )
-     parser.add_argument(
-         "--guidance_scale",
-         type=float,
-         default=1.0,
-         help="Scale for classifier-free guidance.",
-     )
-     parser.add_argument(
-         "--ensemble_size",
-         type=int,
-         default=10,
-         help="Number of predictions to ensemble; more predictions give better results but run slower.",
-     )
-     parser.add_argument(
-         "--half_precision",
-         action="store_true",
-         help="Run with half precision (16-bit float); may lead to suboptimal results.",
-     )
-
-     # resolution settings
-     parser.add_argument(
-         "--processing_res",
-         type=int,
-         default=768,
-         help="Maximum processing resolution. 0 uses the input image resolution. Default: 768.",
-     )
-     parser.add_argument(
-         "--output_processing_res",
-         action="store_true",
-         help="When the input is resized, output the depth at the resized processing resolution. Default: False.",
-     )
-
-     # depth map colormap
-     parser.add_argument(
-         "--color_map",
-         type=str,
-         default="Spectral",
-         help="Colormap used to render depth predictions.",
-     )
-     # other settings
-     parser.add_argument("--seed", type=int, default=None, help="Random seed.")
-     parser.add_argument(
-         "--batch_size",
-         type=int,
-         default=0,
-         help="Inference batch size. Default: 0 (set automatically).",
-     )
-
-     args = parser.parse_args()
-
-     checkpoint_path = args.pretrained_model_path
-     output_dir = args.output_dir
-     denoise_steps = args.denoise_steps
-     guidance_scale = args.guidance_scale
-     ensemble_size = args.ensemble_size
-
-     if ensemble_size > 10:
-         logging.warning("Large ensemble size; inference will be slow.")
-
-     half_precision = args.half_precision
-
-     processing_res = args.processing_res
-     match_input_res = not args.output_processing_res
-     domain = args.domain
-
-     color_map = args.color_map
-     seed = args.seed
-     batch_size = args.batch_size
-
-     if batch_size == 0:
-         batch_size = 1  # default batch size
-
-     # -------------------- Preparation --------------------
-     # Random seed
-     if seed is None:
-         import time
-
-         seed = int(time.time())
-     seed_all(seed)
-
-     # Output directories
-     output_dir_color = os.path.join(output_dir, "depth_colored")
-     output_dir_npy = os.path.join(output_dir, "depth_npy")
-     output_dir_normal_npy = os.path.join(output_dir, "normal_npy")
-     output_dir_normal_color = os.path.join(output_dir, "normal_colored")
-     os.makedirs(output_dir, exist_ok=True)
-     os.makedirs(output_dir_color, exist_ok=True)
-     os.makedirs(output_dir_npy, exist_ok=True)
-     os.makedirs(output_dir_normal_npy, exist_ok=True)
-     os.makedirs(output_dir_normal_color, exist_ok=True)
-     logging.info(f"output dir = {output_dir}")
-
-     # -------------------- Device --------------------
-     if torch.cuda.is_available():
-         device = torch.device("cuda")
-     else:
-         device = torch.device("cpu")
-         logging.warning("CUDA is not available. Running on CPU will be slow.")
-     logging.info(f"device = {device}")
-
-     # -------------------- Data --------------------
-     input_dir = args.input_dir
-     test_files = sorted(os.listdir(input_dir))
-     n_images = len(test_files)
-     if n_images > 0:
-         logging.info(f"Found {n_images} images")
-     else:
-         logging.error(f"No image found in '{input_dir}'")
-         exit(1)
-
-     # -------------------- Model --------------------
-     if half_precision:
-         dtype = torch.float16
-         logging.info(f"Running with half precision ({dtype}).")
-     else:
-         dtype = torch.float32
-
-     # Declare the pipeline: load it as a whole, or assemble it from
-     # separately downloaded components.
-     if not use_separate:
-         pipe = DepthNormalEstimationPipeline.from_pretrained(checkpoint_path, torch_dtype=dtype)
-         logging.info("Using the complete pipeline.")
-     else:
-         stable_diffusion_repo_path = "Bingxin/Marigold"
-         vae = AutoencoderKL.from_pretrained(stable_diffusion_repo_path, subfolder='vae')
-         scheduler = DDIMScheduler.from_pretrained(stable_diffusion_repo_path, subfolder='scheduler')
-         sd_image_variations_diffusers_path = "lambdalabs/sd-image-variations-diffusers"
-         image_encoder = CLIPVisionModelWithProjection.from_pretrained(sd_image_variations_diffusers_path, subfolder="image_encoder")
-         feature_extractor = CLIPImageProcessor.from_pretrained(sd_image_variations_diffusers_path, subfolder="feature_extractor")
-
-         # https://huggingface.co/docs/diffusers/training/adapt_a_model
-         unet = UNet2DConditionModel.from_pretrained(checkpoint_path)
-
-         pipe = DepthNormalEstimationPipeline(vae=vae,
-                                              image_encoder=image_encoder,
-                                              feature_extractor=feature_extractor,
-                                              unet=unet,
-                                              scheduler=scheduler)
-         logging.info("Using separately loaded modules.")
-
-     logging.info("Pipeline loaded successfully.")
-
-     try:
-         pipe.enable_xformers_memory_efficient_attention()
-     except Exception:
-         pass  # run without xformers
-
-     pipe = pipe.to(device)
-
-     # -------------------- Inference and saving --------------------
-     with torch.no_grad():
-         os.makedirs(output_dir, exist_ok=True)
-
-         for test_file in tqdm(test_files, desc="Estimating depth", leave=True):
-             rgb_path = os.path.join(input_dir, test_file)
-
-             # Read input image
-             input_image = Image.open(rgb_path)
-
-             # Predict depth and normals with classifier-free guidance
-             pipe_out = pipe(input_image,
-                             denosing_steps=denoise_steps,  # keyword spelling follows the pipeline's signature
-                             ensemble_size=ensemble_size,
-                             processing_res=processing_res,
-                             match_input_res=match_input_res,
-                             guidance_scale=guidance_scale,
-                             domain=domain,
-                             color_map=color_map,
-                             show_progress_bar=True,
-                             )
-
-             depth_pred: np.ndarray = pipe_out.depth_np
-             depth_colored: Image.Image = pipe_out.depth_colored
-             normal_pred: np.ndarray = pipe_out.normal_np
-             normal_colored: Image.Image = pipe_out.normal_colored
-
-             # Save as npy
-             rgb_name_base = os.path.splitext(os.path.basename(rgb_path))[0]
-             pred_name_base = rgb_name_base + "_pred"
-             npy_save_path = os.path.join(output_dir_npy, f"{pred_name_base}.npy")
-             if os.path.exists(npy_save_path):
-                 logging.warning(f"Existing file: '{npy_save_path}' will be overwritten")
-             np.save(npy_save_path, depth_pred)
-
-             normal_npy_save_path = os.path.join(output_dir_normal_npy, f"{pred_name_base}.npy")
-             if os.path.exists(normal_npy_save_path):
-                 logging.warning(f"Existing file: '{normal_npy_save_path}' will be overwritten")
-             np.save(normal_npy_save_path, normal_pred)
-
-             # Save colorized maps
-             depth_colored_save_path = os.path.join(output_dir_color, f"{pred_name_base}_colored.png")
-             if os.path.exists(depth_colored_save_path):
-                 logging.warning(
-                     f"Existing file: '{depth_colored_save_path}' will be overwritten"
-                 )
-             depth_colored.save(depth_colored_save_path)
-
-             normal_colored_save_path = os.path.join(output_dir_normal_color, f"{pred_name_base}_colored.png")
-             if os.path.exists(normal_colored_save_path):
-                 logging.warning(
-                     f"Existing file: '{normal_colored_save_path}' will be overwritten"
-                 )
-             normal_colored.save(normal_colored_save_path)
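The deleted cfg variant differs from the first script only in importing the pipeline from `models.depth_normal_pipeline_clip_cfg` and forwarding a classifier-free guidance scale to the pipeline call. A sketch of the differing call (the scale value is illustrative):

```python
# Only the pipeline call differs: a classifier-free guidance scale is forwarded.
out = pipe(input_image,
           denosing_steps=10,
           ensemble_size=10,
           processing_res=768,
           match_input_res=True,
           guidance_scale=1.0,   # classifier-free guidance strength (illustrative)
           domain="indoor",
           color_map="Spectral",
           show_progress_bar=True)
```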