ohjho committed
Commit b240372
1 Parent(s): b369bda

tested BTS model and added to the app

Files changed (4)
  1. BTS.py +5 -0
  2. BTS_infer.py +62 -12
  3. DPT.py +2 -2
  4. app.py +93 -52
BTS.py CHANGED
@@ -419,6 +419,11 @@ class BtsController:
         depth_map = np.asarray(cv2.cvtColor(depth_map, cv2.COLOR_GRAY2RGB), np.uint8)
         return depth_map
 
+    @staticmethod
+    def depth_map_to_grayimg(depth_map):
+        depth_map = np.asarray(np.squeeze((255 - torch.clamp_max(depth_map * 4, 250)).byte().numpy()), np.uint8)
+        return depth_map
+
     @staticmethod
     def normalize_img(image):
         transformation = A.Compose([
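
The added depth_map_to_grayimg turns the raw BTS depth tensor into an inverted 8-bit grayscale image: depth is scaled by 4, clamped at 250, and subtracted from 255, so near pixels come out bright and far pixels dark. A minimal sketch of the arithmetic on a dummy tensor; the (1, 1, H, W) shape is an assumption, since this diff does not show what BtsController.predict actually returns:

import numpy as np
import torch

depth = torch.rand(1, 1, 4, 4) * 80  # fake metric depth in meters (shape is an assumption)

# same arithmetic as the added method: scale, clamp, invert, squeeze to uint8
gray = np.asarray(
    np.squeeze((255 - torch.clamp_max(depth * 4, 250)).byte().numpy()),
    np.uint8,
)
print(gray.shape, gray.dtype)  # (4, 4) uint8; values in [5, 255], anything past 62.5 m saturates at 5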
BTS_infer.py CHANGED
@@ -1,22 +1,25 @@
 import BTS, cv2, torch, gdown, os, zipfile
 import numpy as np
+from PIL import Image
 
-def download_model_weight(model_dir, key = "1_mENn0G9YlLAAr3N8DVDt4Hk2SBbo1pl"):
+def download_model_weight(model_dir,
+    file_key_dict = {'bts_latest': "1_mENn0G9YlLAAr3N8DVDt4Hk2SBbo1pl"}):
     if not os.path.isdir(model_dir):
         print(f'--- making model directory: {model_dir}')
         os.makedirs(model_dir)
+    fname = list(file_key_dict.keys())[0]
+    key = file_key_dict[fname]
     url = f'https://drive.google.com/uc?id={key}&export=download'
-    tmp_zip_fp = os.path.join(model_dir, 'tmp.zip')
+    tmp_zip_fp = os.path.join(model_dir, fname)
 
     print(f'--- downloading model weights from {url}')
     gdown.download(url, tmp_zip_fp, quiet = True)
 
-    with zipfile.ZipFile(tmp_zip_fp, "r") as zip_ref:
-        for file in zip_ref.namelist():
-            zip_ref.extract(file, model_dir)
-
-    os.remove(tmp_zip_fp)
-    print("--- downloaded model weights done!", flush=True)
+    # with zipfile.ZipFile(tmp_zip_fp, "r") as zip_ref:
+    #     for file in zip_ref.namelist():
+    #         zip_ref.extract(file, model_dir)
+    # os.remove(tmp_zip_fp)
+    print(f"--- downloaded model weights to {tmp_zip_fp}", flush=True)
 
 def get_model(model_path = './models/bts_latest'):
     if not os.path.isfile(model_path):
@@ -26,11 +29,58 @@ def get_model(model_path = './models/bts_latest'):
     model.eval()
     return model
 
-def inference(img_array_rgb, model_obj):
-    # TODO: add resize max 1080 and multiple of 32
+def im_max_long_edge(im_np_array, size = 1080, return_pil_im = False,
+    resample_algo = Image.LANCZOS, debug = False):
+    ''' Return an image whose long edge is no longer than the given size
+    Args:
+        resample_algo: defaults to LANCZOS because it gives the best downscaling quality (per https://pillow.readthedocs.io/en/stable/handbook/concepts.html#filters-comparison-table)
+    '''
+    org_h, org_w, _ = im_np_array.shape
+    out_im = None
+    if debug:
+        print(f'im_max_long_edge: seeing input w,h of {(org_w, org_h)}')
+
+    if max(org_h, org_w) <= size:
+        out_im = im_np_array
+        if debug:
+            print(f'im_max_long_edge: image dim is smaller than max {size}. no resizing required.')
+    else:
+        wh_ratio = org_w / org_h
+        if org_h > org_w:
+            # fix h to size
+            h = size
+            w = h * wh_ratio
+        else:
+            # fix w to size
+            w = size
+            h = w / wh_ratio
+        w = int(w)
+        h = int(h)
+        pil_im = Image.fromarray(im_np_array).resize((w,h), resample = resample_algo)
+        out_im = np.array(pil_im)
+
+        if debug:
+            print(f'im_max_long_edge: resizing image to w,h of {(w,h)}')
+    return Image.fromarray(out_im) if return_pil_im else out_im
+
+def format_depth_map(depth_map, debug = True):
+    dmax = depth_map.max()
+    dmin = depth_map.min()
+    print(f'depth map origin min-max: ({dmin}, {dmax})')
+    # formatted = ((depth_map / dmax) * 255).astype('uint8')
+
+    # min-max normalization
+    formatted = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min())
+    return (formatted * 255).astype('uint8')
+
+def inference(img_array_rgb, model_obj, as_pil = False):
+    h, w, _ = img_array_rgb.shape
+    img_array_rgb = im_max_long_edge(img_array_rgb, return_pil_im = False, size = 720)
     prediction = model_obj.predict(img_array_rgb, is_channels_first = False, normalize = True)
-    visual_depth_map = model_obj.depth_map_to_rgbimg(prediction)
-    return visual_depth_map
+    visual_depth_map = model_obj.depth_map_to_grayimg(prediction)
+    visual_depth_map = format_depth_map(visual_depth_map)
+    visual_depth_map = Image.fromarray(visual_depth_map).resize((w,h), resample = Image.LANCZOS)
+    return visual_depth_map if as_pil else np.array(visual_depth_map)
 
     # prediction = torch.nn.functional.interpolate(
     #     prediction.unsqueeze(1),
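
The rewritten inference now downscales the input so its long edge is at most 720 px, runs the model, min-max normalizes the grayscale map to [0, 255] via (d - d.min()) / (d.max() - d.min()) * 255, and resizes the result back to the original width and height with LANCZOS. A quick sketch of the resize helper's behavior, assuming BTS_infer and its dependencies are importable; the shapes below are illustrative, not from the commit:

import numpy as np
from BTS_infer import im_max_long_edge  # assumes this repo is on the path

im = np.zeros((2000, 1500, 3), dtype=np.uint8)  # dummy image, h=2000, w=1500

# long edge (h) is fixed to 720; the short edge keeps the 0.75 aspect ratio
out = im_max_long_edge(im, size=720)
print(out.shape)  # (720, 540, 3)

# images already within the limit pass through untouched
small = np.zeros((480, 640, 3), dtype=np.uint8)
print(im_max_long_edge(small, size=720).shape)  # (480, 640, 3)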
DPT.py CHANGED
@@ -28,7 +28,7 @@ def load_model(model_type = 'DPT_Large'):
         'midas': midas, 'device': device, 'transform': transform
     }
 
-def inference(img_array_rgb, model_obj):
+def inference(img_array_rgb, model_obj, as_pil = False):
     '''run DPT model and returns a PIL image'''
     # img = cv2.imread(img.name)
     # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
@@ -50,7 +50,7 @@ def inference(img_array_rgb, model_obj):
     output = prediction.cpu().numpy()
     formatted = (output * 255 / np.max(output)).astype('uint8')
     img = Image.fromarray(formatted)
-    return img
+    return img if as_pil else formatted
 
     # inputs = gr.inputs.Image(type='file', label="Original Image")
     # outputs = gr.outputs.Image(type="pil",label="Output Image")
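
With the new as_pil flag, DPT.inference and BTS_infer.inference both default to returning a uint8 NumPy array, which is what the depth filter added in app.py needs for boolean comparisons. A hedged sketch of the two call shapes; the variable names are illustrative, and load_model() is assumed to be able to fetch the MiDaS/DPT weights:

import numpy as np
import DPT  # assumes DPT.py from this repo is importable

model_obj = DPT.load_model()
img = np.zeros((384, 384, 3), dtype=np.uint8)  # stand-in RGB image

depth_np = DPT.inference(img_array_rgb=img, model_obj=model_obj)                # uint8 np array (new default)
depth_pil = DPT.inference(img_array_rgb=img, model_obj=model_obj, as_pil=True)  # PIL.Image, as before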
app.py CHANGED
@@ -26,93 +26,134 @@ def get_image(st_asset = st.sidebar, as_np_arr = False, extension_list = ['jpg',
     im = np.array(im)
     return im
 
-def show_miro_logo(use_column_width = False, width = 100, st_asset= st.sidebar):
-    logo_url = 'https://miro.medium.com/max/1400/0*qLL-32srlq6Y_iTm.png'
+def show_miro_logo(use_column_width = False, width = 100, st_asset= st.sidebar, str_color = 'white'):
+    logo_url = f'https://miro-ps-bucket-copy.s3.us-west-2.amazonaws.com/storage/jho/web_asset/logo/miro_logo_{str_color}.png'
     st_asset.image(logo_url, use_column_width = use_column_width, channels = 'BGR', output_format = 'PNG', width = width)
 
-def im_draw_bbox(pil_im, x0, y0, x1, y1, color = 'black', width = 3, caption = None,
-    bbv_label_only = False):
+def im_apply_mask(im_rgb_array, mask_array, get_pil_im = False, bg_rgb_tup = None,
+    bg_blur_radius = None, bg_greyscale = False, mask_gblur_radius = 0):
     '''
-    draw bounding box on the input image pil_im in-place
+    return either a np array with 4 channels or PIL Image with alpha
+    ref: https://stackoverflow.com/questions/47723154/how-to-use-pil-paste-with-mask
+    ref: https://stackoverflow.com/questions/62273005/compositing-images-by-blurred-mask-in-numpy
+    ref: https://stackoverflow.com/questions/62968174/for-pil-imagefilter-gaussianblur-how-what-kernel-is-used-and-does-the-radius-par
+
     Args:
-        color: color name as read by Pillow.ImageColor
-        use_bbv: use bbox_visualizer
+        bg_rgb_tup: if given, return a 3-channel image with color background instead of transparent
+        bg_blur_radius: if given, return a 3-channel image with GaussianBlur applied to the background
     '''
-    import bbox_visualizer as bbv
-    if any([type(i)== float for i in [x0,y0,x1,y1]]):
-        warnings.warn(f'im_draw_bbox: at least one of x0,y0,x1,y1 is of the type float and is converted to int.')
-        x0 = int(x0)
-        y0 = int(y0)
-        x1 = int(x1)
-        y1 = int(y1)
-
-    if bbv_label_only:
-        if caption:
-            im_array = bbv.draw_flag_with_label(np.array(pil_im),
-                label = caption,
-                bbox = [x0,y0,x1,y1],
-                line_color = ImageColor.getrgb(color),
-                text_bg_color = ImageColor.getrgb(color)
-                )
-        else:
-            raise ValueError(f'im_draw_bbox: bbv_label_only is True but caption is None')
+    h, w, c = im_rgb_array.shape
+    m_h, m_w = mask_array.shape
+
+    if not all([h == m_h, w == m_w]):
+        raise ValueError(f'im_apply_mask: mask_array size {(m_h, m_w)} must match im_rgb_array {(h, w)}')
+
+    im = Image.fromarray(im_rgb_array)
+
+    # convert bitwise mask from np to pillow
+    # ref: https://note.nkmk.me/en/python-pillow-paste/
+    pil_mask = Image.fromarray(np.uint8(255 * mask_array))
+    pil_mask = pil_mask.filter(
+        ImageFilter.GaussianBlur(radius = mask_gblur_radius)
+    ) if mask_gblur_radius > 0 else pil_mask
+
+    if bg_rgb_tup:
+        bg_im = np.zeros([h,w,3], dtype = np.uint8) # black
+        bg_im[:,:] = bg_rgb_tup # apply color
+
+        # old method using just np but doesn't support blurred mask
+        # idx = (mask_array != 0)
+        # bg_im[idx] = im_rgb_array[idx]
+
+        bg_im = Image.fromarray(bg_im)
+        bg_im.paste(im, mask = pil_mask)
+        im = bg_im
+    elif bg_blur_radius:
+        bg_im = im.copy().filter(
+            ImageFilter.GaussianBlur(radius = bg_blur_radius)
+        )
+        bg_im.paste(im, mask = pil_mask)
+        im = bg_im
+    elif bg_greyscale:
+        bg_im = ImageOps.grayscale(Image.fromarray(im_rgb_array))
+        bg_im = np.array(bg_im)
+        bg_im = np.stack((bg_im,)*3, axis = -1) # greyscale 1-channel to 3-channel
+
+        bg_im = Image.fromarray(bg_im)
+        bg_im.paste(im, mask = pil_mask)
+        im = bg_im
     else:
-        im_array = bbv.draw_rectangle(np.array(pil_im),
-            bbox = [x0, y0, x1, y1],
-            bbox_color = ImageColor.getrgb(color),
-            thickness = width
-            )
-        im_array = bbv.add_label(
-            im_array, label = caption,
-            bbox = [x0,y0,x1,y1],
-            text_bg_color = ImageColor.getrgb(color)
-            ) if caption else im_array
-    return Image.fromarray(im_array)
+        im.putalpha(pil_mask)
+
+    return im if get_pil_im else np.array(im)
 
 ### Streamlit App ###
+# @st.experimental_memo
 @st.cache(allow_output_mutation = True)
 def get_model_zoo():
     model_zoo = {
         'DPT': {'infer_func': DPT.inference,'model': DPT.load_model()},
-        # 'BTS': {'infer_func': BTS_infer.inference,'model': BTS_infer.get_model()}
+        'BTS': {'infer_func': BTS_infer.inference,'model': BTS_infer.get_model()}
     }
     return model_zoo
 
-@st.cache(suppress_st_warning=True)
-def mono_depth(pil_im, model_name):
+# @st.experimental_memo(suppress_st_warning=True)
+@st.cache(suppress_st_warning=True,
+    hash_funcs={st.delta_generator.DeltaGenerator: lambda _: None})
+def mono_depth(pil_im, model_name, _st_asset = None):
     s_time = time.time()
     model_zoo = get_model_zoo()
     infer_func = model_zoo[model_name]['infer_func']
     model_obj = model_zoo[model_name]['model']
     depth_im = infer_func(img_array_rgb = np.array(pil_im),
         model_obj = model_obj)
-    st.info(f'''
-        model name: {model_name}\n
-        inference time: `{round(time.time()-s_time,2)}` seconds\n
-        depth image shape: {np.array(depth_im).shape}\n
-        depth image type: {type(depth_im)}
-        ''')
+    if _st_asset:
+        with _st_asset:
+            st.info(f'''
+                model name: {model_name}\n
+                inference time: `{round(time.time()-s_time,2)}` seconds\n
+                depth image shape: {np.array(depth_im).shape}\n
+                depth image type: {type(depth_im)}\n
+                depth map min-max: {depth_im.min()}, {depth_im.max()}
+                ''')
     return depth_im
 
-def Main():
-    st.set_page_config(layout = 'wide')
+def Main(): # streamlit version 1.9.2
+    st.set_page_config(
+        layout = 'wide',
+        page_title = 'Monocular Depth',
+        page_icon = 'https://miro.io/favicon-32x32.png',
+        initial_sidebar_state = 'collapsed'
+    )
     l_col, r_col = st.columns(2)
-    show_miro_logo(st_asset = l_col)
+    show_miro_logo(st_asset = l_col, str_color = 'purple', width = 200)
     with l_col.expander('Monocular Depth: CNN vs Transformers'):
         st.info(f'''
-        Comparsion of two models: [BTS (CNN)](https://github.com/ErenBalatkan/Bts-PyTorch)
-        and [DPT (Transformer)](https://huggingface.co/Intel/dpt-large)
+        Comparison of two [SoTA](https://paperswithcode.com/sota/monocular-depth-estimation-on-nyu-depth-v2) models:
+        [BTS (CNN), 2019](https://github.com/ErenBalatkan/Bts-PyTorch)
+        and [DPT (Transformer), 2021](https://huggingface.co/Intel/dpt-large)
         ''')
     model_zoo = get_model_zoo()
     im = get_image(st_asset = r_col.expander('Input Image', expanded = True), extension_list = ['jpg','jpeg'])
     model_name = l_col.selectbox('Pick Model', options = list(model_zoo.keys()))
 
     if im:
-        d_im = mono_depth(pil_im = im, model_name=model_name)
+        d_im = mono_depth(pil_im = im, model_name=model_name,
+            _st_asset = r_col.expander('inference info'))
 
         l_col, r_col = st.columns(2)
         l_col.image(im, caption = 'Input Image')
         r_col.image(d_im, caption = 'Depth Map')
+
+        with l_col.form('depth filter'):
+            min_d, max_d = st.slider('Depth Filter', value = (0,255),
+                help = 'smaller value = further away from camera',
+                min_value = 0, max_value = 255)
+            submitted = st.form_submit_button('filter depth')
+            if submitted:
+                depth_mask = ((d_im >= min_d) & (d_im <= max_d))
+                depth_filter_im = im_apply_mask(np.array(im), mask_array = depth_mask)
+                r_col.image(depth_filter_im, caption = 'Depth Filtered Image')
     else:
         st.warning(f'please provide an image :point_up:')
 
159