vaishanthr committed on
Commit
00ab2e7
1 Parent(s): f5bc491

updated code files

Browse files
Files changed (3) hide show
  1. app.py +29 -21
  2. point_cloud_generator.py +77 -33
  3. utils.py +3 -2
app.py CHANGED
@@ -5,6 +5,7 @@ import numpy as np
5
  import os
6
  import torch
7
  import utils
 
8
 
9
  from image_segmenter import ImageSegmenter
10
  from monocular_depth_estimator import MonocularDepthEstimator
@@ -21,7 +22,9 @@ def process_image(image):
21
  image_segmentation, objects_data = img_seg.predict(image)
22
  depthmap, depth_colormap = depth_estimator.make_prediction(image)
23
  dist_image = utils.draw_depth_info(image, depthmap, objects_data)
24
- return image_segmentation, depth_colormap, dist_image
 
 
25
 
26
  def test_process_img(image):
27
  image = utils.resize(image)
@@ -32,14 +35,14 @@ def test_process_img(image):
32
  def process_video(vid_path=None):
33
  vid_cap = cv2.VideoCapture(vid_path)
34
  while vid_cap.isOpened():
35
- ret, frame = vid_cap.read()
36
  if ret:
37
  print("making predictions ....")
38
  frame = utils.resize(frame)
39
  image_segmentation, objects_data = img_seg.predict(frame)
40
  depthmap, depth_colormap = depth_estimator.make_prediction(frame)
41
  dist_image = utils.draw_depth_info(frame, depthmap, objects_data)
42
- yield cv2.cvtColor(image_segmentation, cv2.COLOR_BGR2RGB), depth_colormap, dist_image
43
 
44
  return None
45
 
@@ -76,9 +79,9 @@ if __name__ == "__main__":
76
 
77
  # image_segmentation, objects_data, depthmap, depth_colormap = test_process_img(img_1)
78
  # final_image = utils.draw_depth_info(image_segmentation, depthmap, objects_data)
79
- # objs_pcd = utils.generate_obj_pcd(depthmap, objects_data[2][3])
80
  # # print(objs_pcd[0][0])
81
- # # display_pcd(objs_pcd, use_matplotlib=False)
82
 
83
  # cv2.imshow("Segmentation", image_segmentation)
84
  # cv2.imshow("Depth", depthmap*objects_data[2][3])
@@ -91,11 +94,9 @@ if __name__ == "__main__":
91
  with gr.Blocks() as my_app:
92
 
93
  # title
94
- gr.Markdown(
95
- """
96
- # Object segmentation and depth estimation
97
- Input an image or Video
98
- """)
99
 
100
  # tabs
101
  with gr.Tab("Image"):
@@ -119,13 +120,17 @@ if __name__ == "__main__":
119
 
120
  with gr.Row():
121
  dist_img_output = gr.Image(height=300, label="Distance")
122
- pcd_img_output = gr.Image(height=300, label="Point Cloud")
123
 
124
  gr.Markdown("## Sample Images")
125
  gr.Examples(
126
- examples=[os.path.join(os.path.dirname(__file__), "assets/images/bus.jpg")],
 
 
 
 
127
  inputs=img_input,
128
- outputs=[segmentation_img_output, depth_img_output],
129
  fn=process_image,
130
  cache_examples=True,
131
  )
@@ -139,7 +144,7 @@ if __name__ == "__main__":
139
  "Medium - Balanced performance and accuracy",
140
  "Large - Slow performance and high accuracy"],
141
  label="Model Type", value="Small - Better performance and less accuracy",
142
- info="Select the inference model before running predictions!"),
143
 
144
  options_checkbox_vid = gr.CheckboxGroup(["Show Boundary Box", "Show Segmentation Region", "Show Segmentation Boundary"], label="Options")
145
  conf_thres_vid = gr.Slider(1, 100, value=60, label="Confidence Threshold", info="Choose the threshold above which objects should be detected")
@@ -149,33 +154,36 @@ if __name__ == "__main__":
149
 
150
  with gr.Column(scale=2):
151
  with gr.Row():
152
- segmentation_vid_output = gr.Image(height=400, label="Segmentation")
153
- depth_vid_output = gr.Image(height=400, label="Depth Estimation")
154
 
155
  with gr.Row():
156
  dist_vid_output = gr.Image(height=300, label="Distance")
157
- pcd_vid_output = gr.Image(height=300, label="Point Cloud")
158
 
159
  gr.Markdown("## Sample Videos")
160
  gr.Examples(
161
- examples=[os.path.join(os.path.dirname(__file__), "assets/videos/input_video.mp4")],
 
 
 
162
  inputs=vid_input,
163
  # outputs=vid_output,
164
  # fn=vid_segmenation,
165
  )
 
166
 
167
  # image tab logic
168
- submit_btn_img.click(process_image, inputs=img_input, outputs=[segmentation_img_output, depth_img_output, dist_img_output])
169
  options_checkbox_img.change(update_segmentation_options, options_checkbox_img, [])
170
  conf_thres_img.change(update_confidence_threshold, conf_thres_img, [])
171
  model_type_img.change(model_selector, model_type_img, [])
172
 
173
  # video tab logic
174
  submit_btn_vid.click(process_video, inputs=vid_input, outputs=[segmentation_vid_output, depth_vid_output, dist_vid_output])
 
175
  cancel_btn.click(cancel, inputs=[], outputs=[])
176
  options_checkbox_vid.change(update_segmentation_options, options_checkbox_vid, [])
177
- conf_thres_vid.change(update_confidence_threshold, conf_thres_vid, [])
178
-
179
 
180
 
181
  my_app.queue(concurrency_count=5, max_size=20).launch()
 
5
  import os
6
  import torch
7
  import utils
8
+ import plotly.graph_objects as go
9
 
10
  from image_segmenter import ImageSegmenter
11
  from monocular_depth_estimator import MonocularDepthEstimator
 
22
  image_segmentation, objects_data = img_seg.predict(image)
23
  depthmap, depth_colormap = depth_estimator.make_prediction(image)
24
  dist_image = utils.draw_depth_info(image, depthmap, objects_data)
25
+ objs_pcd = utils.generate_obj_pcd(depthmap, objects_data)
26
+ plot_fig = display_pcd(objs_pcd)
27
+ return image_segmentation, depth_colormap, dist_image, plot_fig
28
 
29
  def test_process_img(image):
30
  image = utils.resize(image)
 
35
  def process_video(vid_path=None):
36
  vid_cap = cv2.VideoCapture(vid_path)
37
  while vid_cap.isOpened():
38
+ ret, frame = vid_cap.read()
39
  if ret:
40
  print("making predictions ....")
41
  frame = utils.resize(frame)
42
  image_segmentation, objects_data = img_seg.predict(frame)
43
  depthmap, depth_colormap = depth_estimator.make_prediction(frame)
44
  dist_image = utils.draw_depth_info(frame, depthmap, objects_data)
45
+ yield cv2.cvtColor(image_segmentation, cv2.COLOR_BGR2RGB), depth_colormap, cv2.cvtColor(dist_image, cv2.COLOR_BGR2RGB)
46
 
47
  return None
48
 
 
79
 
80
  # image_segmentation, objects_data, depthmap, depth_colormap = test_process_img(img_1)
81
  # final_image = utils.draw_depth_info(image_segmentation, depthmap, objects_data)
82
+ # objs_pcd = utils.generate_obj_pcd(depthmap, objects_data)
83
  # # print(objs_pcd[0][0])
84
+ # display_pcd(objs_pcd, use_matplotlib=True)
85
 
86
  # cv2.imshow("Segmentation", image_segmentation)
87
  # cv2.imshow("Depth", depthmap*objects_data[2][3])
 
94
  with gr.Blocks() as my_app:
95
 
96
  # title
97
+ gr.Markdown("<h1><center>Simultaneous Segmentation and Depth Estimation</center></h1>")
98
+ gr.Markdown("<h3><center>Created by Vaishanth</center></h3>")
99
+ gr.Markdown("<h3><center>This model estimates the depth of segmented objects.</center></h3>")
 
 
100
 
101
  # tabs
102
  with gr.Tab("Image"):
 
120
 
121
  with gr.Row():
122
  dist_img_output = gr.Image(height=300, label="Distance")
123
+ pcd_img_output = gr.Plot(label="Point Cloud")
124
 
125
  gr.Markdown("## Sample Images")
126
  gr.Examples(
127
+ examples=[os.path.join(os.path.dirname(__file__), "assets/images/baggage_claim.jpg"),
128
+ os.path.join(os.path.dirname(__file__), "assets/images/kitchen_2.png"),
129
+ os.path.join(os.path.dirname(__file__), "assets/images/soccer.jpg"),
130
+ os.path.join(os.path.dirname(__file__), "assets/images/room_2.png"),
131
+ os.path.join(os.path.dirname(__file__), "assets/images/living_room.jpg")],
132
  inputs=img_input,
133
+ outputs=[segmentation_img_output, depth_img_output, dist_img_output, pcd_img_output],
134
  fn=process_image,
135
  cache_examples=True,
136
  )
 
144
  "Medium - Balanced performance and accuracy",
145
  "Large - Slow performance and high accuracy"],
146
  label="Model Type", value="Small - Better performance and less accuracy",
147
+ info="Select the inference model before running predictions!")
148
 
149
  options_checkbox_vid = gr.CheckboxGroup(["Show Boundary Box", "Show Segmentation Region", "Show Segmentation Boundary"], label="Options")
150
  conf_thres_vid = gr.Slider(1, 100, value=60, label="Confidence Threshold", info="Choose the threshold above which objects should be detected")
 
154
 
155
  with gr.Column(scale=2):
156
  with gr.Row():
157
+ segmentation_vid_output = gr.Image(height=300, label="Segmentation")
158
+ depth_vid_output = gr.Image(height=300, label="Depth Estimation")
159
 
160
  with gr.Row():
161
  dist_vid_output = gr.Image(height=300, label="Distance")
 
162
 
163
  gr.Markdown("## Sample Videos")
164
  gr.Examples(
165
+ examples=[os.path.join(os.path.dirname(__file__), "assets/videos/input_video.mp4"),
166
+ os.path.join(os.path.dirname(__file__), "assets/videos/driving.mp4"),
167
+ os.path.join(os.path.dirname(__file__), "assets/videos/overpass.mp4"),
168
+ os.path.join(os.path.dirname(__file__), "assets/videos/walking.mp4")],
169
  inputs=vid_input,
170
  # outputs=vid_output,
171
  # fn=vid_segmenation,
172
  )
173
+
174
 
175
  # image tab logic
176
+ submit_btn_img.click(process_image, inputs=img_input, outputs=[segmentation_img_output, depth_img_output, dist_img_output, pcd_img_output])
177
  options_checkbox_img.change(update_segmentation_options, options_checkbox_img, [])
178
  conf_thres_img.change(update_confidence_threshold, conf_thres_img, [])
179
  model_type_img.change(model_selector, model_type_img, [])
180
 
181
  # video tab logic
182
  submit_btn_vid.click(process_video, inputs=vid_input, outputs=[segmentation_vid_output, depth_vid_output, dist_vid_output])
183
+ model_type_vid.change(model_selector, model_type_vid, [])
184
  cancel_btn.click(cancel, inputs=[], outputs=[])
185
  options_checkbox_vid.change(update_segmentation_options, options_checkbox_vid, [])
186
+ conf_thres_vid.change(update_confidence_threshold, conf_thres_vid, [])
 
187
 
188
 
189
  my_app.queue(concurrency_count=5, max_size=20).launch()
point_cloud_generator.py CHANGED
@@ -2,7 +2,7 @@ import cv2
2
  import numpy as np
3
  import matplotlib.pyplot as plt
4
  import open3d as o3d
5
-
6
 
7
 
8
 
@@ -70,6 +70,7 @@ class PointCloudGenerator:
70
 
71
  def generate_point_cloud(self, depth_img, normalize=False):
72
 
 
73
 
74
  if normalize:
75
  # normalizing depth image
@@ -81,49 +82,92 @@ class PointCloudGenerator:
81
  # convert depth to point cloud
82
  # point_cloud = self.conver_to_point_cloud(depth_img)
83
 
84
- depth_image = o3d.geometry.Image(depth_img)
 
 
 
 
 
 
 
85
 
86
- # Create open3d camera intrinsic object
87
- intrinsic_matrix = np.array([[self.fx_depth, 0, self.cx_depth], [0, self.fy_depth, self.cy_depth], [0, 0, 1]])
88
- camera_intrinsic = o3d.camera.PinholeCameraIntrinsic()
89
- # camera_intrinsic.intrinsic_matrix = intrinsic_matrix
90
- camera_intrinsic.set_intrinsics(depth_image.width, depth_image.height, self.fx_depth, self.fy_depth, self.cx_depth, self.cy_depth)
91
 
92
 
93
  # Create open3d point cloud from depth image
94
- point_cloud = o3d.geometry.PointCloud.create_from_depth_image(depth_img, camera_intrinsic)
 
95
 
96
  return point_cloud
97
 
98
- def display_pcd(pcd_data, use_matplotlib=True):
99
 
100
- if use_matplotlib:
101
- fig = plt.figure()
102
- ax = fig.add_subplot(111, projection='3d')
103
 
104
- for data, clr in pcd_data:
105
- points = np.array(data)
106
- skip = 5
107
- point_range = range(0, points.shape[0], skip) # skip points to prevent crash
 
108
 
109
- if use_matplotlib:
110
- ax.scatter(points[point_range, 0], points[point_range, 1], points[point_range, 2], c='r', marker='o')
111
 
112
- if not use_matplotlib:
113
- pcd_o3d = o3d.geometry.PointCloud() # create point cloud object
114
- pcd_o3d.points = o3d.utility.Vector3dVector(points) # set pcd_np as the point cloud points
115
- # Visualize:
116
- o3d.visualization.draw_geometries([pcd_o3d])
117
-
118
- if use_matplotlib:
119
- ax.set_xlabel('X Label')
120
- ax.set_ylabel('Y Label')
121
- ax.set_zlabel('Z Label')
122
- ax.view_init(elev=90, azim=0, roll=0)
123
- plt.show()
124
-
125
- if not use_matplotlib:
126
- o3d.visualization.draw_geometries([pcd_o3d])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
  if __name__ == "__main__":
129
  depth_img_path = "assets/images/depth_map_p1.png"
 
2
  import numpy as np
3
  import matplotlib.pyplot as plt
4
  import open3d as o3d
5
+ import plotly.graph_objects as go
6
 
7
 
8
 
 
70
 
71
  def generate_point_cloud(self, depth_img, normalize=False):
72
 
73
+ depth_img = np.array(depth_img)
74
 
75
  if normalize:
76
  # normalizing depth image
 
82
  # convert depth to point cloud
83
  # point_cloud = self.conver_to_point_cloud(depth_img)
84
 
85
+ # depth_image = o3d.geometry.Image(depth_img)
86
+ depth_image = o3d.geometry.Image(np.ascontiguousarray(depth_img))
87
+
88
+ # # Create open3d camera intrinsic object
89
+ # intrinsic_matrix = np.array([[self.fx_depth, 0, self.cx_depth], [0, self.fy_depth, self.cy_depth], [0, 0, 1]])
90
+ # camera_intrinsic = o3d.camera.PinholeCameraIntrinsic()
91
+ # # camera_intrinsic.intrinsic_matrix = intrinsic_matrix
92
+ # camera_intrinsic.set_intrinsics(640, 480, self.fx_depth, self.fy_depth, self.cx_depth, self.cy_depth)
93
 
94
+ # camera settings
95
+ # camera_intrinsic = o3d.camera.PinholeCameraIntrinsic(
96
+ # depth_img.shape[0], depth_img.shape[1], 500, 500, depth_img.shape[0] / 2, depth_img.shape[1] / 2
97
+ # )
 
98
 
99
 
100
  # Create open3d point cloud from depth image
101
+ point_cloud = o3d.geometry.PointCloud.create_from_depth_image(depth_image,
102
+ o3d.camera.PinholeCameraIntrinsic( o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault))
103
 
104
  return point_cloud
105
 
106
+ # def display_pcd(pcd_data, use_matplotlib=True):
107
 
108
+ # if use_matplotlib:
109
+ # fig = plt.figure()
110
+ # ax = fig.add_subplot(111, projection='3d')
111
 
112
+ # for data, clr in pcd_data:
113
+ # # points = np.array(data)
114
+ # points = np.asarray(data.points)
115
+ # skip = 5
116
+ # point_range = range(0, points.shape[0], skip) # skip points to prevent crash
117
 
118
+ # if use_matplotlib:
119
+ # ax.scatter(points[point_range, 0], points[point_range, 1], points[point_range, 2]*100, c=list(clr).append(1), marker='o')
120
 
121
+ # # if not use_matplotlib:
122
+ # # pcd_o3d = o3d.geometry.PointCloud() # create point cloud object
123
+ # # pcd_o3d.points = o3d.utility.Vector3dVector(points) # set pcd_np as the point cloud points
124
+ # # # Visualize:
125
+ # # o3d.visualization.draw_geometries([pcd_o3d])
126
+
127
+ # if use_matplotlib:
128
+ # ax.set_xlabel('X Label')
129
+ # ax.set_ylabel('Y Label')
130
+ # ax.set_zlabel('Z Label')
131
+ # ax.view_init(elev=-90, azim=0, roll=-90)
132
+ # # plt.show()
133
+ # return fig
134
+
135
+ # if not use_matplotlib:
136
+ # o3d.visualization.draw_geometries([pcd_o3d])
137
+
138
def display_pcd(pcd_data):
    """Build a Plotly 3D scatter figure from a collection of point clouds.

    Args:
        pcd_data: Iterable of ``(cloud, colour)`` pairs. ``cloud`` must expose
            a ``points`` attribute convertible with ``np.asarray`` to an
            array whose columns 0, 1 and 2 are read as x, y and z.
            ``colour`` is stringified and prefixed with ``rgb`` to form the
            marker colour (presumably an ``(r, g, b)`` tuple so the result
            reads ``rgb(r, g, b)`` -- confirm against the caller).

    Returns:
        The ``plotly.graph_objects.Figure`` holding one ``Scatter3d`` trace
        per input pair.
    """
    stride = 1  # a stride of 1 keeps every point
    figure = go.Figure()

    for cloud, colour in pcd_data:
        xyz = np.asarray(cloud.points)
        sampled = xyz[::stride]
        marker_style = dict(size=1, color='rgb' + str(colour), opacity=1)
        figure.add_trace(
            go.Scatter3d(
                x=sampled[:, 0],
                y=sampled[:, 1],
                z=sampled[:, 2] * 100,  # z values are scaled by 100 for display
                mode='markers',
                marker=marker_style,
            )
        )

    # Axis titles plus an initial camera position on the negative z axis.
    scene_settings = dict(
        xaxis_title='X Label',
        yaxis_title='Y Label',
        zaxis_title='Z Label',
        camera=dict(eye=dict(x=0, y=0, z=-1)),
    )
    figure.update_layout(scene=scene_settings)

    return figure
171
 
172
  if __name__ == "__main__":
173
  depth_img_path = "assets/images/depth_map_p1.png"
utils.py CHANGED
@@ -27,7 +27,8 @@ def draw_depth_info(image, depth_map, objects_data):
27
  center = data[2]
28
  mask = data[3]
29
  _, depth = get_masked_depth(depth_map, mask)
30
- cv2.putText(image, str(round(depth*10, 2))+'m', center, cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
 
31
 
32
  return image
33
 
@@ -35,7 +36,7 @@ def generate_obj_pcd(depth_map, objects_data):
35
  objs_pcd = []
36
  pcd_generator = PointCloudGenerator()
37
 
38
- for data in objects_data[:2]:
39
  mask = data[3]
40
  cls_clr = data[4]
41
  masked_depth = depth_map*mask
 
27
  center = data[2]
28
  mask = data[3]
29
  _, depth = get_masked_depth(depth_map, mask)
30
+ cv2.rectangle(image, (center[0]-15, center[1]-15), (center[0]+(len(str(round(depth*10, 2))+'m')*12), center[1]+15), data[4], -1)
31
+ cv2.putText(image, str(round(depth*10, 2))+'m', (center[0]-5, center[1]+5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
32
 
33
  return image
34
 
 
36
  objs_pcd = []
37
  pcd_generator = PointCloudGenerator()
38
 
39
+ for data in objects_data:
40
  mask = data[3]
41
  cls_clr = data[4]
42
  masked_depth = depth_map*mask