andywang committed on
Commit
e3dcfcf
β€’
1 Parent(s): e4a653e

add monoscene lite

Browse files
.gitignore CHANGED
@@ -1,4 +1,4 @@
1
- __pycache__
2
  .ipynb_checkpoints
3
  *.ckpt
4
  gradio*
 
1
+ *__pycache__*
2
  .ipynb_checkpoints
3
  *.ckpt
4
  gradio*
__pycache__/fusion.cpython-37.pyc DELETED
Binary file (14.9 kB)
 
app.py CHANGED
@@ -10,16 +10,13 @@ from monoscene.monoscene import MonoScene
10
  csv.field_size_limit(sys.maxsize)
11
  torch.set_grad_enabled(False)
12
 
13
- # pipeline = pipeline(model="anhquancao/monoscene_kitti")
14
- # model = AutoModel.from_pretrained(
15
- # "anhquancao/monoscene_kitti", trust_remote_code=True, revision='bf033f87c2a86b60903ab811b790a1532c1ae313'
16
- # )#.cuda()
17
  model = MonoScene.load_from_checkpoint(
18
  "monoscene_kitti.ckpt",
19
  dataset="kitti",
20
  n_classes=20,
21
  feature = 64,
22
- project_scale = 2,
23
  full_scene_size = (256, 256, 32),
24
  )
25
 
@@ -45,19 +42,16 @@ def predict(img):
45
  batch[k] = batch[k].unsqueeze(0)#.cuda()
46
 
47
  pred = model(batch).squeeze()
48
- # print(pred.shape)
49
- # pred = majority_pooling(pred, k_size=2)
50
- fig = draw(pred, batch['fov_mask_1'])
51
 
52
 
53
  return fig
54
 
55
- # The output is <b>downsampled by 2</b> to be able to be rendered in browsers.
56
 
57
  description = """
58
  MonoScene Demo on SemanticKITTI Validation Set (Sequence 08), which uses the <b>camera parameters of Sequence 08</b>.
59
  Due to the <b>CPU-only</b> inference, it might take up to 20s to predict a scene. \n
60
- <b>Darker</b> colors represent the <b>scenery outside the Field of View</b>, i.e. not visible on the image.
61
  <center>
62
  <a href="https://cv-rits.github.io/MonoScene/">
63
  <img style="display:inline" alt="Project page" src="https://img.shields.io/badge/Project%20Page-MonoScene-red">
@@ -66,15 +60,17 @@ Due to the <b>CPU-only</b> inference, it might take up to 20s to predict a scene
66
  <a href="https://github.com/cv-rits/MonoScene"><img style="display:inline" src="https://img.shields.io/github/stars/cv-rits/MonoScene?style=social"></a>
67
  </center>
68
  """
69
- title = "MonoScene: Monocular 3D Semantic Scene Completion"
70
  article="""
71
  <center>
72
- <img src='https://visitor-badge.glitch.me/badge?page_id=anhquancao.MonoScene&left_color=darkmagenta&right_color=purple' alt='visitor badge'>
73
  </center>
74
  """
75
 
76
  examples = [
77
- 'images/08/000010.jpg',
 
 
78
  'images/08/000085.jpg',
79
  'images/08/000290.jpg',
80
  'images/08/000465.jpg',
@@ -83,10 +79,10 @@ examples = [
83
  'images/08/001380.jpg',
84
  'images/08/001530.jpg',
85
  'images/08/002360.jpg',
86
- 'images/08/002505.jpg',
87
  'images/08/004059.jpg',
88
  'images/08/003149.jpg',
89
  'images/08/001446.jpg',
 
90
  'images/08/001122.jpg',
91
  'images/08/003533.jpg',
92
  'images/08/003365.jpg',
@@ -108,6 +104,7 @@ examples = [
108
 
109
 
110
 
 
111
  demo = gr.Interface(
112
  predict,
113
  gr.Image(shape=(1220, 370)),
 
10
  csv.field_size_limit(sys.maxsize)
11
  torch.set_grad_enabled(False)
12
 
13
+
 
 
 
14
  model = MonoScene.load_from_checkpoint(
15
  "monoscene_kitti.ckpt",
16
  dataset="kitti",
17
  n_classes=20,
18
  feature = 64,
19
+ project_scale = 4,
20
  full_scene_size = (256, 256, 32),
21
  )
22
 
 
42
  batch[k] = batch[k].unsqueeze(0)#.cuda()
43
 
44
  pred = model(batch).squeeze()
45
+ fig = draw(pred, batch['fov_mask_2'])
 
 
46
 
47
 
48
  return fig
49
 
 
50
 
51
  description = """
52
  MonoScene Demo on SemanticKITTI Validation Set (Sequence 08), which uses the <b>camera parameters of Sequence 08</b>.
53
  Due to the <b>CPU-only</b> inference, it might take up to 20s to predict a scene. \n
54
+ This is a <b>smaller</b> model with half resolution and <b>w/o 3D CRP</b>. You can find the full model at: <a href="https://huggingface.co/spaces/CVPR/MonoScene">https://huggingface.co/spaces/CVPR/MonoScene</a>
55
  <center>
56
  <a href="https://cv-rits.github.io/MonoScene/">
57
  <img style="display:inline" alt="Project page" src="https://img.shields.io/badge/Project%20Page-MonoScene-red">
 
60
  <a href="https://github.com/cv-rits/MonoScene"><img style="display:inline" src="https://img.shields.io/github/stars/cv-rits/MonoScene?style=social"></a>
61
  </center>
62
  """
63
+ title = "MonoScene Lite - Half resolution, w/o 3D CRP"
64
  article="""
65
  <center>
66
+ <img src='https://visitor-badge.glitch.me/badge?page_id=anhquancao.MonoScene_lite&left_color=darkmagenta&right_color=purple' alt='visitor badge'>
67
  </center>
68
  """
69
 
70
  examples = [
71
+ 'images/08/001385.jpg',
72
+ 'images/08/000295.jpg',
73
+ 'images/08/002505.jpg',
74
  'images/08/000085.jpg',
75
  'images/08/000290.jpg',
76
  'images/08/000465.jpg',
 
79
  'images/08/001380.jpg',
80
  'images/08/001530.jpg',
81
  'images/08/002360.jpg',
 
82
  'images/08/004059.jpg',
83
  'images/08/003149.jpg',
84
  'images/08/001446.jpg',
85
+ 'images/08/000010.jpg',
86
  'images/08/001122.jpg',
87
  'images/08/003533.jpg',
88
  'images/08/003365.jpg',
 
104
 
105
 
106
 
107
+
108
  demo = gr.Interface(
109
  predict,
110
  gr.Image(shape=(1220, 370)),
helpers.py CHANGED
@@ -126,7 +126,7 @@ def get_grid_coords(dims, resolution):
126
  return coords_grid
127
 
128
  def get_projections(img_W, img_H):
129
- scale_3ds = [1, 2]
130
  data = {}
131
  for scale_3d in scale_3ds:
132
  scene_size = (51.2, 51.2, 6.4)
@@ -188,7 +188,7 @@ def draw(
188
  fov_mask,
189
  # img_size,
190
  # f,
191
- voxel_size=0.2,
192
  # d=7, # 7m - determine the size of the mesh representing the camera
193
  ):
194
 
 
126
  return coords_grid
127
 
128
  def get_projections(img_W, img_H):
129
+ scale_3ds = [2, 4]
130
  data = {}
131
  for scale_3d in scale_3ds:
132
  scene_size = (51.2, 51.2, 6.4)
 
188
  fov_mask,
189
  # img_size,
190
  # f,
191
+ voxel_size=0.4,
192
  # d=7, # 7m - determine the size of the mesh representing the camera
193
  ):
194
 
images/08/000295.jpg ADDED
images/08/001385.jpg ADDED
monoscene/__pycache__/CRP3D.cpython-37.pyc DELETED
Binary file (2.34 kB)
 
monoscene/__pycache__/DDR.cpython-37.pyc DELETED
Binary file (3.07 kB)
 
monoscene/__pycache__/__init__.cpython-37.pyc DELETED
Binary file (144 Bytes)
 
monoscene/__pycache__/config.cpython-37.pyc DELETED
Binary file (1.19 kB)
 
monoscene/__pycache__/flosp.cpython-37.pyc DELETED
Binary file (1.26 kB)
 
monoscene/__pycache__/modules.cpython-37.pyc DELETED
Binary file (6.39 kB)
 
monoscene/__pycache__/monoscene.cpython-37.pyc DELETED
Binary file (2.48 kB)
 
monoscene/__pycache__/monoscene_model.cpython-37.pyc DELETED
Binary file (953 Bytes)
 
monoscene/__pycache__/unet2d.cpython-37.pyc DELETED
Binary file (5.36 kB)
 
monoscene/__pycache__/unet3d_kitti.cpython-37.pyc DELETED
Binary file (2.01 kB)
 
monoscene/__pycache__/unet3d_nyu.cpython-37.pyc DELETED
Binary file (2.14 kB)
 
monoscene/monoscene.py CHANGED
@@ -96,15 +96,15 @@ class MonoScene(pl.LightningModule):
96
  if x3d is None:
97
  x3d = self.projects[str(scale_2d)](
98
  x_rgb["1_" + str(scale_2d)][i],
99
- torch.div(projected_pix, scale_2d, rounding_mode='floor'),
100
- # projected_pix // scale_2d,
101
  fov_mask,
102
  )
103
  else:
104
  x3d += self.projects[str(scale_2d)](
105
  x_rgb["1_" + str(scale_2d)][i],
106
- torch.div(projected_pix, scale_2d, rounding_mode='floor'),
107
- # projected_pix // scale_2d,
108
  fov_mask,
109
  )
110
  x3ds.append(x3d)
 
96
  if x3d is None:
97
  x3d = self.projects[str(scale_2d)](
98
  x_rgb["1_" + str(scale_2d)][i],
99
+ # torch.div(projected_pix, scale_2d, rounding_mode='floor'),
100
+ projected_pix // scale_2d,
101
  fov_mask,
102
  )
103
  else:
104
  x3d += self.projects[str(scale_2d)](
105
  x_rgb["1_" + str(scale_2d)][i],
106
+ # torch.div(projected_pix, scale_2d, rounding_mode='floor'),
107
+ projected_pix // scale_2d,
108
  fov_mask,
109
  )
110
  x3ds.append(x3d)
monoscene_kitti.ckpt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f0d1324885166f17949bf2dcfc0ee1eb2d2aedd0f48e75b56bb2beb87c1ce3a
3
- size 1796467007
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82b3bff496ca832738dd184d4c9f0d555deb5d26f0d4c6ce916b8b0ec6feab62
3
+ size 1612174535