JianyuanWang committed on
Commit
d814d5a
1 Parent(s): dc32bb2
app.py CHANGED
@@ -15,22 +15,26 @@ from datetime import datetime
15
  from vggsfm_code.hf_demo import demo_fn
16
  from omegaconf import DictConfig, OmegaConf
17
  from viz_utils.viz_fn import add_camera
18
-
19
  #
20
  from scipy.spatial.transform import Rotation
21
  import PIL
22
 
23
 
24
- import spaces
25
 
26
- @spaces.GPU
27
  def vggsfm_demo(
28
- input_image,
29
  input_video,
 
30
  query_frame_num,
31
- max_query_pts
32
- # grid_size: int = 10,
33
  ):
 
 
 
 
 
34
  cfg_file = "vggsfm_code/cfgs/demo.yaml"
35
  cfg = OmegaConf.load(cfg_file)
36
 
@@ -61,6 +65,7 @@ def vggsfm_demo(
61
 
62
  fps = vs.get(cv2.CAP_PROP_FPS)
63
 
 
64
  frame_rate = 1
65
  frame_interval = int(fps * frame_rate)
66
 
@@ -70,12 +75,14 @@ def vggsfm_demo(
70
  while video_frame_num<=max_input_image:
71
  (gotit, frame) = vs.read()
72
  count +=1
 
 
 
73
 
74
  if count % frame_interval == 0:
75
  cv2.imwrite(target_dir_images+"/"+f"{video_frame_num:06}.png", frame)
76
  video_frame_num+=1
77
- if not gotit:
78
- break
79
  if video_frame_num<3:
80
  return None, "Please input at least three frames"
81
  else:
@@ -86,13 +93,16 @@ def vggsfm_demo(
86
  print(f"Files have been copied to {target_dir_images}")
87
  cfg.SCENE_DIR = target_dir
88
 
89
- try:
90
- predictions = demo_fn(cfg)
91
- except:
92
- return None, "Something seems to be incorrect. Please verify that your inputs are formatted correctly. If the issue persists, kindly create a GitHub issue for further assistance."
93
 
94
- glbfile = vggsfm_predictions_to_glb(predictions)
95
 
 
 
 
96
 
97
  print(input_image)
98
  print(input_video)
@@ -132,12 +142,6 @@ def vggsfm_predictions_to_glb(predictions):
132
  cur_cam_color = camera_edge_colors[idx % len(camera_edge_colors)]
133
  cur_focal = intrinsics_opencv[idx, 0, 0]
134
 
135
- # cur_image_path = raw_image_paths[idx]
136
-
137
- # cur_image = np.array(PIL.Image.open(cur_image_path))
138
- # add_camera(glbscene, cam_to_world, cur_cam_color, image=None, imsize=cur_image.shape[1::-1],
139
- # focal=None,screen_width=0.3)
140
-
141
  add_camera(glbscene, cam_to_world, cur_cam_color, image=None, imsize=(1024,1024),
142
  focal=None,screen_width=0.35)
143
 
@@ -150,35 +154,59 @@ def vggsfm_predictions_to_glb(predictions):
150
  rot[:3, :3] = Rotation.from_euler('y', np.deg2rad(180)).as_matrix()
151
  glbscene.apply_transform(np.linalg.inv(np.linalg.inv(extrinsics_opencv_4x4[0]) @ opengl_mat @ rot))
152
 
153
- glbfile = "glbscene.glb"
154
- glbscene.export(file_obj=glbfile)
155
- return glbfile
 
 
 
 
 
 
 
156
 
 
157
 
158
 
159
 
 
 
 
160
 
161
  if True:
162
  demo = gr.Interface(
163
  title="🎨 VGGSfM: Visual Geometry Grounded Deep Structure From Motion",
164
- description="<div style='text-align: left;'> \
165
- <p>Welcome to <a href='https://github.com/facebookresearch/vggsfm' target='_blank'>VGGSfM</a>!",
166
  fn=vggsfm_demo,
167
  inputs=[
168
- gr.File(file_count="multiple", label="Input Images", interactive=True),
169
  gr.Video(label="Input video", interactive=True),
 
170
  gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Number of query images"),
171
  gr.Slider(minimum=512, maximum=4096, step=1, value=1024, label="Number of query points"),
172
  ],
173
  outputs=[gr.Model3D(label="Reconstruction"), gr.Textbox(label="Log")],
 
 
 
 
174
  cache_examples=True,
175
  # allow_flagging=False,
176
  allow_flagging='never', # Updated from False to 'never'
177
- concurrency_limit=1 # Added concurrency_limit to Interface
 
 
 
 
 
 
 
 
 
 
 
 
178
  )
179
- demo.queue(max_size=20).launch(debug=True)
180
 
181
- # demo.launch(debug=True, share=True)
182
  else:
183
  import glob
184
  files = glob.glob(f'vggsfm_code/examples/cake/images/*', recursive=True)
 
15
  from vggsfm_code.hf_demo import demo_fn
16
  from omegaconf import DictConfig, OmegaConf
17
  from viz_utils.viz_fn import add_camera
18
+ import glob
19
  #
20
  from scipy.spatial.transform import Rotation
21
  import PIL
22
 
23
 
24
+ # import spaces
25
 
26
+ # @spaces.GPU
27
  def vggsfm_demo(
 
28
  input_video,
29
+ input_image,
30
  query_frame_num,
31
+ max_query_pts=4096,
 
32
  ):
33
+
34
+ if input_video is not None:
35
+ if not isinstance(input_video, str):
36
+ input_video = input_video["video"]["path"]
37
+
38
  cfg_file = "vggsfm_code/cfgs/demo.yaml"
39
  cfg = OmegaConf.load(cfg_file)
40
 
 
65
 
66
  fps = vs.get(cv2.CAP_PROP_FPS)
67
 
68
+
69
  frame_rate = 1
70
  frame_interval = int(fps * frame_rate)
71
 
 
75
  while video_frame_num<=max_input_image:
76
  (gotit, frame) = vs.read()
77
  count +=1
78
+
79
+ if not gotit:
80
+ break
81
 
82
  if count % frame_interval == 0:
83
  cv2.imwrite(target_dir_images+"/"+f"{video_frame_num:06}.png", frame)
84
  video_frame_num+=1
85
+
 
86
  if video_frame_num<3:
87
  return None, "Please input at least three frames"
88
  else:
 
93
  print(f"Files have been copied to {target_dir_images}")
94
  cfg.SCENE_DIR = target_dir
95
 
96
+ # try:
97
+ predictions = demo_fn(cfg)
98
+ # except:
99
+ # return None, "Something seems to be incorrect. Please verify that your inputs are formatted correctly. If the issue persists, kindly create a GitHub issue for further assistance."
100
 
101
+ glbscene = vggsfm_predictions_to_glb(predictions)
102
 
103
+ glbfile = target_dir + "/glbscene.glb"
104
+ glbscene.export(file_obj=glbfile)
105
+
106
 
107
  print(input_image)
108
  print(input_video)
 
142
  cur_cam_color = camera_edge_colors[idx % len(camera_edge_colors)]
143
  cur_focal = intrinsics_opencv[idx, 0, 0]
144
 
 
 
 
 
 
 
145
  add_camera(glbscene, cam_to_world, cur_cam_color, image=None, imsize=(1024,1024),
146
  focal=None,screen_width=0.35)
147
 
 
154
  rot[:3, :3] = Rotation.from_euler('y', np.deg2rad(180)).as_matrix()
155
  glbscene.apply_transform(np.linalg.inv(np.linalg.inv(extrinsics_opencv_4x4[0]) @ opengl_mat @ rot))
156
 
157
+ # glbfile = "glbscene.glb"
158
+ # glbscene.export(file_obj=glbfile)
159
+ return glbscene
160
+
161
+ # apple_video = "vggsfm_code/examples/videos/apple_video.mp4"
162
+ # os.path.join(os.path.dirname(__file__), "apple_video.mp4")
163
+ british_museum_video = "vggsfm_code/examples/videos/british_museum_video.mp4"
164
+
165
+ # os.path.join(os.path.dirname(__file__), "british_museum_video.mp4")
166
+ cake_video = "vggsfm_code/examples/videos/cake_video.mp4"
167
 
168
+ # os.path.join(os.path.dirname(__file__), "cake_video.mp4")
169
 
170
 
171
 
172
+ # apple_images = glob.glob(f'vggsfm_code/examples/apple/images/*')
173
+ cake_images = glob.glob(f'vggsfm_code/examples/cake/images/*')
174
+ british_museum_images = glob.glob(f'vggsfm_code/examples/british_museum/images/*')
175
 
176
  if True:
177
  demo = gr.Interface(
178
  title="🎨 VGGSfM: Visual Geometry Grounded Deep Structure From Motion",
 
 
179
  fn=vggsfm_demo,
180
  inputs=[
 
181
  gr.Video(label="Input video", interactive=True),
182
+ gr.File(file_count="multiple", label="Input Images", interactive=True),
183
  gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Number of query images"),
184
  gr.Slider(minimum=512, maximum=4096, step=1, value=1024, label="Number of query points"),
185
  ],
186
  outputs=[gr.Model3D(label="Reconstruction"), gr.Textbox(label="Log")],
187
+ examples=[
188
+ # [apple_video, apple_images, 5, 2048],
189
+ [cake_video, cake_images, 3, 4096],
190
+ [british_museum_video, british_museum_images, 2, 4096],],
191
  cache_examples=True,
192
  # allow_flagging=False,
193
  allow_flagging='never', # Updated from False to 'never'
194
+ concurrency_limit=1, # Added concurrency_limit to Interface
195
+ description = """<div style="text-align: left;">
196
+ <p>Welcome to <a href="https://github.com/facebookresearch/vggsfm" target="_blank">VGGSfM</a> demo!
197
+ This space demonstrates 3D reconstruction from input image frames. </p>
198
+ <p>To get started quickly, you can click on our examples. If you want to reconstruct your own data, simply: </p>
199
+ <ul style="display: inline-block; text-align: left;">
200
+ <li>upload the images (.jpg, .png, etc.), or </li>
201
+ <li>upload a video (.mp4, .mov, etc.) </li>
202
+ </ul>
203
+ <p>If both images and videos are uploaded, the demo will only reconstruct the uploaded images. By default, we extract one image frame per second from the input video. To prevent crashes on the Hugging Face space, we currently limit reconstruction to the first 20 image frames. </p>
204
+ <p>For more details, check our <a href="https://github.com/facebookresearch/vggsfm" target="_blank">GitHub Repo</a> ⭐</p>
205
+ <p>(Please note that running reconstruction on Hugging Face space is slower than on a local machine.) </p>
206
+ </div>""",
207
  )
 
208
 
209
+ demo.launch(debug=True, share=True)
210
  else:
211
  import glob
212
  files = glob.glob(f'vggsfm_code/examples/cake/images/*', recursive=True)
images_to_videos.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Convert a folder of image frames into an .mp4 video.

Reads every image file in ``folder_path`` in sorted (filename) order and
writes them as consecutive frames of ``video_path`` at ``fps`` frames per
second. All frames are written at the size of the first readable image.
"""
import os

import cv2

# --- Parameters -------------------------------------------------------------
name = "cake"
folder_path = f'vggsfm_code/examples/{name}/images'  # Update with the path to your images
video_path = f'{name}_video.mp4'
fps = 1  # frames per second

# Only accept real image files: os.listdir also returns non-image entries
# (e.g. .DS_Store), which would previously have been fed to the writer.
IMAGE_EXTS = (".png", ".jpg", ".jpeg", ".bmp")

# Sort by name so frame order follows the (zero-padded) numeric filenames.
images = sorted(
    f for f in os.listdir(folder_path) if f.lower().endswith(IMAGE_EXTS)
)
if not images:
    # Fail with a clear message instead of an opaque IndexError below.
    raise SystemExit(f"No images found in {folder_path!r}")

# The first image fixes the frame size for the whole video.
first = cv2.imread(os.path.join(folder_path, images[0]))
if first is None:  # cv2.imread returns None on failure rather than raising
    raise SystemExit(f"Could not read first image {images[0]!r}")
height, width = first.shape[:2]

# Define the codec and create the VideoWriter object ('x264' also works).
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
video = cv2.VideoWriter(video_path, fourcc, fps, (width, height))

try:
    # Add images to the video, skipping any that cannot be decoded.
    for image in images:
        frame = cv2.imread(os.path.join(folder_path, image))
        if frame is None:
            print(f"Skipping unreadable image: {image}")
            continue
        video.write(frame)
finally:
    # Always release the writer so the container is finalized, even on error.
    video.release()
vggsfm_code/examples/videos/british_museum_video.mp4 ADDED
Binary file (95.9 kB). View file
 
vggsfm_code/examples/videos/cake_video.mp4 ADDED
Binary file (295 kB). View file