JianyuanWang committed on
Commit 5d6ede9
1 Parent(s): d814d5a
app.py CHANGED
@@ -154,6 +154,13 @@ def vggsfm_predictions_to_glb(predictions):
     rot[:3, :3] = Rotation.from_euler('y', np.deg2rad(180)).as_matrix()
     glbscene.apply_transform(np.linalg.inv(np.linalg.inv(extrinsics_opencv_4x4[0]) @ opengl_mat @ rot))
 
+    # Calculate the bounding box center and apply the translation
+    bounding_box = glbscene.bounds
+    center = (bounding_box[0] + bounding_box[1]) / 2
+    translation = np.eye(4)
+    translation[:3, 3] = -center
+
+    glbscene.apply_transform(translation)
     # glbfile = "glbscene.glb"
     # glbscene.export(file_obj=glbfile)
     return glbscene
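The block added here recenters the exported GLB scene at the origin: `glbscene.bounds` is the axis-aligned `[min_corner, max_corner]` pair, so translating by the negative midpoint moves the bounding-box center to `(0, 0, 0)`. A minimal standalone sketch of the same idea, assuming a `trimesh.Scene` (the box geometry and offset are illustrative, not from app.py):

```python
import numpy as np
import trimesh

# Illustrative geometry; app.py builds its scene from the reconstruction.
mesh = trimesh.creation.box(extents=(1.0, 2.0, 3.0))
mesh.apply_translation([5.0, 0.0, 0.0])  # start deliberately off-center
scene = trimesh.Scene(mesh)

# scene.bounds is a (2, 3) array of [min_corner, max_corner].
center = scene.bounds.mean(axis=0)

# Build a 4x4 homogeneous transform that shifts that center to the origin.
translation = np.eye(4)
translation[:3, 3] = -center
scene.apply_transform(translation)

assert np.allclose(scene.bounds.mean(axis=0), 0.0)
```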
 
@@ -173,44 +180,100 @@ cake_video = "vggsfm_code/examples/videos/cake_video.mp4"
 cake_images = glob.glob(f'vggsfm_code/examples/cake/images/*')
 british_museum_images = glob.glob(f'vggsfm_code/examples/british_museum/images/*')
 
-if True:
-    demo = gr.Interface(
-        title="🎨 VGGSfM: Visual Geometry Grounded Deep Structure From Motion",
-        fn=vggsfm_demo,
-        inputs=[
-            gr.Video(label="Input video", interactive=True),
-            gr.File(file_count="multiple", label="Input Images", interactive=True),
-            gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Number of query images"),
-            gr.Slider(minimum=512, maximum=4096, step=1, value=1024, label="Number of query points"),
-        ],
-        outputs=[gr.Model3D(label="Reconstruction"), gr.Textbox(label="Log")],
-        examples=[
-            # [apple_video, apple_images, 5, 2048],
-            [cake_video, cake_images, 3, 4096],
-            [british_museum_video, british_museum_images, 2, 4096],],
-        cache_examples=True,
-        # allow_flagging=False,
-        allow_flagging='never',  # Updated from False to 'never'
-        concurrency_limit=1,  # Added concurrency_limit to Interface
-        description = """<div style="text-align: left;">
-        <p>Welcome to <a href="https://github.com/facebookresearch/vggsfm" target="_blank">VGGSfM</a> demo!
-        This space demonstrates 3D reconstruction from input image frames. </p>
-        <p>To get started quickly, you can click on our examples. If you want to reconstruct your own data, simply: </p>
-        <ul style="display: inline-block; text-align: left;">
-            <li>upload the images (.jpg, .png, etc.), or </li>
-            <li>upload a video (.mp4, .mov, etc.) </li>
-        </ul>
-        <p>If both images and videos are uploaded, the demo will only reconstruct the uploaded images. By default, we extract one image frame per second from the input video. To prevent crashes on the Hugging Face space, we currently limit reconstruction to the first 20 image frames. </p>
-        <p>For more details, check our <a href="https://github.com/facebookresearch/vggsfm" target="_blank">GitHub Repo</a> ⭐</p>
-        <p>(Please note that running reconstruction on Hugging Face space is slower than on a local machine.) </p>
-        </div>""",
+########################################################################################################################
+# if True:
+#     demo = gr.Interface(
+#         title="🎨 VGGSfM: Visual Geometry Grounded Deep Structure From Motion",
+#         fn=vggsfm_demo,
+#         inputs=[
+#             gr.Video(label="Input video", interactive=True, scale=1),
+#             gr.File(file_count="multiple", label="Input Images", interactive=True, scale=1),
+#             gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Number of query images", scale=1),
+#             gr.Slider(minimum=512, maximum=4096, step=1, value=1024, label="Number of query points", scale=1),
+#         ],
+#         outputs=[
+#             gr.Model3D(label="Reconstruction", scale=10),
+#             gr.Textbox(label="Log", scale=10)
+#         ],
+#         # outputs=[gr.Model3D(label="Reconstruction", scale=3), gr.Textbox(label="Log", )],
+#         examples=[
+#             # [apple_video, apple_images, 5, 2048],
+#             [cake_video, cake_images, 3, 4096],
+#             [british_museum_video, british_museum_images, 2, 4096],],
+#         cache_examples=False,
+#         # allow_flagging=False,
+#         allow_flagging='never',  # Updated from False to 'never'
+#         concurrency_limit=1,  # Added concurrency_limit to Interface
+#         description = """<div style="text-align: left;">
+#         <p>Welcome to <a href="https://github.com/facebookresearch/vggsfm" target="_blank">VGGSfM</a> demo!
+#         This space demonstrates 3D reconstruction from input image frames. </p>
+#         <p>To get started quickly, you can click on our examples. If you want to reconstruct your own data, simply: </p>
+#         <ul style="display: inline-block; text-align: left;">
+#             <li>upload the images (.jpg, .png, etc.), or </li>
+#             <li>upload a video (.mp4, .mov, etc.) </li>
+#         </ul>
+#         <p>If both images and videos are uploaded, the demo will only reconstruct the uploaded images. By default, we extract one image frame per second from the input video. To prevent crashes on the Hugging Face space, we currently limit reconstruction to the first 20 image frames. </p>
+#         <p>For more details, check our <a href="https://github.com/facebookresearch/vggsfm" target="_blank">GitHub Repo</a> ⭐</p>
+#         <p>(Please note that running reconstruction on Hugging Face space is slower than on a local machine.) </p>
+#         </div>""",
+#     )
+
+with gr.Blocks() as demo:
+    gr.Markdown("# 🎨 VGGSfM: Visual Geometry Grounded Deep Structure From Motion")
+
+    gr.Markdown("""
+    <div style="text-align: left;">
+    <p>Welcome to <a href="https://github.com/facebookresearch/vggsfm" target="_blank">VGGSfM</a> demo!
+    This space demonstrates 3D reconstruction from input image frames. </p>
+    <p>To get started quickly, you can click on our examples (page bottom). If you want to reconstruct your own data, simply: </p>
+    <ul style="display: inline-block; text-align: left;">
+        <li>upload the images (.jpg, .png, etc.), or </li>
+        <li>upload a video (.mp4, .mov, etc.) </li>
+    </ul>
+    <p>If both images and videos are uploaded, the demo will only reconstruct the uploaded images. By default, we extract one image frame per second from the input video. To prevent crashes on the Hugging Face space, we currently limit reconstruction to the first 20 image frames. </p>
+    <p>For more details, check our <a href="https://github.com/facebookresearch/vggsfm" target="_blank">GitHub Repo</a> ⭐</p>
+    <p>(Please note that running reconstruction on Hugging Face space is slower than on a local machine.) </p>
+    </div>
+    """)
+
+    with gr.Row():
+        with gr.Column(scale=1):
+            input_video = gr.Video(label="Input video", interactive=True)
+            input_images = gr.File(file_count="multiple", label="Input Images", interactive=True)
+            num_query_images = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Number of query images")
+            num_query_points = gr.Slider(minimum=512, maximum=4096, step=1, value=1024, label="Number of query points")
+
+        with gr.Column(scale=3):
+            reconstruction_output = gr.Model3D(label="Reconstruction", height=520)
+            log_output = gr.Textbox(label="Log")
+
+    submit_btn = gr.Button("Submit")
+
+    examples = [
+        [cake_video, cake_images, 3, 4096],
+        [british_museum_video, british_museum_images, 2, 4096],
+    ]
+
+    gr.Examples(examples=examples,
+                inputs=[input_video, input_images, num_query_images, num_query_points],
+                outputs=[reconstruction_output, log_output],  # Provide outputs
+                fn=vggsfm_demo,  # Provide the function
+                cache_examples=True,
+                )
+
+    submit_btn.click(
+        vggsfm_demo,
+        [input_video, input_images, num_query_images, num_query_points],
+        [reconstruction_output, log_output],
     )
 
     demo.launch(debug=True, share=True)
-else:
-    import glob
-    files = glob.glob(f'vggsfm_code/examples/cake/images/*', recursive=True)
-    vggsfm_demo(files, None, None)
+########################################################################################################################
+
+# else:
+#     import glob
+#     files = glob.glob(f'vggsfm_code/examples/cake/images/*', recursive=True)
+#     vggsfm_demo(files, None, None)
 
 
 # demo.queue(max_size=20, concurrency_count=1).launch(debug=True, share=True)
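The new layout replaces the one-shot `gr.Interface` with `gr.Blocks`, which allows explicit rows and columns and wires the submit button by hand via `Button.click(fn, inputs, outputs)`. A minimal sketch of that wiring pattern, with a hypothetical `echo` function standing in for `vggsfm_demo`:

```python
import gradio as gr

def echo(text):
    # Hypothetical stand-in for vggsfm_demo: one input, one output.
    return f"You said: {text}"

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Input")
    out = gr.Textbox(label="Output")
    btn = gr.Button("Submit")
    # Same event-wiring pattern as submit_btn.click(...) above.
    btn.click(fn=echo, inputs=inp, outputs=out)

demo.launch()
```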
images_to_videos.py CHANGED
@@ -2,8 +2,7 @@ import cv2
 import os
 
 # Parameters
-
-name = "cake"
+name = "british_museum"
 folder_path = f'vggsfm_code/examples/{name}/images'  # Update with the path to your images
 video_path = f'{name}_video.mp4'
 fps = 1  # frames per second
@@ -22,7 +21,10 @@ video = cv2.VideoWriter(video_path, fourcc, fps, (width, height))
 
 # Add images to video
 for image in images:
-    video.write(cv2.imread(os.path.join(folder_path, image)))
+    img = cv2.imread(os.path.join(folder_path, image))
+    if img.shape[:2] != (height, width):
+        img = cv2.resize(img, (width, height))  # Resize image to match the first image's size
+    video.write(img)
 
 # Release the video writer
-video.release()
+video.release()
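The resize guard matters because `cv2.VideoWriter` is opened with a fixed frame size and, in common builds, silently skips frames whose dimensions do not match, which can leave a truncated or empty video. A quick sanity check one could run after the script (the helper below is ours, not part of the commit; the path matches the script's `video_path`):

```python
import cv2

def count_frames(video_path: str) -> int:
    # Decode the file frame by frame and count what was actually written.
    cap = cv2.VideoCapture(video_path)
    n = 0
    while True:
        ok, _frame = cap.read()
        if not ok:
            break
        n += 1
    cap.release()
    return n

print(count_frames("british_museum_video.mp4"))
```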
vggsfm_code/examples/apple/images/frame000007.jpg DELETED
Binary file (141 kB)
 
vggsfm_code/examples/apple/images/frame000012.jpg DELETED
Binary file (140 kB)
 
vggsfm_code/examples/apple/images/frame000017.jpg DELETED
Binary file (148 kB)
 
vggsfm_code/examples/apple/images/frame000019.jpg DELETED
Binary file (138 kB)
 
vggsfm_code/examples/apple/images/frame000024.jpg DELETED
Binary file (134 kB)
 
vggsfm_code/examples/apple/images/frame000025.jpg DELETED
Binary file (129 kB)
 
vggsfm_code/examples/apple/images/frame000043.jpg DELETED
Binary file (124 kB)
 
vggsfm_code/examples/apple/images/frame000052.jpg DELETED
Binary file (136 kB)
 
vggsfm_code/examples/apple/images/frame000070.jpg DELETED
Binary file (146 kB)
 
vggsfm_code/examples/apple/images/frame000077.jpg DELETED
Binary file (154 kB)
 
vggsfm_code/examples/apple/images/frame000085.jpg DELETED
Binary file (138 kB)
 
vggsfm_code/examples/apple/images/frame000096.jpg DELETED
Binary file (136 kB)
 
vggsfm_code/examples/apple/images/frame000128.jpg DELETED
Binary file (130 kB)
 
vggsfm_code/examples/apple/images/frame000145.jpg DELETED
Binary file (146 kB)
 
vggsfm_code/examples/apple/images/frame000160.jpg DELETED
Binary file (141 kB)
 
vggsfm_code/examples/apple/images/frame000162.jpg DELETED
Binary file (161 kB)
 
vggsfm_code/examples/apple/images/frame000168.jpg DELETED
Binary file (183 kB)
 
vggsfm_code/examples/apple/images/frame000172.jpg DELETED
Binary file (175 kB)
 
vggsfm_code/examples/apple/images/frame000191.jpg DELETED
Binary file (138 kB)
 
vggsfm_code/examples/apple/images/frame000200.jpg DELETED
Binary file (184 kB)
 
vggsfm_code/examples/videos/british_museum_video.mp4 CHANGED
Binary files a/vggsfm_code/examples/videos/british_museum_video.mp4 and b/vggsfm_code/examples/videos/british_museum_video.mp4 differ