Spaces:

facebook
/

vggsfm

Running on Zero

App Files Community

JianyuanWang committed on Jun 26

Commit

5d6ede9

•

1 Parent(s): d814d5a

update

Browse files

Files changed (23) hide show

app.py +98 -35
images_to_videos.py +6 -4
vggsfm_code/examples/apple/images/frame000007.jpg +0 -0
vggsfm_code/examples/apple/images/frame000012.jpg +0 -0
vggsfm_code/examples/apple/images/frame000017.jpg +0 -0
vggsfm_code/examples/apple/images/frame000019.jpg +0 -0
vggsfm_code/examples/apple/images/frame000024.jpg +0 -0
vggsfm_code/examples/apple/images/frame000025.jpg +0 -0
vggsfm_code/examples/apple/images/frame000043.jpg +0 -0
vggsfm_code/examples/apple/images/frame000052.jpg +0 -0
vggsfm_code/examples/apple/images/frame000070.jpg +0 -0
vggsfm_code/examples/apple/images/frame000077.jpg +0 -0
vggsfm_code/examples/apple/images/frame000085.jpg +0 -0
vggsfm_code/examples/apple/images/frame000096.jpg +0 -0
vggsfm_code/examples/apple/images/frame000128.jpg +0 -0
vggsfm_code/examples/apple/images/frame000145.jpg +0 -0
vggsfm_code/examples/apple/images/frame000160.jpg +0 -0
vggsfm_code/examples/apple/images/frame000162.jpg +0 -0
vggsfm_code/examples/apple/images/frame000168.jpg +0 -0
vggsfm_code/examples/apple/images/frame000172.jpg +0 -0
vggsfm_code/examples/apple/images/frame000191.jpg +0 -0
vggsfm_code/examples/apple/images/frame000200.jpg +0 -0
vggsfm_code/examples/videos/british_museum_video.mp4 +0 -0

app.py CHANGED Viewed

@@ -154,6 +154,13 @@ def vggsfm_predictions_to_glb(predictions):
     rot[:3, :3] = Rotation.from_euler('y', np.deg2rad(180)).as_matrix()
     glbscene.apply_transform(np.linalg.inv(np.linalg.inv(extrinsics_opencv_4x4[0]) @ opengl_mat @ rot))
     # glbfile = "glbscene.glb"
     # glbscene.export(file_obj=glbfile)
     return glbscene
@@ -173,44 +180,100 @@ cake_video = "vggsfm_code/examples/videos/cake_video.mp4"
 cake_images = glob.glob(f'vggsfm_code/examples/cake/images/*')
 british_museum_images = glob.glob(f'vggsfm_code/examples/british_museum/images/*')
-if True:
-    demo = gr.Interface(
-        title="🎨 VGGSfM: Visual Geometry Grounded Deep Structure From Motion",
-        fn=vggsfm_demo,
-        inputs=[
-            gr.Video(label="Input video", interactive=True),
-            gr.File(file_count="multiple", label="Input Images", interactive=True),
-            gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Number of query images"),
-            gr.Slider(minimum=512, maximum=4096, step=1, value=1024, label="Number of query points"),
-        ],
-        outputs=[gr.Model3D(label="Reconstruction"), gr.Textbox(label="Log")],
-        examples=[
-            # [apple_video, apple_images, 5, 2048],
-            [cake_video, cake_images, 3, 4096],
-            [british_museum_video, british_museum_images, 2, 4096],],
-        cache_examples=True,
-        # allow_flagging=False,
-        allow_flagging='never',  # Updated from False to 'never'
-        concurrency_limit=1,  # Added concurrency_limit to Interface
-        description = """<div style="text-align: left;">
-        <p>Welcome to <a href="https://github.com/facebookresearch/vggsfm" target="_blank">VGGSfM</a> demo!
-        This space demonstrates 3D reconstruction from input image frames. </p>
-        <p>To get started quickly, you can click on our examples. If you want to reconstruct your own data, simply: </p>
-        <ul style="display: inline-block; text-align: left;">
-            <li>upload the images (.jpg, .png, etc.), or </li>
-            <li>upload a video (.mp4, .mov, etc.) </li>
-        </ul>
-        <p>If both images and videos are uploaded, the demo will only reconstruct the uploaded images. By default, we extract one image frame per second from the input video. To prevent crashes on the Hugging Face space, we currently limit reconstruction to the first 20 image frames. </p>
-        <p>For more details, check our <a href="https://github.com/facebookresearch/vggsfm" target="_blank">GitHub Repo</a> ⭐</p>
-        <p>(Please note that running reconstruction on Hugging Face space is slower than on a local machine.) </p>
-        </div>""",
     )
     demo.launch(debug=True, share=True)
-else:
-    import glob
-    files = glob.glob(f'vggsfm_code/examples/cake/images/*', recursive=True)
-    vggsfm_demo(files, None, None)
 # demo.queue(max_size=20, concurrency_count=1).launch(debug=True, share=True)

     rot[:3, :3] = Rotation.from_euler('y', np.deg2rad(180)).as_matrix()
     glbscene.apply_transform(np.linalg.inv(np.linalg.inv(extrinsics_opencv_4x4[0]) @ opengl_mat @ rot))
+    # Calculate the bounding box center and apply the translation
+    bounding_box = glbscene.bounds
+    center = (bounding_box[0] + bounding_box[1]) / 2
+    translation = np.eye(4)
+    translation[:3, 3] = -center
+    glbscene.apply_transform(translation)
     # glbfile = "glbscene.glb"
     # glbscene.export(file_obj=glbfile)
     return glbscene
 cake_images = glob.glob(f'vggsfm_code/examples/cake/images/*')
 british_museum_images = glob.glob(f'vggsfm_code/examples/british_museum/images/*')
+########################################################################################################################
+# if True:
+# demo = gr.Interface(
+#     title="🎨 VGGSfM: Visual Geometry Grounded Deep Structure From Motion",
+#     fn=vggsfm_demo,
+#     inputs=[
+#         gr.Video(label="Input video", interactive=True, scale=1),
+#         gr.File(file_count="multiple", label="Input Images", interactive=True, scale=1),
+#         gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Number of query images", scale=1),
+#         gr.Slider(minimum=512, maximum=4096, step=1, value=1024, label="Number of query points", scale=1),
+#     ],
+#     outputs=[
+#         gr.Model3D(label="Reconstruction", scale=10),
+#         gr.Textbox(label="Log", scale=10)
+#     ],
+#     # outputs=[gr.Model3D(label="Reconstruction", scale=3), gr.Textbox(label="Log", )],
+#     examples=[
+#         # [apple_video, apple_images, 5, 2048],
+#         [cake_video, cake_images, 3, 4096],
+#         [british_museum_video, british_museum_images, 2, 4096],],
+#     cache_examples=False,
+#     # allow_flagging=False,
+#     allow_flagging='never',  # Updated from False to 'never'
+#     concurrency_limit=1,  # Added concurrency_limit to Interface
+#     description = """<div style="text-align: left;">
+#     <p>Welcome to <a href="https://github.com/facebookresearch/vggsfm" target="_blank">VGGSfM</a> demo!
+#     This space demonstrates 3D reconstruction from input image frames. </p>
+#     <p>To get started quickly, you can click on our examples. If you want to reconstruct your own data, simply: </p>
+#     <ul style="display: inline-block; text-align: left;">
+#         <li>upload the images (.jpg, .png, etc.), or </li>
+#         <li>upload a video (.mp4, .mov, etc.) </li>
+#     </ul>
+#     <p>If both images and videos are uploaded, the demo will only reconstruct the uploaded images. By default, we extract one image frame per second from the input video. To prevent crashes on the Hugging Face space, we currently limit reconstruction to the first 20 image frames. </p>
+#     <p>For more details, check our <a href="https://github.com/facebookresearch/vggsfm" target="_blank">GitHub Repo</a> ⭐</p>
+#     <p>(Please note that running reconstruction on Hugging Face space is slower than on a local machine.) </p>
+#     </div>""",
+# )
+with gr.Blocks() as demo:
+    gr.Markdown("# 🎨 VGGSfM: Visual Geometry Grounded Deep Structure From Motion")
+    gr.Markdown("""
+    <div style="text-align: left;">
+    <p>Welcome to <a href="https://github.com/facebookresearch/vggsfm" target="_blank">VGGSfM</a> demo!
+    This space demonstrates 3D reconstruction from input image frames. </p>
+    <p>To get started quickly, you can click on our examples (page bottom). If you want to reconstruct your own data, simply: </p>
+    <ul style="display: inline-block; text-align: left;">
+        <li>upload the images (.jpg, .png, etc.), or </li>
+        <li>upload a video (.mp4, .mov, etc.) </li>
+    </ul>
+    <p>If both images and videos are uploaded, the demo will only reconstruct the uploaded images. By default, we extract one image frame per second from the input video. To prevent crashes on the Hugging Face space, we currently limit reconstruction to the first 20 image frames. </p>
+    <p>For more details, check our <a href="https://github.com/facebookresearch/vggsfm" target="_blank">GitHub Repo</a> ⭐</p>
+    <p>(Please note that running reconstruction on Hugging Face space is slower than on a local machine.) </p>
+    </div>
+    """)
+    with gr.Row():
+        with gr.Column(scale=1):
+            input_video = gr.Video(label="Input video", interactive=True)
+            input_images = gr.File(file_count="multiple", label="Input Images", interactive=True)
+            num_query_images = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Number of query images")
+            num_query_points = gr.Slider(minimum=512, maximum=4096, step=1, value=1024, label="Number of query points")
+        with gr.Column(scale=3):
+            reconstruction_output = gr.Model3D(label="Reconstruction", height=520)
+            log_output = gr.Textbox(label="Log")
+    submit_btn = gr.Button("Submit")
+    examples = [
+        [cake_video, cake_images, 3, 4096],
+        [british_museum_video, british_museum_images, 2, 4096],
+    ]
+    gr.Examples(examples=examples,
+                inputs=[input_video, input_images, num_query_images, num_query_points],
+                outputs=[reconstruction_output, log_output],  # Provide outputs
+                fn=vggsfm_demo,  # Provide the function
+                cache_examples=True
+                )
+    submit_btn.click(
+        vggsfm_demo,
+        [input_video, input_images, num_query_images, num_query_points],
+        [reconstruction_output, log_output]
     )
     demo.launch(debug=True, share=True)
+########################################################################################################################
+# else:
+#     import glob
+#     files = glob.glob(f'vggsfm_code/examples/cake/images/*', recursive=True)
+#     vggsfm_demo(files, None, None)
 # demo.queue(max_size=20, concurrency_count=1).launch(debug=True, share=True)

images_to_videos.py CHANGED Viewed

@@ -2,8 +2,7 @@ import cv2
 import os
 # Parameters
-name = "cake"
 folder_path = f'vggsfm_code/examples/{name}/images'  # Update with the path to your images
 video_path = f'{name}_video.mp4'
 fps = 1  # frames per second
@@ -22,7 +21,10 @@ video = cv2.VideoWriter(video_path, fourcc, fps, (width, height))
 # Add images to video
 for image in images:
-    video.write(cv2.imread(os.path.join(folder_path, image)))
 # Release the video writer
-video.release()

 import os
 # Parameters
+name = "british_museum"
 folder_path = f'vggsfm_code/examples/{name}/images'  # Update with the path to your images
 video_path = f'{name}_video.mp4'
 fps = 1  # frames per second
 # Add images to video
 for image in images:
+    img = cv2.imread(os.path.join(folder_path, image))
+    if img.shape[:2] != (height, width):
+        img = cv2.resize(img, (width, height))  # Resize image to match the first image's size
+    video.write(img)
 # Release the video writer
+video.release()

vggsfm_code/examples/apple/images/frame000007.jpg DELETED Viewed

Binary file (141 kB)

vggsfm_code/examples/apple/images/frame000012.jpg DELETED Viewed

Binary file (140 kB)

vggsfm_code/examples/apple/images/frame000017.jpg DELETED Viewed

Binary file (148 kB)

vggsfm_code/examples/apple/images/frame000019.jpg DELETED Viewed

Binary file (138 kB)

vggsfm_code/examples/apple/images/frame000024.jpg DELETED Viewed

Binary file (134 kB)

vggsfm_code/examples/apple/images/frame000025.jpg DELETED Viewed

Binary file (129 kB)

vggsfm_code/examples/apple/images/frame000043.jpg DELETED Viewed

Binary file (124 kB)

vggsfm_code/examples/apple/images/frame000052.jpg DELETED Viewed

Binary file (136 kB)

vggsfm_code/examples/apple/images/frame000070.jpg DELETED Viewed

Binary file (146 kB)

vggsfm_code/examples/apple/images/frame000077.jpg DELETED Viewed

Binary file (154 kB)

vggsfm_code/examples/apple/images/frame000085.jpg DELETED Viewed

Binary file (138 kB)

vggsfm_code/examples/apple/images/frame000096.jpg DELETED Viewed

Binary file (136 kB)

vggsfm_code/examples/apple/images/frame000128.jpg DELETED Viewed

Binary file (130 kB)

vggsfm_code/examples/apple/images/frame000145.jpg DELETED Viewed

Binary file (146 kB)

vggsfm_code/examples/apple/images/frame000160.jpg DELETED Viewed

Binary file (141 kB)

vggsfm_code/examples/apple/images/frame000162.jpg DELETED Viewed

Binary file (161 kB)

vggsfm_code/examples/apple/images/frame000168.jpg DELETED Viewed

Binary file (183 kB)

vggsfm_code/examples/apple/images/frame000172.jpg DELETED Viewed

Binary file (175 kB)

vggsfm_code/examples/apple/images/frame000191.jpg DELETED Viewed

Binary file (138 kB)

vggsfm_code/examples/apple/images/frame000200.jpg DELETED Viewed

Binary file (184 kB)

vggsfm_code/examples/videos/british_museum_video.mp4 CHANGED Viewed

Binary files a/vggsfm_code/examples/videos/british_museum_video.mp4 and b/vggsfm_code/examples/videos/british_museum_video.mp4 differ