toshas committed
Commit 4e4c4c8
Parent: 7d6afa0

fix ui not updating outputs when changing parameters


- increase the video processing pipeline duration
- fix markdown center alignment
- add a reference to the original Marigold demo
- simplify the bas-relief update flow in the UX
- point to the prs-eth org model checkpoints
- fix reproducibility with seeding
- add license headers

Files changed (5):

1. README.md +7 -10
2. app.py +47 -44
3. extrude.py +20 -0
4. marigold_depth_estimation_lcm.py +10 -4
5. requirements.txt +5 -4
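The changes most relevant to the commit title are in app.py below: results are now written into a fresh `tempfile.mkdtemp()` directory on every run, and the bas-relief button calls the processing function directly instead of going through a wrapper that toggled component interactivity. A minimal, self-contained Gradio sketch of that pattern (the component names and the dummy `process` body are illustrative, not the demo's actual code):

```python
import os
import tempfile

import gradio as gr


def process(strength):
    # Stand-in for the Marigold-LCM pipeline call; only the output handling
    # mirrors the commit: a fresh directory per run yields new output paths.
    path_output_dir = tempfile.mkdtemp()
    path_out = os.path.join(path_output_dir, f"result_strength_{strength}.txt")
    with open(path_out, "w") as f:
        f.write(f"strength={strength}\n")
    return path_out


with gr.Blocks() as demo:
    strength = gr.Slider(0.0, 1.0, value=0.5, label="Strength")
    btn = gr.Button("Create")
    out = gr.File(label="Output")
    # Bind the processing function directly to the click event, with no wrapper
    # that disables inputs or the button between runs.
    btn.click(fn=process, inputs=[strength], outputs=[out], concurrency_limit=1)

if __name__ == "__main__":
    demo.launch()
```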
README.md CHANGED
@@ -4,26 +4,23 @@ emoji: 🏵️
 colorFrom: blue
 colorTo: red
 sdk: gradio
-sdk_version: 4.23.0
+sdk_version: 4.21.0
 app_file: app.py
 pinned: true
 license: cc-by-sa-4.0
 models:
-- prs-eth/marigold-v1-0
 - prs-eth/marigold-lcm-v1-0
 ---
 
 This is a demo of Marigold-LCM, the state-of-the-art depth estimator for images in the wild.
 It combines the power of the original Marigold 10-step estimator and the Latent Consistency Models, delivering high-quality results in as little as one step.
-Find out more in our paper titled ["Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation"](https://arxiv.org/abs/2312.02145)
+Find out more in our CVPR 2024 paper titled ["Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation"](https://arxiv.org/abs/2312.02145)
 
 ```
-@misc{ke2023repurposing,
+@InProceedings{ke2023repurposing,
 title={Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation},
 author={Bingxin Ke and Anton Obukhov and Shengyu Huang and Nando Metzger and Rodrigo Caye Daudt and Konrad Schindler},
-year={2023},
-eprint={2312.02145},
-archivePrefix={arXiv},
-primaryClass={cs.CV}
+booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+year={2024}
 }
 ```
app.py CHANGED
@@ -1,6 +1,26 @@
+# Copyright 2024 Anton Obukhov, ETH Zurich. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# --------------------------------------------------------------------------
+# If you find this code useful, we kindly ask you to cite our paper in your work.
+# Please find bibtex at: https://github.com/prs-eth/Marigold#-citation
+# More information about the method can be found at https://marigoldmonodepth.github.io
+# --------------------------------------------------------------------------
+
+
 import functools
 import os
-import shutil
+import tempfile
 import zipfile
 from io import BytesIO
 
@@ -10,7 +30,6 @@ import imageio as imageio
 import numpy as np
 import torch as torch
 from PIL import Image
-from diffusers import UNet2DConditionModel, LCMScheduler
 from gradio_imageslider import ImageSlider
 from huggingface_hub import login
 from tqdm import tqdm
@@ -52,7 +71,6 @@ def process_image(
 denoise_steps=default_image_denoise_steps,
 ensemble_size=default_image_ensemble_size,
 processing_res=default_image_processing_res,
-reproducible=default_image_reproducuble,
 ):
 input_image = Image.open(path_input)
 
@@ -62,7 +80,7 @@ def process_image(
 ensemble_size=ensemble_size,
 processing_res=processing_res,
 batch_size=1 if processing_res == 0 else 0,
-seed=default_seed if reproducible else None,
+seed=default_seed,
 show_progress_bar=False,
 )
 
@@ -70,8 +88,7 @@ def process_image(
 depth_colored = pipe_out.depth_colored
 depth_16bit = (depth_pred * 65535.0).astype(np.uint16)
 
-path_output_dir = os.path.splitext(path_input)[0] + "_output"
-os.makedirs(path_output_dir, exist_ok=True)
+path_output_dir = tempfile.mkdtemp()
 
 name_base = os.path.splitext(os.path.basename(path_input))[0]
 path_out_fp32 = os.path.join(path_output_dir, f"{name_base}_depth_fp32.npy")
@@ -99,8 +116,7 @@ def process_video(
 out_max_frames=default_video_out_max_frames,
 progress=gr.Progress(),
 ):
-path_output_dir = os.path.splitext(path_input)[0] + "_output"
-os.makedirs(path_output_dir, exist_ok=True)
+path_output_dir = tempfile.mkdtemp()
 
 name_base = os.path.splitext(os.path.basename(path_input))[0]
 path_out_vis = os.path.join(path_output_dir, f"{name_base}_depth_colored.mp4")
@@ -152,6 +168,7 @@ def process_video(
 batch_size=0,
 depth_latent_init=prev_depth_latent,
 depth_latent_init_strength=depth_latent_init_strength,
+return_depth_latent=True,
 seed=default_seed,
 show_progress_bar=False,
 )
@@ -204,8 +221,7 @@ def process_bas(
 if plane_near >= plane_far:
 raise gr.Error("NEAR plane must have a value smaller than the FAR plane")
 
-path_output_dir = os.path.splitext(path_input)[0] + "_output"
-os.makedirs(path_output_dir, exist_ok=True)
+path_output_dir = tempfile.mkdtemp()
 
 name_base, name_ext = os.path.splitext(os.path.basename(path_input))
 
@@ -280,7 +296,7 @@ def process_bas(
 
 def run_demo_server(pipe):
 process_pipe_image = spaces.GPU(functools.partial(process_image, pipe))
-process_pipe_video = spaces.GPU(functools.partial(process_video, pipe))
+process_pipe_video = spaces.GPU(functools.partial(process_video, pipe), duration=120)
 process_pipe_bas = spaces.GPU(functools.partial(process_bas, pipe))
 os.environ["GRADIO_ALLOW_FLAGGING"] = "never"
 
@@ -304,6 +320,18 @@ def run_demo_server(pipe):
 font-size: 20px !important;
 color: crimson !important;
 }
+h1 {
+text-align: center;
+display: block;
+}
+h2 {
+text-align: center;
+display: block;
+}
+h3 {
+text-align: center;
+display: block;
+}
 """,
 head="""
 <script async src="https://www.googletagmanager.com/gtag/js?id=G-1FWSVCGZTG"></script>
@@ -317,7 +345,7 @@ def run_demo_server(pipe):
 ) as demo:
 gr.Markdown(
 """
-<h1 align="center">Marigold-LCM Depth Estimation</h1>
+# Marigold-LCM Depth Estimation
 <p align="center">
 <a title="Website" href="https://marigoldmonodepth.github.io/" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
 <img src="https://www.obukhov.ai/img/badges/badge-website.svg">
@@ -336,9 +364,10 @@ def run_demo_server(pipe):
 Marigold-LCM is the fast version of Marigold, the state-of-the-art depth estimator for images in the wild.
 It combines the power of the original Marigold 10-step estimator and the Latent Consistency Models, delivering high-quality results in as little as <b>one step</b>.
 We provide three functions in this demo: Image, Video, and Bas-relief 3D processing — <b>see the tabs below</b>.
-Upload your content into the <b>left</b> side, or click any of the <b>examples</b> below.
-Wait a second (for images and 3D) or a minute (for videos), and interact with the result in the <b>right</b> side.
+Upload your content into the <b>first</b> pane, or click any of the <b>examples</b> below.
+Wait a second (for images and 3D) or a minute (for videos), and interact with the result in the <b>second</b> pane.
 To avoid queuing, fork the demo into your profile.
+<a href="https://huggingface.co/spaces/prs-eth/marigold">The original Marigold demo is also available</a>.
 </p>
 """
 )
@@ -474,8 +503,6 @@ def run_demo_server(pipe):
 <p align="justify">
 This part of the demo uses Marigold-LCM to create a bas-relief model.
 The models are watertight, with correct normals, and exported in the STL format, which makes them <b>3D-printable</b>.
-Start by uploading the image and click "Create" with the default parameters.
-To improve the result, click "Clear", adjust the geometry sliders below, and click "Create" again.
 </p>
 """,
 )
@@ -487,7 +514,6 @@ def run_demo_server(pipe):
 )
 with gr.Row():
 bas_submit_btn = gr.Button(value="Create 3D", variant="primary")
-bas_clear_btn = gr.Button(value="Clear")
 bas_reset_btn = gr.Button(value="Reset")
 with gr.Accordion("3D printing demo: Main options", open=True):
 bas_plane_near = gr.Slider(
@@ -703,13 +729,8 @@ def run_demo_server(pipe):
 concurrency_limit=1,
 )
 
-def wrapper_process_pipe_bas(*args, **kwargs):
-out = list(process_pipe_bas(*args, **kwargs))
-out = [gr.Button(interactive=False), gr.Image(interactive=False)] + out
-return out
-
 bas_submit_btn.click(
-fn=wrapper_process_pipe_bas,
+fn=process_pipe_bas,
 inputs=[
 bas_input,
 bas_plane_near,
@@ -725,18 +746,7 @@ def run_demo_server(pipe):
 bas_frame_near,
 bas_frame_far,
 ],
-outputs=[bas_submit_btn, bas_input, bas_output_viewer, bas_output_files],
-concurrency_limit=1,
-)
-
-bas_clear_btn.click(
-fn=lambda: (gr.Button(interactive=True), None, None),
-inputs=[],
-outputs=[
-bas_submit_btn,
-bas_output_viewer,
-bas_output_files,
-],
+outputs=[bas_output_viewer, bas_output_files],
 concurrency_limit=1,
 )
 
@@ -790,21 +800,14 @@ def run_demo_server(pipe):
 
 
 def main():
-CHECKPOINT = "prs-eth/marigold-v1-0"
-CHECKPOINT_UNET_LCM = "prs-eth/marigold-lcm-v1-0"
+CHECKPOINT = "prs-eth/marigold-lcm-v1-0"
 
 if "HF_TOKEN_LOGIN" in os.environ:
 login(token=os.environ["HF_TOKEN_LOGIN"])
 
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-pipe = MarigoldDepthConsistencyPipeline.from_pretrained(
-CHECKPOINT,
-unet=UNet2DConditionModel.from_pretrained(
-CHECKPOINT_UNET_LCM, subfolder="unet", use_auth_token=True
-),
-)
-pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
+pipe = MarigoldDepthConsistencyPipeline.from_pretrained(CHECKPOINT)
 try:
 import xformers
 
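The video-related changes above (`return_depth_latent=True` in the pipeline call and the extended GPU `duration=120`) support a per-frame loop that feeds each frame's depth latent into the next call. A rough sketch of that loop, with the keyword arguments taken from the diff; the frame paths are hypothetical, and the positional image argument and the `depth_latent` output field name are assumptions, since the full call site is not shown here:

```python
import glob

import torch
from PIL import Image

from marigold_depth_estimation_lcm import MarigoldDepthConsistencyPipeline

DEFAULT_SEED = 2024
frame_paths = sorted(glob.glob("frames/*.png"))  # hypothetical pre-extracted frames

pipe = MarigoldDepthConsistencyPipeline.from_pretrained("prs-eth/marigold-lcm-v1-0")
pipe = pipe.to("cuda" if torch.cuda.is_available() else "cpu")

prev_depth_latent = None
for path in frame_paths:
    pipe_out = pipe(
        Image.open(path),                     # positional image argument (assumed)
        denoise_steps=1,
        ensemble_size=1,
        processing_res=768,
        batch_size=0,
        depth_latent_init=prev_depth_latent,  # previous frame's latent for temporal consistency
        depth_latent_init_strength=0.1,
        return_depth_latent=True,             # new flag added in this commit
        seed=DEFAULT_SEED,
        show_progress_bar=False,
    )
    prev_depth_latent = pipe_out.depth_latent  # output field name assumed, not shown in the diff
```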
extrude.py CHANGED
@@ -1,3 +1,23 @@
+# Copyright 2024 Anton Obukhov, ETH Zurich. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# --------------------------------------------------------------------------
+# If you find this code useful, we kindly ask you to cite our paper in your work.
+# Please find bibtex at: https://github.com/prs-eth/Marigold#-citation
+# More information about the method can be found at https://marigoldmonodepth.github.io
+# --------------------------------------------------------------------------
+
+
 import math
 import os
 
marigold_depth_estimation_lcm.py CHANGED
@@ -1,4 +1,4 @@
-# Copyright 2024 Anton Obukhov, Bingxin Ke, ETH Zurich and The HuggingFace Team. All rights reserved.
+# Copyright 2024 Bingxin Ke, Anton Obukhov, ETH Zurich and The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -119,6 +119,7 @@ class MarigoldDepthConsistencyPipeline(DiffusionPipeline):
 batch_size: int = 0,
 depth_latent_init: torch.Tensor = None,
 depth_latent_init_strength: float = 0.1,
+return_depth_latent: bool = False,
 seed: int = None,
 color_map: str = "Spectral",
 show_progress_bar: bool = True,
@@ -147,6 +148,8 @@ class MarigoldDepthConsistencyPipeline(DiffusionPipeline):
 Initial depth map latent for better temporal consistency.
 depth_latent_init_strength (`float`, *optional*, defaults to `0.1`)
 Degree of initial depth latent influence, must be between 0 and 1.
+return_depth_latent (`bool`, defaults to False)
+Whether to return the depth latent.
 seed (`int`, *optional*, defaults to `None`)
 Reproducibility seed.
 show_progress_bar (`bool`, *optional*, defaults to `True`):
@@ -247,8 +250,11 @@ class MarigoldDepthConsistencyPipeline(DiffusionPipeline):
 min_d = torch.min(depth_pred)
 max_d = torch.max(depth_pred)
 depth_pred = (depth_pred - min_d) / (max_d - min_d)
-if ensemble_size > 1:
-depth_latent = self._encode_depth(2 * depth_pred - 1)
+if return_depth_latent:
+if ensemble_size > 1:
+depth_latent = self._encode_depth(2 * depth_pred - 1)
+else:
+depth_latent = None
 
 # Convert to numpy
 depth_pred = depth_pred.cpu().numpy().astype(np.float32)
@@ -385,7 +391,7 @@ class MarigoldDepthConsistencyPipeline(DiffusionPipeline):
 ).sample # [B, 4, h, w]
 
 # compute the previous noisy sample x_t -> x_t-1
-depth_latent = self.scheduler.step(noise_pred, t, depth_latent).prev_sample
+depth_latent = self.scheduler.step(noise_pred, t, depth_latent, generator=rng).prev_sample
 
 depth = self._decode_depth(depth_latent)
 
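The reproducibility fix above threads the pipeline's seeded generator (`rng`) into `scheduler.step`, because LCM-style schedulers inject fresh noise between denoising steps. A standalone sketch of the effect, assuming diffusers' `LCMScheduler` and toy tensors in place of real latents and model outputs:

```python
import torch
from diffusers import LCMScheduler


def one_step(seed: int) -> torch.Tensor:
    scheduler = LCMScheduler()
    scheduler.set_timesteps(4)
    sample = torch.zeros(1, 4, 8, 8)       # toy latent
    noise_pred = torch.zeros_like(sample)  # toy model output
    rng = torch.Generator().manual_seed(seed)
    t = scheduler.timesteps[0]
    # Without generator=rng, the noise LCM injects between steps differs from run to run.
    return scheduler.step(noise_pred, t, sample, generator=rng).prev_sample


assert torch.allclose(one_step(2024), one_step(2024))    # same seed, identical latents
assert not torch.allclose(one_step(2024), one_step(17))  # different seed, different latents
```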
requirements.txt CHANGED
@@ -1,4 +1,4 @@
-gradio==4.23.0
+gradio==4.21.0
 gradio-imageslider==0.0.16
 pygltflib==1.16.1
 trimesh==4.0.5
@@ -6,10 +6,11 @@ imageio
 imageio-ffmpeg
 Pillow
 
-accelerate==0.28.0
+spaces>=0.25.0
+accelerate>=0.22.0
 diffusers==0.27.2
 matplotlib==3.8.2
 scipy==1.11.4
 torch==2.0.1
-transformers==4.39.1
-xformers==0.0.21
+transformers>=4.32.1
+xformers>=0.0.21