fix ui not updating outputs when changing parameters
increase video processing pipeline duration
fix markdown center alignment
add reference to the original marigold demo
simplify the bas-relief updates via ux
point to the prs-eth org model checkpoints
fix reproducibility with seeding
add license headers
- README.md +7 -10
- app.py +47 -44
- extrude.py +20 -0
- marigold_depth_estimation_lcm.py +10 -4
- requirements.txt +5 -4
README.md
CHANGED
@@ -4,26 +4,23 @@ emoji: 🏵️
 colorFrom: blue
 colorTo: red
 sdk: gradio
-sdk_version: 4.…
+sdk_version: 4.21.0
 app_file: app.py
 pinned: true
 license: cc-by-sa-4.0
 models:
-- prs-eth/marigold-v1-0
 - prs-eth/marigold-lcm-v1-0
 ---
 
 This is a demo of Marigold-LCM, the state-of-the-art depth estimator for images in the wild.
 It combines the power of the original Marigold 10-step estimator and the Latent Consistency Models, delivering high-quality results in as little as one step.
-Find out more in our paper titled ["Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation"](https://arxiv.org/abs/2312.02145)
+Find out more in our CVPR 2024 paper titled ["Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation"](https://arxiv.org/abs/2312.02145)
 
 ```
-@…
-…
-archivePrefix={arXiv},
-primaryClass={cs.CV}
+@InProceedings{ke2023repurposing,
+  title={Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation},
+  author={Bingxin Ke and Anton Obukhov and Shengyu Huang and Nando Metzger and Rodrigo Caye Daudt and Konrad Schindler},
+  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+  year={2024}
 }
 ```
app.py
CHANGED
@@ -1,6 +1,26 @@
+# Copyright 2024 Anton Obukhov, ETH Zurich. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# --------------------------------------------------------------------------
+# If you find this code useful, we kindly ask you to cite our paper in your work.
+# Please find bibtex at: https://github.com/prs-eth/Marigold#-citation
+# More information about the method can be found at https://marigoldmonodepth.github.io
+# --------------------------------------------------------------------------
+
+
 import functools
 import os
-import …
+import tempfile
 import zipfile
 from io import BytesIO
 
@@ -10,7 +30,6 @@ import imageio as imageio
 import numpy as np
 import torch as torch
 from PIL import Image
-from diffusers import UNet2DConditionModel, LCMScheduler
 from gradio_imageslider import ImageSlider
 from huggingface_hub import login
 from tqdm import tqdm
@@ -52,7 +71,6 @@ def process_image(
     denoise_steps=default_image_denoise_steps,
     ensemble_size=default_image_ensemble_size,
     processing_res=default_image_processing_res,
-    reproducible=default_image_reproducuble,
 ):
     input_image = Image.open(path_input)
 
@@ -62,7 +80,7 @@ def process_image(
         ensemble_size=ensemble_size,
         processing_res=processing_res,
         batch_size=1 if processing_res == 0 else 0,
-        seed=default_seed …
+        seed=default_seed,
         show_progress_bar=False,
     )
 
@@ -70,8 +88,7 @@ def process_image(
     depth_colored = pipe_out.depth_colored
     depth_16bit = (depth_pred * 65535.0).astype(np.uint16)
 
-    path_output_dir = …
-    os.makedirs(path_output_dir, exist_ok=True)
+    path_output_dir = tempfile.mkdtemp()
 
     name_base = os.path.splitext(os.path.basename(path_input))[0]
     path_out_fp32 = os.path.join(path_output_dir, f"{name_base}_depth_fp32.npy")
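All three `tempfile.mkdtemp()` changes in this commit follow the same pattern: each request writes into a fresh scratch directory instead of a shared, pre-created folder, so concurrent users can never clobber each other's files. A minimal sketch of the pattern as read from the hunks; `save_image_outputs` is an illustrative helper, not a function from the repo:

```python
import os
import tempfile

import numpy as np
from PIL import Image


def save_image_outputs(depth_pred: np.ndarray, path_input: str) -> str:
    # A fresh directory per request: no name collisions between concurrent
    # users, and no os.makedirs(..., exist_ok=True) bookkeeping.
    path_output_dir = tempfile.mkdtemp()

    name_base = os.path.splitext(os.path.basename(path_input))[0]

    # Full-precision depth for downstream use.
    np.save(os.path.join(path_output_dir, f"{name_base}_depth_fp32.npy"), depth_pred)

    # A 16-bit PNG preserves far more depth levels than an 8-bit image.
    depth_16bit = (depth_pred * 65535.0).astype(np.uint16)
    Image.fromarray(depth_16bit).save(
        os.path.join(path_output_dir, f"{name_base}_depth_16bit.png")
    )
    return path_output_dir
```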
@@ -99,8 +116,7 @@ def process_video(
     out_max_frames=default_video_out_max_frames,
     progress=gr.Progress(),
 ):
-    path_output_dir = …
-    os.makedirs(path_output_dir, exist_ok=True)
+    path_output_dir = tempfile.mkdtemp()
 
     name_base = os.path.splitext(os.path.basename(path_input))[0]
     path_out_vis = os.path.join(path_output_dir, f"{name_base}_depth_colored.mp4")
@@ -152,6 +168,7 @@ def process_video(
             batch_size=0,
             depth_latent_init=prev_depth_latent,
             depth_latent_init_strength=depth_latent_init_strength,
+            return_depth_latent=True,
             seed=default_seed,
             show_progress_bar=False,
         )
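The new `return_depth_latent=True` pairs with the existing `depth_latent_init=prev_depth_latent`: each frame's latent seeds the next frame's prediction, which keeps the video temporally stable. A sketch of the per-frame loop under the assumption that the pipeline output exposes the latent as `pipe_out.depth_latent` (the diff shows the flag, not the output field name); step and resolution values are illustrative:

```python
# Illustrative per-frame loop; `pipe`, `frames`, and `default_seed`
# come from the surrounding demo code.
prev_depth_latent = None
for frame in frames:
    pipe_out = pipe(
        frame,
        denoise_steps=1,                      # one LCM step per frame
        ensemble_size=1,
        batch_size=0,
        depth_latent_init=prev_depth_latent,  # None on the first frame
        depth_latent_init_strength=0.1,
        return_depth_latent=True,             # ask for the latent back
        seed=default_seed,                    # same seed every frame
        show_progress_bar=False,
    )
    prev_depth_latent = pipe_out.depth_latent  # anchors the next frame
```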
@@ -204,8 +221,7 @@ def process_bas(
     if plane_near >= plane_far:
         raise gr.Error("NEAR plane must have a value smaller than the FAR plane")
 
-    path_output_dir = …
-    os.makedirs(path_output_dir, exist_ok=True)
+    path_output_dir = tempfile.mkdtemp()
 
     name_base, name_ext = os.path.splitext(os.path.basename(path_input))
 
@@ -280,7 +296,7 @@ def process_bas(
 
 def run_demo_server(pipe):
     process_pipe_image = spaces.GPU(functools.partial(process_image, pipe))
-    process_pipe_video = spaces.GPU(functools.partial(process_video, pipe))
+    process_pipe_video = spaces.GPU(functools.partial(process_video, pipe), duration=120)
     process_pipe_bas = spaces.GPU(functools.partial(process_bas, pipe))
     os.environ["GRADIO_ALLOW_FLAGGING"] = "never"
 
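On ZeroGPU, `spaces.GPU` runs the wrapped callable on a transient GPU worker with a bounded time slice, and a call that exceeds its slice is aborted. Images and bas-reliefs fit the default budget; a frame-by-frame video pass does not, hence `duration=120`. A sketch restating the change with the reasoning spelled out (the exact default slice length is a ZeroGPU platform detail, not something stated in the diff):

```python
import functools

import spaces

# Images and bas-reliefs finish within the default GPU time slice.
process_pipe_image = spaces.GPU(functools.partial(process_image, pipe))

# Videos run the pipeline once per frame, so request a 120-second slice;
# without it, ZeroGPU would abort the call when the default budget runs out.
process_pipe_video = spaces.GPU(
    functools.partial(process_video, pipe), duration=120
)
```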
@@ -304,6 +320,18 @@ def run_demo_server(pipe):
             font-size: 20px !important;
             color: crimson !important;
         }
+        h1 {
+            text-align: center;
+            display: block;
+        }
+        h2 {
+            text-align: center;
+            display: block;
+        }
+        h3 {
+            text-align: center;
+            display: block;
+        }
         """,
         head="""
         <script async src="https://www.googletagmanager.com/gtag/js?id=G-1FWSVCGZTG"></script>
@@ -317,7 +345,7 @@ def run_demo_server(pipe):
     ) as demo:
         gr.Markdown(
             """
-
+            # Marigold-LCM Depth Estimation
             <p align="center">
                 <a title="Website" href="https://marigoldmonodepth.github.io/" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
                     <img src="https://www.obukhov.ai/img/badges/badge-website.svg">
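Gradio renders `gr.Markdown` headings as bare `h1`/`h2`/`h3` elements that the default theme leaves left-aligned; the added CSS centers them, which is what lets the new `# Marigold-LCM Depth Estimation` heading sit centered above the badge row. A condensed sketch of how the two hunks interact (the real app keeps three separate rules):

```python
import gradio as gr

with gr.Blocks(
    css="""
    h1, h2, h3 {
        text-align: center;  /* center Markdown headings... */
        display: block;      /* ...and keep each on its own line */
    }
    """,
) as demo:
    # Rendered as an <h1>, now centered by the CSS above.
    gr.Markdown("# Marigold-LCM Depth Estimation")
```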
@@ -336,9 +364,10 @@ def run_demo_server(pipe):
             Marigold-LCM is the fast version of Marigold, the state-of-the-art depth estimator for images in the wild.
             It combines the power of the original Marigold 10-step estimator and the Latent Consistency Models, delivering high-quality results in as little as <b>one step</b>.
             We provide three functions in this demo: Image, Video, and Bas-relief 3D processing — <b>see the tabs below</b>.
-            Upload your content into the <b>…
-            Wait a second (for images and 3D) or a minute (for videos), and interact with the result in the <b>…
+            Upload your content into the <b>first</b> pane, or click any of the <b>examples</b> below.
+            Wait a second (for images and 3D) or a minute (for videos), and interact with the result in the <b>second</b> pane.
             To avoid queuing, fork the demo into your profile.
+            <a href="https://huggingface.co/spaces/prs-eth/marigold">The original Marigold demo is also available</a>.
             </p>
             """
         )
@@ -474,8 +503,6 @@ def run_demo_server(pipe):
             <p align="justify">
             This part of the demo uses Marigold-LCM to create a bas-relief model.
             The models are watertight, with correct normals, and exported in the STL format, which makes them <b>3D-printable</b>.
-            Start by uploading the image and click "Create" with the default parameters.
-            To improve the result, click "Clear", adjust the geometry sliders below, and click "Create" again.
             </p>
             """,
         )
@@ -487,7 +514,6 @@ def run_demo_server(pipe):
         )
         with gr.Row():
             bas_submit_btn = gr.Button(value="Create 3D", variant="primary")
-            bas_clear_btn = gr.Button(value="Clear")
             bas_reset_btn = gr.Button(value="Reset")
         with gr.Accordion("3D printing demo: Main options", open=True):
             bas_plane_near = gr.Slider(
@@ -703,13 +729,8 @@ def run_demo_server(pipe):
             concurrency_limit=1,
         )
 
-        def wrapper_process_pipe_bas(*args, **kwargs):
-            out = list(process_pipe_bas(*args, **kwargs))
-            out = [gr.Button(interactive=False), gr.Image(interactive=False)] + out
-            return out
-
         bas_submit_btn.click(
-            fn=wrapper_process_pipe_bas,
+            fn=process_pipe_bas,
             inputs=[
                 bas_input,
                 bas_plane_near,
@@ -725,18 +746,7 @@ def run_demo_server(pipe):
                 bas_frame_near,
                 bas_frame_far,
             ],
-            outputs=[…
-            concurrency_limit=1,
-        )
-
-        bas_clear_btn.click(
-            fn=lambda: (gr.Button(interactive=True), None, None),
-            inputs=[],
-            outputs=[
-                bas_submit_btn,
-                bas_output_viewer,
-                bas_output_files,
-            ],
+            outputs=[bas_output_viewer, bas_output_files],
             concurrency_limit=1,
         )
 
@@ -790,21 +800,14 @@ def run_demo_server(pipe):
 
 
 def main():
-    CHECKPOINT = "prs-eth/marigold-v1-0"
-    CHECKPOINT_UNET_LCM = "prs-eth/marigold-lcm-v1-0"
+    CHECKPOINT = "prs-eth/marigold-lcm-v1-0"
 
     if "HF_TOKEN_LOGIN" in os.environ:
         login(token=os.environ["HF_TOKEN_LOGIN"])
 
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-    pipe = MarigoldDepthConsistencyPipeline.from_pretrained(
-        CHECKPOINT,
-        unet=UNet2DConditionModel.from_pretrained(
-            CHECKPOINT_UNET_LCM, subfolder="unet", use_auth_token=True
-        ),
-    )
-    pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
+    pipe = MarigoldDepthConsistencyPipeline.from_pretrained(CHECKPOINT)
     try:
         import xformers
 
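Previously the demo assembled the pipeline from two repos, grafting a separately hosted LCM UNet onto the base Marigold weights and swapping the scheduler by hand. The `prs-eth/marigold-lcm-v1-0` checkpoint now ships as a complete pipeline (which is why the manual `LCMScheduler.from_config` swap disappears), so loading collapses to one call:

```python
# One self-contained checkpoint instead of base weights + LCM UNet
# + a manual scheduler swap; fewer downloads, and no auth token needed.
CHECKPOINT = "prs-eth/marigold-lcm-v1-0"
pipe = MarigoldDepthConsistencyPipeline.from_pretrained(CHECKPOINT)
```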
extrude.py
CHANGED
@@ -1,3 +1,23 @@
+# Copyright 2024 Anton Obukhov, ETH Zurich. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# --------------------------------------------------------------------------
+# If you find this code useful, we kindly ask you to cite our paper in your work.
+# Please find bibtex at: https://github.com/prs-eth/Marigold#-citation
+# More information about the method can be found at https://marigoldmonodepth.github.io
+# --------------------------------------------------------------------------
+
+
 import math
 import os
 
marigold_depth_estimation_lcm.py
CHANGED
@@ -1,4 +1,4 @@
-# Copyright 2024 …
+# Copyright 2024 Bingxin Ke, Anton Obukhov, ETH Zurich and The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -119,6 +119,7 @@ class MarigoldDepthConsistencyPipeline(DiffusionPipeline):
         batch_size: int = 0,
         depth_latent_init: torch.Tensor = None,
         depth_latent_init_strength: float = 0.1,
+        return_depth_latent: bool = False,
         seed: int = None,
         color_map: str = "Spectral",
         show_progress_bar: bool = True,
@@ -147,6 +148,8 @@ class MarigoldDepthConsistencyPipeline(DiffusionPipeline):
                 Initial depth map latent for better temporal consistency.
             depth_latent_init_strength (`float`, *optional*, defaults to `0.1`)
                 Degree of initial depth latent influence, must be between 0 and 1.
+            return_depth_latent (`bool`, defaults to False)
+                Whether to return the depth latent.
             seed (`int`, *optional*, defaults to `None`)
                 Reproducibility seed.
             show_progress_bar (`bool`, *optional*, defaults to `True`):
@@ -247,8 +250,11 @@ class MarigoldDepthConsistencyPipeline(DiffusionPipeline):
         min_d = torch.min(depth_pred)
         max_d = torch.max(depth_pred)
         depth_pred = (depth_pred - min_d) / (max_d - min_d)
-        if …
-            …
+        if return_depth_latent:
+            if ensemble_size > 1:
+                depth_latent = self._encode_depth(2 * depth_pred - 1)
+        else:
+            depth_latent = None
 
         # Convert to numpy
         depth_pred = depth_pred.cpu().numpy().astype(np.float32)
@@ -385,7 +391,7 @@ class MarigoldDepthConsistencyPipeline(DiffusionPipeline):
             ).sample  # [B, 4, h, w]
 
             # compute the previous noisy sample x_t -> x_t-1
-            depth_latent = self.scheduler.step(noise_pred, t, depth_latent).prev_sample
+            depth_latent = self.scheduler.step(noise_pred, t, depth_latent, generator=rng).prev_sample
 
             depth = self._decode_depth(depth_latent)
 
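The final hunk is the substance of "fix reproducibility with seeding": LCM sampling injects fresh noise after every non-final scheduler step, and `LCMScheduler.step()` draws that noise from its optional `generator` argument, so seeding only the initial latent left subsequent steps nondeterministic. Passing the pipeline's seeded `rng` into `step()` closes the gap. A runnable miniature (scheduler config and tensor shapes are illustrative):

```python
import torch
from diffusers import LCMScheduler


def one_step(seed: int) -> torch.Tensor:
    # Default config; the demo loads the scheduler from the checkpoint instead.
    scheduler = LCMScheduler()
    scheduler.set_timesteps(num_inference_steps=4)
    sample = torch.zeros(1, 4, 8, 8)      # stand-in for the depth latent
    noise_pred = torch.ones_like(sample)  # stand-in for the UNet output
    rng = torch.Generator().manual_seed(seed)
    # Non-final steps add noise drawn from `generator`, so it must be seeded.
    return scheduler.step(
        noise_pred, scheduler.timesteps[0], sample, generator=rng
    ).prev_sample


assert torch.equal(one_step(2024), one_step(2024))  # seeded -> reproducible
```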
requirements.txt
CHANGED
@@ -1,4 +1,4 @@
-gradio==4.…
+gradio==4.21.0
 gradio-imageslider==0.0.16
 pygltflib==1.16.1
 trimesh==4.0.5
@@ -6,10 +6,11 @@ imageio
 imageio-ffmpeg
 Pillow
 
-
+spaces>=0.25.0
+accelerate>=0.22.0
 diffusers==0.27.2
 matplotlib==3.8.2
 scipy==1.11.4
 torch==2.0.1
-transformers
-xformers
+transformers>=4.32.1
+xformers>=0.0.21