"""Gradio demo comparing three depth-estimation models on the same input image.

Importing this module loads three ``transformers`` depth-estimation pipelines,
builds the Blocks UI and launches it.
"""

import json
import os

import gradio as gr
import numpy as np
import spaces
import torch
from gradio_client import Client
from PIL import Image
from transformers import pipeline

# One pipeline per model under comparison. All of them are placed on device 0.
dpt_beit = pipeline(
    task="depth-estimation", model="Intel/dpt-beit-base-384", device=0
)
depth_anything = pipeline(
    task="depth-estimation", model="nielsr/depth-anything-small", device=0
)
dpt_large = pipeline(task="depth-estimation", model="intel/dpt-large", device=0)


def depth_anything_inference(img):
    """Run the Depth Anything pipeline on *img* and return its ``"depth"`` entry."""
    return depth_anything(img)["depth"]


def dpt_beit_inference(img):
    """Run the DPT-BeiT pipeline on *img* and return its ``"depth"`` entry."""
    return dpt_beit(img)["depth"]


def dpt_large_inference(img):
    """Run the DPT-Large pipeline on *img* and return its ``"depth"`` entry."""
    return dpt_large(img)["depth"]


@spaces.GPU
def infer(img):
    """Run all three models on *img*.

    Args:
        img: The input image, or ``None`` when the input component is empty.

    Returns:
        A 3-tuple ordered as (DPT-Large, DPT-BeiT, Depth Anything), matching
        the order of the output components wired up below. All three entries
        are ``None`` when *img* is ``None``.
    """
    # The change event also fires when the image is cleared; skip inference then.
    if img is None:
        return None, None, None
    return (
        dpt_large_inference(img),
        dpt_beit_inference(img),
        depth_anything_inference(img),
    )


# NOTE(review): no component visible in this file sets elem_id="mkd" -- confirm
# whether this rule is still needed.
css = """ #mkd { height: 500px; overflow: auto; border: 1px solid #ccc; } """

with gr.Blocks(css=css) as demo:
    # The original literal spanned several physical lines inside plain double
    # quotes, which is a syntax error; the line breaks are now written as
    # escapes so the string content is unchanged.
    # NOTE(review): heading markup appears to be missing here -- confirm
    # whether this title should be wrapped in HTML tags.
    gr.HTML("\n\nCompare Depth Estimation Models\n\n")
    # NOTE(review): the links below point at Intel/dpt-beit-large-512 and
    # LiheYoung/depth-anything-small-hf, while the pipelines above load
    # Intel/dpt-beit-base-384 and nielsr/depth-anything-small -- confirm which
    # checkpoints are intended.
    gr.Markdown(
        "In this Space, you can compare different depth estimation models: "
        "[DPT-Large](https://huggingface.co/Intel/dpt-large), "
        "[DPT with BeiT backbone](https://huggingface.co/Intel/dpt-beit-large-512) "
        "and the recent [Depth Anything Model small checkpoint]"
        "(https://huggingface.co/LiheYoung/depth-anything-small-hf). 🤩"
    )
    gr.Markdown(
        "You can also see how they compare in terms of speed "
        "[here](https://huggingface2.notion.site/"
        "DPT-Benchmarks-1e516b0ba193460e865c47b3a5681efb?pvs=4)."
    )
    gr.Markdown(
        "Simply upload an image or try one of the examples to see the outputs."
    )

    # NOTE(review): layout nesting was reconstructed (input row above a row of
    # three outputs) -- confirm against the intended design.
    with gr.Column():
        with gr.Row():
            input_img = gr.Image(label="Input Image", type="pil")
        with gr.Row():
            output_1 = gr.Image(type="pil", label="DPT-Large")
            output_2 = gr.Image(type="pil", label="DPT with BeiT Backbone")
            output_3 = gr.Image(type="pil", label="Depth Anything")

    gr.Examples(
        [["bee.jpg"], ["cat.png"], ["cats.png"]],
        inputs=input_img,
        outputs=[output_1, output_2, output_3],
        fn=infer,
        cache_examples=True,
        label='Click on any Examples below to get depth estimation results quickly 👇',
    )

    # Re-run all three models whenever the input image changes.
    input_img.change(infer, [input_img], [output_1, output_2, output_3])

demo.launch(debug=True)