upload app

Files changed:
- .gitattributes +3 -0
- .gitignore +1 -0
- app.py +24 -0
- assets/examples/girl_praying.jpeg +3 -0
- assets/examples/man_with_arms_open.jpeg +3 -0
- assets/examples/man_with_camera_in_hand.jpeg +3 -0
- assets/examples/myself.jpeg +3 -0
- model.py +128 -0
- requirements.txt +6 -0
.gitattributes
CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.jpeg filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
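The three added patterns route raster images through Git LFS, so only lightweight pointer files land in the repository (hence the +3 -0 shown for each .jpeg above). Assuming Git LFS is installed, these are the lines `git lfs track` would append:

git lfs track "*.jpg" "*.jpeg" "*.png"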
.gitignore
ADDED
@@ -0,0 +1 @@
+.gradio
app.py
ADDED
@@ -0,0 +1,24 @@
+import os
+import gradio as gr
+from model import predict
+
+description = """
+- This work is a part of the [DepthPro: Beyond Depth Estimation](https://github.com/geetu040/depthpro-beyond-depth) repository, which further explores this model's capabilities on:
+  Image Segmentation - Human Segmentation
+  Image Super Resolution - 384px to 1536px (4x Upscaling)
+  Image Super Resolution - 256px to 1024px (4x Upscaling)
+"""
+examples_dir = "assets/examples/"
+examples = [[os.path.join(examples_dir, filename)] for filename in os.listdir(examples_dir)]
+
+interface = gr.Interface(
+    fn=predict,
+    inputs=gr.Image(type="pil"),
+    outputs=gr.Image(type="pil"),
+    title="DepthPro: Super Resolution: 384px to 1536px (4x Upscaling)",
+    description=description,
+    examples=examples,
+)
+
+if __name__ == "__main__":
+    interface.launch()
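Since gr.Interface wraps a plain function, the model can also be exercised without the UI. A minimal smoke test, assuming the dependencies from requirements.txt are installed and the example images are present locally (illustrative, not part of the committed app):

# hypothetical local smoke test for model.predict()
from PIL import Image
from model import predict

image = Image.open("assets/examples/girl_praying.jpeg").convert("RGB")
upscaled = predict(image)
print(image.size, "->", upscaled.size)  # expect a 1536x1536 output, per the 4x title
upscaled.save("upscaled.png")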
assets/examples/girl_praying.jpeg
ADDED (stored with Git LFS)
assets/examples/man_with_arms_open.jpeg
ADDED (stored with Git LFS)
assets/examples/man_with_camera_in_hand.jpeg
ADDED (stored with Git LFS)
assets/examples/myself.jpeg
ADDED (stored with Git LFS)
model.py
ADDED
@@ -0,0 +1,128 @@
+from PIL import Image
+import torch
+from huggingface_hub import hf_hub_download
+import matplotlib.pyplot as plt
+
+# custom installation from this PR: https://github.com/huggingface/transformers/pull/34583
+# !pip install git+https://github.com/geetu040/transformers.git@depth-pro-projects#egg=transformers
+from transformers import DepthProConfig, DepthProImageProcessorFast, DepthProForDepthEstimation
+
+# load DepthPro model, used as backbone
+config = DepthProConfig(
+    patch_size=192,
+    patch_embeddings_size=16,
+    num_hidden_layers=12,
+    intermediate_hook_ids=[11, 8, 7, 5],
+    intermediate_feature_dims=[256, 256, 256, 256],
+    scaled_images_ratios=[0.5, 1.0],
+    scaled_images_overlap_ratios=[0.5, 0.25],
+    scaled_images_feature_dims=[1024, 512],
+    use_fov_model=False,
+)
+depthpro_for_depth_estimation = DepthProForDepthEstimation(config)
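Note that building DepthProForDepthEstimation from a config leaves the backbone randomly initialized; the trained weights for the complete super-resolution module are fetched from the Hub further down. A quick sanity check one could run at this point (illustrative, not in the committed file):

# parameters exist here but stay random until load_state_dict() below
n_params = sum(p.numel() for p in depthpro_for_depth_estimation.parameters())
print(f"backbone parameters: {n_params / 1e6:.1f}M")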
+
+# create DepthPro for super resolution
+class DepthProForSuperResolution(torch.nn.Module):
+    def __init__(self, depthpro_for_depth_estimation):
+        super().__init__()
+
+        self.depthpro_for_depth_estimation = depthpro_for_depth_estimation
+        hidden_size = self.depthpro_for_depth_estimation.config.fusion_hidden_size
+
+        self.image_head = torch.nn.Sequential(
+            torch.nn.ConvTranspose2d(
+                in_channels=config.num_channels,
+                out_channels=hidden_size,
+                kernel_size=4, stride=2, padding=1
+            ),
+            torch.nn.ReLU(),
+        )
+
+        self.head = torch.nn.Sequential(
+            torch.nn.Conv2d(
+                in_channels=hidden_size,
+                out_channels=hidden_size,
+                kernel_size=3, stride=1, padding=1
+            ),
+            torch.nn.ReLU(),
+            torch.nn.ConvTranspose2d(
+                in_channels=hidden_size,
+                out_channels=hidden_size,
+                kernel_size=4, stride=2, padding=1
+            ),
+            torch.nn.ReLU(),
+            torch.nn.Conv2d(
+                in_channels=hidden_size,
+                out_channels=self.depthpro_for_depth_estimation.config.num_channels,
+                kernel_size=3, stride=1, padding=1
+            ),
+        )
+
+    def forward(self, pixel_values):
+        # x is the low resolution image
+        x = pixel_values
+        encoder_features = self.depthpro_for_depth_estimation.depth_pro(x).features
+        fused_hidden_state = self.depthpro_for_depth_estimation.fusion_stage(encoder_features)[-1]
+        x = self.image_head(x)
+        x = torch.nn.functional.interpolate(x, size=fused_hidden_state.shape[2:])
+        x = x + fused_hidden_state
+        x = self.head(x)
+        return x
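In words: image_head lifts the 3-channel input into the fusion feature space while doubling its resolution, the result is added to the backbone's fused hidden state as a residual, and head doubles the resolution once more while projecting back to 3 channels. A shape sketch, assuming the fused features come out at 768px for a 384px input (which the 4x title implies):

# hypothetical shape flow for a 384px input (hidden = fusion_hidden_size)
#   pixel_values          (1, 3, 384, 384)
#   image_head(x)         (1, hidden, 768, 768)   # stride-2 ConvTranspose2d doubles H, W
#   + fused_hidden_state  (1, hidden, 768, 768)   # after interpolate() to match
#   head(x)               (1, 3, 1536, 1536)      # one more 2x, back to RGB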
+
+# initialize the model
+model = DepthProForSuperResolution(depthpro_for_depth_estimation)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = model.to(device)
+
+# load weights
+weights_path = hf_hub_download(repo_id="geetu040/DepthPro_SR_4x_384p", filename="model_weights.pth")
+model.load_state_dict(torch.load(weights_path, map_location=torch.device('cpu')))
+
+# load image processor
+image_processor = DepthProImageProcessorFast(
+    do_resize=True,
+    size={"width": 384, "height": 384},
+    do_rescale=True,
+    do_normalize=True
+)
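With the weights and processor in place, the expected tensor shapes can be checked end to end; a sketch assuming an example image is available locally (not in the committed file):

# end-to-end shape check (illustrative)
img = Image.open("assets/examples/myself.jpeg").convert("RGB")
batch = image_processor(images=img, return_tensors="pt")
print(batch["pixel_values"].shape)  # torch.Size([1, 3, 384, 384])
with torch.no_grad():
    out = model(batch["pixel_values"].to(device))
print(out.shape)  # torch.Size([1, 3, 1536, 1536]) for the 4x model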
+
+# define crop function to ensure square image
+def crop_image(image):
+    """
+    Crops the image from the center to make the aspect ratio 1:1.
+    """
+    width, height = image.size
+    min_dim = min(width, height)
+    left = (width - min_dim) // 2
+    top = (height - min_dim) // 2
+    right = left + min_dim
+    bottom = top + min_dim
+    image = image.crop((left, top, right, bottom))
+    return image
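A one-liner illustrating the center crop on a hypothetical non-square input:

print(crop_image(Image.new("RGB", (640, 480))).size)  # (480, 480): the shorter side wins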
+
+
+def predict(image):
+    # inference
+
+    image = crop_image(image)
+    image = image.resize((384, 384), Image.Resampling.BICUBIC)
+
+    # prepare image for the model
+    inputs = image_processor(images=image, return_tensors="pt")
+    inputs = {k: v.to(device) for k, v in inputs.items()}
+
+    with torch.no_grad():
+        outputs = model(**inputs)
+
+    # convert tensors to PIL.Image
+    output = outputs[0]  # extract the first and only batch
+    output = output.cpu()  # unload from cuda if used
+    output = torch.permute(output, (1, 2, 0))  # (C, H, W) -> (H, W, C)
+    output = output * 0.5 + 0.5  # undo normalization
+    output = output * 255.  # undo scaling
+    output = output.clip(0, 255.)  # fix out of range
+    output = output.numpy()  # convert to numpy
+    output = output.astype('uint8')  # convert to PIL.Image compatible format
+    output = Image.fromarray(output)  # create PIL.Image object
+
+    return output
requirements.txt
ADDED
@@ -0,0 +1,6 @@
+gradio
+numpy
+pillow
+torch
+torchvision
+git+https://github.com/geetu040/transformers.git@depth-pro-projects#egg=transformers
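To reproduce the environment locally, the usual invocation applies: `pip install -r requirements.txt`. Note that the last entry installs the custom transformers fork from the PR branch referenced in model.py, not the PyPI release.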