danube2024 committed on
Commit
2101ee0
·
verified ·
1 Parent(s): 06da073

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -63
app.py CHANGED
@@ -1,35 +1,33 @@
1
  import gradio as gr
2
- from transformers import DPTFeatureExtractor, DPTForDepthEstimation
3
- from diffusers import StableDiffusionPipeline
4
  import torch
5
  import numpy as np
 
 
6
  from PIL import Image, ImageEnhance, ImageOps
7
- import open3d as o3d
8
 
9
- # Force CPU usage (since environment is CPU-only) and 32-bit float
10
  device = "cpu"
11
  torch_dtype = torch.float32
12
 
13
- # 1) Initialize the text-to-image pipeline
14
- # Keep resolution moderate (512x512) + fewer steps to reduce time.
15
  text_to_image_pipeline = StableDiffusionPipeline.from_pretrained(
16
- "stabilityai/stable-diffusion-2-1-base",
17
  torch_dtype=torch_dtype
18
  ).to(device)
19
 
20
- def enhance_depth_map(depth_array):
21
- """
22
- Simple PIL-based enhancements (no OpenCV needed):
23
- - Normalize depth [min, max] -> [0, 255].
24
- - Use auto-contrast to boost local details.
25
- - Optionally sharpen to highlight edges.
26
- """
27
- # Normalize depth values to [0, 255]
28
- d_min, d_max = depth_array.min(), depth_array.max()
29
- depth_stretched = (depth_array - d_min) / (d_max - d_min + 1e-8)
30
- depth_stretched = (depth_stretched * 255.0).astype(np.uint8)
31
 
 
 
 
 
 
 
 
32
  depth_pil = Image.fromarray(depth_stretched)
 
 
33
  depth_pil = ImageOps.autocontrast(depth_pil)
34
 
35
  # Sharpen
@@ -38,61 +36,45 @@ def enhance_depth_map(depth_array):
38
 
39
  return depth_pil
40
 
41
- # 2) Load the Depth Model
42
- feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
43
- depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large").to(device)
44
-
45
- def generate_3d_from_text(prompt):
46
- # A) Generate a bas-relief-style image at moderate resolution (e.g. 512x512).
47
- # Use fewer inference steps on CPU to avoid timeouts.
48
- result = text_to_image_pipeline(
49
  prompt,
50
  height=512,
51
  width=512,
52
- num_inference_steps=20, # Lower steps => faster but less detail
53
- guidance_scale=7.5 # You can tune or keep default
54
- )
55
- generated_image = result.images[0]
56
-
57
- # B) Upscale the generated image *only for depth estimation*
58
- # This helps the depth model produce more “fine-grained” edges.
59
- upscale_width, upscale_height = 768, 768 # or 1024x1024 if you can handle it
60
- big_image = generated_image.resize((upscale_width, upscale_height), Image.LANCZOS)
61
-
62
- # C) Predict Depth on the upscaled image
63
- encoding = feature_extractor(big_image, return_tensors="pt").to(device)
64
  with torch.no_grad():
65
- outputs = depth_model(**encoding)
66
- predicted_depth = outputs.predicted_depth
67
-
68
- # D) Resize the depth map back to the upscaled size (768×768 here)
69
- # This will be the final "detailed" depth map.
70
- prediction = torch.nn.functional.interpolate(
71
- predicted_depth.unsqueeze(1),
72
- size=(upscale_height, upscale_width),
73
  mode="bicubic",
74
- align_corners=False,
75
  ).squeeze()
76
 
77
- # E) Convert to NumPy for final enhancement
78
- depth_array = prediction.cpu().numpy()
79
- depth_pil = enhance_depth_map(depth_array)
80
 
81
- return generated_image, depth_pil
82
-
83
- # Gradio app
84
- title = "Text to Bas-Relief & Detailed Depth Map (CPU-Friendly)"
85
- description = (
86
- "Generates a bas-relief-style image at 512x512 and produces an upscaled, "
87
- "more detailed depth map. Uses fewer steps to avoid timeouts."
88
- )
89
 
90
  iface = gr.Interface(
91
- fn=generate_3d_from_text,
92
- inputs=gr.Textbox(label="Enter text description", placeholder="A futuristic bas-relief sculpture of a lion..."),
93
- outputs=[gr.Image(label="Bas-Relief Image"), gr.Image(label="Detailed Depth Map")],
94
- title=title,
95
- description=description,
96
  )
97
 
98
  iface.launch()
 
1
  import gradio as gr
 
 
2
  import torch
3
  import numpy as np
4
+ from diffusers import StableDiffusionPipeline
5
+ from transformers import DPTFeatureExtractor, DPTForDepthEstimation
6
  from PIL import Image, ImageEnhance, ImageOps
 
7
 
 
8
  device = "cpu"
9
  torch_dtype = torch.float32
10
 
11
+ # 1) Load a custom bas-relief model
 
12
  text_to_image_pipeline = StableDiffusionPipeline.from_pretrained(
13
+ "KappaNeuro/bas-relief",
14
  torch_dtype=torch_dtype
15
  ).to(device)
16
 
17
+ # 2) Load depth model
18
+ feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
19
+ depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large").to(device)
 
 
 
 
 
 
 
 
20
 
21
+ def enhance_depth(depth_arr):
22
+ # Min-max normalize => 0..255
23
+ d_min, d_max = depth_arr.min(), depth_arr.max()
24
+ depth_stretched = (depth_arr - d_min) / (d_max - d_min + 1e-8)
25
+ depth_stretched = (depth_stretched * 255).astype(np.uint8)
26
+
27
+ # Convert to PIL for further post-processing
28
  depth_pil = Image.fromarray(depth_stretched)
29
+
30
+ # Stretch contrast with auto-contrast (ImageOps.equalize is an alternative worth trying)
31
  depth_pil = ImageOps.autocontrast(depth_pil)
32
 
33
  # Sharpen
 
36
 
37
  return depth_pil
38
 
39
def generate_bas_relief_and_depth(prompt):
    """Generate a bas-relief image for *prompt* plus an enhanced depth map.

    The image is requested from the module-level ``text_to_image_pipeline``
    at 512x512, enlarged to 768x768 with Lanczos resampling, and fed to the
    module-level ``depth_model`` via ``feature_extractor``.  The predicted
    depth is resampled bicubically to the enlarged size and handed to
    ``enhance_depth`` for post-processing.

    Returns:
        A ``(bas_relief, depth_pil)`` tuple: the generated image (not the
        enlarged copy) and whatever ``enhance_depth`` returns for the
        resampled depth array.
    """
    # Single source of truth for the enlarged size; PIL takes (width, height)
    # and interpolate takes (height, width), so a square keeps both in sync.
    upscaled_side = 768  # 1024 is the other size suggested for experimentation

    # A) Text -> moderate-sized bas-relief image.
    generation = text_to_image_pipeline(
        prompt,
        height=512,
        width=512,
        num_inference_steps=25,
        guidance_scale=7.5,
    )
    bas_relief = generation.images[0]

    # B) Enlarge only the copy that goes to the depth model.
    enlarged = bas_relief.resize((upscaled_side, upscaled_side), Image.LANCZOS)

    # C) Depth prediction; gradients are not needed for inference.
    model_inputs = feature_extractor(enlarged, return_tensors="pt").to(device)
    with torch.no_grad():
        raw_depth = depth_model(**model_inputs).predicted_depth

    # D) Insert an axis at position 1 before interpolating, then resample to
    #    the enlarged size and drop all singleton axes again.
    resampled = torch.nn.functional.interpolate(
        raw_depth.unsqueeze(1),
        size=(upscaled_side, upscaled_side),
        mode="bicubic",
        align_corners=False,
    )
    depth_arr = resampled.squeeze().cpu().numpy()

    # E) Post-process the depth array into the final depth image.
    return bas_relief, enhance_depth(depth_arr)
 
 
 
 
 
 
 
71
 
72
  iface = gr.Interface(
73
+ fn=generate_bas_relief_and_depth,
74
+ inputs="text",
75
+ outputs=[gr.Image(label="Bas-Relief"), gr.Image(label="Depth Map")],
76
+ title="Custom Bas-Relief & Detailed Depth Map",
77
+ description="Generates bas-relief from a custom Hugging Face model with an upscaled depth map."
78
  )
79
 
80
  iface.launch()