danube2024 committed on
Commit
dff845d
·
verified ·
1 Parent(s): 75b29d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -14
app.py CHANGED
@@ -1,23 +1,62 @@
1
  import gradio as gr
2
- from diffusers import StableDiffusionPipeline, StableDiffusionDepth2ImgPipeline
 
 
 
3
  from PIL import Image
 
 
4
 
5
- # Initialize pipelines
6
- sd_pipeline = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1")
7
- sd_pipeline.to("cuda")
8
- depth_pipeline = StableDiffusionDepth2ImgPipeline.from_pretrained("stabilityai/stable-diffusion-depth2img")
9
- depth_pipeline.to("cuda")
10
 
11
- def generate_images(prompt):
12
- base_image = sd_pipeline(prompt).images[0]
13
- depth_image = depth_pipeline(prompt=prompt, image=base_image).images[0]
14
- return base_image, depth_image
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  iface = gr.Interface(
17
- fn=generate_images,
18
- inputs=gr.Textbox(label="Prompt"),
19
- outputs=[gr.Image(label="Bas-relief"), gr.Image(label="Depth Map")],
20
- title="Text to Bas-Relief and Depth Map",
 
 
 
 
21
  )
22
 
23
  iface.launch()
 
1
  import gradio as gr
2
+ from transformers import DPTFeatureExtractor, DPTForDepthEstimation
3
+ from diffusers import StableDiffusionPipeline
4
+ import torch
5
+ import numpy as np
6
  from PIL import Image
7
+ import open3d as o3d
8
+ from pathlib import Path
9
 
10
+ # Initialize the models for a CPU-only environment
11
+ device = "cpu"
12
+ torch_dtype = torch.float32
 
 
13
 
14
+ # Load the Stable Diffusion 2.1 base checkpoint in float32 for CPU inference
15
+ text_to_image_pipeline = StableDiffusionPipeline.from_pretrained(
16
+ "stabilityai/stable-diffusion-2-1-base",
17
+ torch_dtype=torch_dtype
18
+ )
19
+ text_to_image_pipeline.to(device)
20
+
21
+ # Load the DPT feature extractor and depth estimation model
22
+ feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
23
+ depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
24
+
25
+ def generate_3d_from_text(prompt):
26
+ # Step 1: Generate Image from Text Prompt
27
+ generated_image = text_to_image_pipeline(prompt).images[0]
28
+
29
+ # Step 2: Estimate Depth from Generated Image
30
+ encoding = feature_extractor(generated_image, return_tensors="pt")
31
+ with torch.no_grad():
32
+ outputs = depth_model(**encoding)
33
+ predicted_depth = outputs.predicted_depth
34
+
35
+ # Resize depth map to the generated image's size (PIL reports (width, height); interpolate expects (height, width))
36
+ prediction = torch.nn.functional.interpolate(
37
+ predicted_depth.unsqueeze(1),
38
+ size=generated_image.size[::-1],
39
+ mode="bicubic",
40
+ align_corners=False,
41
+ ).squeeze()
42
+ depth_image = (prediction.cpu().numpy() * 255 / np.max(prediction.cpu().numpy())).astype("uint8")
43
+ depth_image_pil = Image.fromarray(depth_image)
44
+
45
+ return generated_image, depth_image_pil
46
+
47
+ # Gradio Interface
48
+ title = "3D Model Generation from Text (CPU-friendly)"
49
+ description = "Generate a 3D model from a text description using a lightweight text-to-image and depth estimation."
50
 
51
  iface = gr.Interface(
52
+ fn=generate_3d_from_text,
53
+ inputs=gr.Textbox(label="Enter text description", placeholder="Describe your scene (e.g., 'A Roman soldier in armor')"),
54
+ outputs=[
55
+ gr.Image(label="Generated Image"),
56
+ gr.Image(label="Depth Map")
57
+ ],
58
+ title=title,
59
+ description=description,
60
  )
61
 
62
  iface.launch()