CantonMonkey committed on
Commit
30ac7d9
·
1 Parent(s): 2d810b3

attempt: text to image, diffusion

Browse files
Files changed (4) hide show
  1. .gitignore +22 -0
  2. README.md +9 -13
  3. app.py +46 -4
  4. requirements.txt +6 -0
.gitignore ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Pycache
2
+ __pycache__/
3
+ *.pyc
4
+ *.pyo
5
+ *.pyd
6
+
7
+ # virtual env
8
+ venv/
9
+ .env/
10
+
11
+ # Jupyter Notebook checkpoints
12
+ .ipynb_checkpoints/
13
+
14
+ # HF space cache files (optional)
15
+ *.log
16
+ *.ckpt
17
+ *.safetensors
18
+ *.pt
19
+
20
+ # sys file
21
+ .DS_Store
22
+ Thumbs.db
README.md CHANGED
@@ -1,14 +1,10 @@
1
- ---
2
- title: TextTo3DScene
3
- emoji: 🐨
4
- colorFrom: yellow
5
- colorTo: gray
6
- sdk: gradio
7
- sdk_version: 5.46.1
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- short_description: text to 3D scene
12
- ---
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
1
+ # Text2Image HF Space
 
 
 
 
 
 
 
 
 
 
 
2
 
3
+ This Space demonstrates text-to-image generation using Stable Diffusion v1-4 + ControlNet.
4
+
5
+ - CPU compatible for development
6
+ - GPU recommended for faster generation
7
+ - Allows user to control:
8
+ - Prompt text
9
+ - Number of inference steps
10
+ - Output resolution
app.py CHANGED
@@ -1,7 +1,49 @@
 
 
 
1
  import gradio as gr
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
 
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from diffusers import StableDiffusionPipeline, ControlNetModel, StableDiffusionControlNetPipeline
3
+ from PIL import Image
4
  import gradio as gr
5
 
6
# Select the compute device automatically: CUDA GPU when available, else CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load the ControlNet conditioning model (Canny edges).
# NOTE(review): kept so the module-level `controlnet` name still exists, but it
# is not wired into the pipeline below — the UI never collects a control image.
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-canny", torch_dtype=torch.float32
)

# BUG FIX: the original constructed a StableDiffusionControlNetPipeline, but
# generate_image() never passes the required `image` (control) argument, so
# every generation would fail the pipeline's input validation. Until the UI
# provides a control image, use the plain text-to-image pipeline.
# float32 keeps the model CPU-compatible (float16 is CUDA-only).
pipe = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    torch_dtype=torch.float32,
).to(device)

# Attention slicing trades a little speed for a much lower peak memory
# footprint — helpful on CPU or small-GPU deployments.
pipe.enable_attention_slicing()
25
# Text-to-image generation function used as the Gradio callback.
def generate_image(prompt, num_steps=20, height=256, width=256):
    """Generate one image from a text prompt via the module-level pipeline.

    Args:
        prompt: Text description of the desired image.
        num_steps: Number of denoising inference steps (keep low on CPU).
        height: Output image height in pixels; Stable Diffusion requires
            a multiple of 8.
        width: Output image width in pixels; same multiple-of-8 constraint.

    Returns:
        A PIL.Image containing the generated picture.
    """
    # Gradio sliders may deliver floats; diffusers expects ints for
    # step counts and pixel dimensions, so cast defensively.
    num_steps = int(num_steps)
    height = int(height)
    width = int(width)
    result = pipe(
        prompt,
        num_inference_steps=num_steps,
        height=height,
        width=width,
    )
    # `.images` is a list; a single-prompt call yields exactly one image.
    return result.images[0]
34
+
35
# Gradio UI: wires generate_image to a prompt textbox, three sliders
# (steps / height / width), and a PIL image output, then serves the app.
# NOTE(review): the Height/Width sliders step by 64, which also satisfies
# Stable Diffusion's multiple-of-8 size requirement.
interface = gr.Interface(
    fn=generate_image,
    inputs=[
        gr.Textbox(label="Prompt", placeholder="Enter text prompt here..."),
        gr.Slider(5, 50, value=20, step=1, label="Inference Steps"),
        gr.Slider(128, 512, value=256, step=64, label="Height"),
        gr.Slider(128, 512, value=256, step=64, label="Width"),
    ],
    outputs=gr.Image(type="pil"),
    title="Text2Image Demo (v1-4 + ControlNet)",
    description="Generate images from text using Stable Diffusion v1-4 + ControlNet (CPU/GPU compatible)"
)

# Launch the Gradio server (blocking call; entry point of the Space).
interface.launch()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ torch
2
+ diffusers[torch]
3
+ transformers
4
+ accelerate
5
+ Pillow
6
+ gradio