File size: 6,458 Bytes
52b67df
f92c162
 
52b67df
 
6ef1dc4
52b67df
 
6ef1dc4
 
52b67df
 
f92c162
52b67df
ae6a57b
 
 
 
 
f92c162
52b67df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f92c162
 
52b67df
f92c162
52b67df
f92c162
 
 
 
 
52b67df
f92c162
52b67df
 
 
 
 
 
 
 
ae6a57b
52b67df
f92c162
 
 
 
dcbae16
9e63d68
ce743f5
f92c162
 
 
 
 
 
9dcb09a
 
 
dcbae16
9dcb09a
 
 
 
ce743f5
f92c162
22231ac
f92c162
 
 
ce743f5
 
f92c162
ce743f5
cf4f8fb
ce743f5
 
 
 
 
 
 
f92c162
 
ce743f5
 
 
 
 
 
f92c162
ce743f5
 
 
 
 
 
 
 
 
 
 
 
f92c162
ce743f5
f92c162
ce743f5
 
 
 
 
 
 
52b67df
ce743f5
 
 
 
 
 
52b67df
ce743f5
 
 
 
 
 
 
52b67df
ce743f5
 
 
 
 
 
 
 
 
 
 
 
 
f92c162
ce743f5
dcbae16
ce743f5
f92c162
 
 
52b67df
f92c162
 
 
ae6a57b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
import os
import random

import spaces
import torch
from huggingface_hub import snapshot_download
from transformers import CLIPVisionModelWithProjection, CLIPImageProcessor
from kolors.pipelines.pipeline_stable_diffusion_xl_chatglm_256_ipadapter import StableDiffusionXLPipeline
from kolors.models.modeling_chatglm import ChatGLMModel
from kolors.models.tokenization_chatglm import ChatGLMTokenizer
from kolors.models.unet_2d_condition import UNet2DConditionModel
from diffusers import AutoencoderKL, EulerDiscreteScheduler
import gradio as gr
import numpy as np

# All models below are moved to this device.
device = "cuda"


def _resolve_ckpt_dir(local_dir, repo_id):
    """Return a directory containing the checkpoint files for ``repo_id``.

    Args:
        local_dir: Path of a pre-downloaded copy of the weights.
        repo_id: Hugging Face Hub repository to fetch when ``local_dir`` is absent.

    Returns:
        ``local_dir`` unchanged when it exists on disk, otherwise the path
        returned by ``snapshot_download`` for ``repo_id``.
    """
    if os.path.isdir(local_dir):
        return local_dir
    # The hard-coded path only exists on the original author's machine;
    # fall back to the Hub so the script can start anywhere else.
    return snapshot_download(repo_id=repo_id)


ckpt_dir = _resolve_ckpt_dir(
    '/home/lixiang46/Kolors/weights/Kolors',
    "Kwai-Kolors/Kolors",
)
ckpt_IPA_dir = _resolve_ckpt_dir(
    '/home/lixiang46/Kolors/weights/Kolors-IP-Adapter-Plus',
    "Kwai-Kolors/Kolors-IP-Adapter-Plus",
)

# Load models. Each network is cast to float16 and placed on `device`.
text_encoder = ChatGLMModel.from_pretrained(
    f'{ckpt_dir}/text_encoder',
    torch_dtype=torch.float16,
).half().to(device)
tokenizer = ChatGLMTokenizer.from_pretrained(f'{ckpt_dir}/text_encoder')
vae = AutoencoderKL.from_pretrained(f"{ckpt_dir}/vae", revision=None).half().to(device)
scheduler = EulerDiscreteScheduler.from_pretrained(f"{ckpt_dir}/scheduler")
unet = UNet2DConditionModel.from_pretrained(f"{ckpt_dir}/unet", revision=None).half().to(device)

# CLIP vision tower shipped with the IP-Adapter checkpoint, plus its preprocessor.
image_encoder = CLIPVisionModelWithProjection.from_pretrained(
    f'{ckpt_IPA_dir}/image_encoder',
    ignore_mismatched_sizes=True,
).to(device=device, dtype=torch.float16)
ip_img_size = 336  # used for both the resize and the crop size of the reference image
clip_image_processor = CLIPImageProcessor(size=ip_img_size, crop_size=ip_img_size)

# Assemble the text-to-image pipeline from the individually loaded parts.
pipe = StableDiffusionXLPipeline(
    vae=vae,
    text_encoder=text_encoder,
    tokenizer=tokenizer,
    unet=unet,
    scheduler=scheduler,
    image_encoder=image_encoder,
    feature_extractor=clip_image_processor,
    force_zeros_for_empty_prompt=False,
).to(device)

# Keep the UNet's existing projection reachable under a second attribute name
# before the adapter weights are loaded.
# NOTE(review): presumably load_ip_adapter replaces `encoder_hid_proj` - confirm.
if hasattr(pipe.unet, 'encoder_hid_proj'):
    pipe.unet.text_encoder_hid_proj = pipe.unet.encoder_hid_proj

pipe.load_ip_adapter(
    f'{ckpt_IPA_dir}',
    subfolder="",
    weight_name=["ip_adapter_plus_general.bin"],
)

# Largest int32 value; upper bound of the seed slider and of random seeds.
MAX_SEED = np.iinfo(np.int32).max
# Upper bound of the width and height sliders.
MAX_IMAGE_SIZE = 2048

def infer(prompt, ip_adapter_image, ip_adapter_scale, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps):
    """Generate one image from a text prompt and an IP-Adapter reference image.

    Args:
        prompt: Text prompt passed to the pipeline.
        ip_adapter_image: Reference image from the UI (the image input is
            declared with ``type="pil"``); must not be ``None``.
        ip_adapter_scale: Value passed to ``pipe.set_ip_adapter_scale``.
        negative_prompt: Negative text prompt passed to the pipeline.
        seed: Seed for the random generator when ``randomize_seed`` is false.
        randomize_seed: When true, ``seed`` is ignored and a seed is drawn
            uniformly from ``[0, MAX_SEED]``.
        width: Output width passed to the pipeline.
        height: Output height passed to the pipeline.
        guidance_scale: Guidance scale passed to the pipeline.
        num_inference_steps: Number of denoising steps passed to the pipeline.

    Returns:
        The first image of the pipeline output.

    Raises:
        gr.Error: If no reference image was provided.
    """
    # Fail early with a readable UI message instead of an obscure error
    # from inside the pipeline when the image input is left empty.
    if ip_adapter_image is None:
        raise gr.Error("Please provide an IP-Adapter image before running.")

    # UI sliders may deliver numbers as floats; manual_seed and the image
    # dimensions / step count are used as integers.
    seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)

    generator = torch.Generator().manual_seed(seed)
    pipe.set_ip_adapter_scale([ip_adapter_scale])
    output = pipe(
        prompt=prompt,
        ip_adapter_image=[ip_adapter_image],
        negative_prompt=negative_prompt,
        height=int(height),
        width=int(width),
        num_inference_steps=int(num_inference_steps),
        guidance_scale=guidance_scale,
        num_images_per_prompt=1,
        generator=generator,
    )
    return output.images[0]

# Example rows for the demo: [prompt, reference image path, IP-Adapter scale].
examples = [
    [
        "穿着黑色T恤衫,上面中文绿色大字写着“可图”",
        "image/test_ip.jpg",
        0.5,
    ],
    [
        "一只可爱的小狗在奔跑",
        "image/test_ip2.png",
        0.5,
    ],
]

# Label shown in the page header, based on whether CUDA is available.
power_device = "GPU" if torch.cuda.is_available() else "CPU"

# Custom CSS handed to gr.Blocks; targets the elements with id "col-container".
css = """
#col-container {
    margin: 0 auto;
    max-width: 650px;
}
"""

with gr.Blocks(css=css) as demo:
    # Page header.
    with gr.Row():
        gr.Markdown(f"""
        # Kolors-IP-Adapter-Plus
        Currently running on {power_device}.
        """)

    with gr.Row():
        # First column: reference image, prompt and generation settings.
        with gr.Column(elem_id="col-container"):
            with gr.Row():
                ip_adapter_image = gr.Image(type="pil", label="IP-Adapter Image")
            with gr.Row():
                ip_adapter_scale = gr.Slider(
                    minimum=0.0, maximum=1.0, step=0.05, value=0.5,
                    label="Image influence scale",
                    info="Use 1 for creating variations",
                )
            with gr.Row():
                prompt = gr.Text(
                    label="Prompt", show_label=False, container=False,
                    max_lines=1, placeholder="Enter your prompt",
                )
                run_button = gr.Button("Run", scale=0)
            with gr.Accordion("Advanced Settings", open=False):
                negative_prompt = gr.Text(
                    label="Negative prompt", visible=True,
                    max_lines=1, placeholder="Enter a negative prompt",
                )
                seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
                randomize_seed = gr.Checkbox(value=True, label="Randomize seed")
                with gr.Row():
                    width = gr.Slider(
                        label="Width", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=1024,
                    )
                    height = gr.Slider(
                        label="Height", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=1024,
                    )
                with gr.Row():
                    guidance_scale = gr.Slider(
                        label="Guidance scale", minimum=0.0, maximum=10.0, step=0.1, value=5.0,
                    )
                    num_inference_steps = gr.Slider(
                        label="Number of inference steps", minimum=10, maximum=50, step=1, value=25,
                    )

        # Second column: generated image.
        with gr.Column(elem_id="col-container"):
            result = gr.Image(show_label=False, label="Result")

    # Clickable examples fill the prompt, reference image and scale inputs.
    with gr.Row():
        gr.Examples(
            inputs=[prompt, ip_adapter_image, ip_adapter_scale],
            examples=examples,
        )

    # Inputs are listed in the same order as the parameters of `infer`.
    generation_inputs = [
        prompt,
        ip_adapter_image,
        ip_adapter_scale,
        negative_prompt,
        seed,
        randomize_seed,
        width,
        height,
        guidance_scale,
        num_inference_steps,
    ]
    run_button.click(fn=infer, inputs=generation_inputs, outputs=[result])

# Enable the request queue, then start the server with a share link requested.
demo.queue().launch(share=True)