File size: 9,617 Bytes
37d0f64
3c5fe6e
7a7e7aa
76a453a
7a7e7aa
 
33ca1ea
7a7e7aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48cafae
7a7e7aa
 
 
 
 
 
 
 
 
 
 
021663d
 
7a7e7aa
021663d
7a7e7aa
 
 
 
 
 
 
 
021663d
7a7e7aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3dbde48
 
 
 
 
 
 
 
 
 
 
 
 
3c5fe6e
7a7e7aa
e1bbbd3
 
 
 
 
151fcc6
7a7e7aa
 
 
 
76a453a
 
 
7a7e7aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5e59719
 
7a7e7aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33ca1ea
 
76a453a
7a7e7aa
33ca1ea
76a453a
33ca1ea
594539b
7a7e7aa
 
 
514b22c
e4bab3b
7a7e7aa
b6fbab7
 
 
 
 
d7229a9
 
 
 
 
 
 
 
 
b6fbab7
 
 
 
199341b
1ab5a83
5d1b6a2
b6fbab7
1ab5a83
2bf0aaf
b6fbab7
39a1348
 
 
d7229a9
 
 
b6fbab7
 
 
 
021663d
514b22c
b6fbab7
a11bb63
cff4bdb
a11bb63
 
 
 
bad7be7
b6fbab7
 
 
 
39a1348
b6fbab7
 
 
 
 
 
 
5d9fbe7
b6fbab7
 
 
 
 
 
223fa39
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224

import spaces
import os
import datetime
import einops
import gradio as gr
from gradio_imageslider import ImageSlider
import numpy as np
import torch
import random
from PIL import Image
from pathlib import Path
from torchvision import transforms
import torch.nn.functional as F
from torchvision.models import resnet50, ResNet50_Weights

from pytorch_lightning import seed_everything
from transformers import CLIPTextModel, CLIPTokenizer, CLIPImageProcessor
from diffusers import AutoencoderKL, DDIMScheduler, PNDMScheduler, DPMSolverMultistepScheduler, UniPCMultistepScheduler

from pipelines.pipeline_pasd import StableDiffusionControlNetPipeline
from myutils.misc import load_dreambooth_lora, rand_name
from myutils.wavelet_color_fix import wavelet_color_fix
from annotator.retinaface import RetinaFaceDetection

use_pasd_light = False
face_detector = RetinaFaceDetection()

if use_pasd_light:
    from models.pasd_light.unet_2d_condition import UNet2DConditionModel
    from models.pasd_light.controlnet import ControlNetModel
else:
    from models.pasd.unet_2d_condition import UNet2DConditionModel
    from models.pasd.controlnet import ControlNetModel

pretrained_model_path = "checkpoints/stable-diffusion-v1-5"
ckpt_path = "runs/pasd/checkpoint-100000"
#dreambooth_lora_path = "checkpoints/personalized_models/toonyou_beta3.safetensors"
dreambooth_lora_path = "checkpoints/personalized_models/majicmixRealistic_v6.safetensors"
#dreambooth_lora_path = "checkpoints/personalized_models/Realistic_Vision_V5.1.safetensors"
weight_dtype = torch.float16
device = "cuda"

scheduler = UniPCMultistepScheduler.from_pretrained(pretrained_model_path, subfolder="scheduler")
text_encoder = CLIPTextModel.from_pretrained(pretrained_model_path, subfolder="text_encoder")
tokenizer = CLIPTokenizer.from_pretrained(pretrained_model_path, subfolder="tokenizer")
vae = AutoencoderKL.from_pretrained(pretrained_model_path, subfolder="vae")
feature_extractor = CLIPImageProcessor.from_pretrained(f"{pretrained_model_path}/feature_extractor")
unet = UNet2DConditionModel.from_pretrained(ckpt_path, subfolder="unet")
controlnet = ControlNetModel.from_pretrained(ckpt_path, subfolder="controlnet")
vae.requires_grad_(False)
text_encoder.requires_grad_(False)
unet.requires_grad_(False)
controlnet.requires_grad_(False)

unet, vae, text_encoder = load_dreambooth_lora(unet, vae, text_encoder, dreambooth_lora_path)

text_encoder.to(device, dtype=weight_dtype)
vae.to(device, dtype=weight_dtype)
unet.to(device, dtype=weight_dtype)
controlnet.to(device, dtype=weight_dtype)

validation_pipeline = StableDiffusionControlNetPipeline(
        vae=vae, text_encoder=text_encoder, tokenizer=tokenizer, feature_extractor=feature_extractor, 
        unet=unet, controlnet=controlnet, scheduler=scheduler, safety_checker=None, requires_safety_checker=False,
    )
#validation_pipeline.enable_vae_tiling()
validation_pipeline._init_tiled_vae(decoder_tile_size=224)

weights = ResNet50_Weights.DEFAULT
preprocess = weights.transforms()
resnet = resnet50(weights=weights)
resnet.eval()

def resize_image(image_path, target_height):
    # Open the image file
    with Image.open(image_path) as img:
        # Calculate the ratio to resize the image to the target height
        ratio = target_height / float(img.size[1])
        # Calculate the new width based on the aspect ratio
        new_width = int(float(img.size[0]) * ratio)
        # Resize the image
        resized_img = img.resize((new_width, target_height), Image.LANCZOS)
        # Save the resized image
        #resized_img.save(output_path)
        return resized_img

@spaces.GPU(enable_queue=True)
def inference(input_image, prompt, a_prompt, n_prompt, denoise_steps, upscale, alpha, cfg, seed):
    
    #tempo fix for seed equals-1
    if seed == -1:
        seed = 0
        
    input_image = resize_image(input_image, 512)
    process_size = 768
    resize_preproc = transforms.Compose([
        transforms.Resize(process_size, interpolation=transforms.InterpolationMode.BILINEAR),
    ])
    
    # Get the current timestamp
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")

    with torch.no_grad():
        seed_everything(seed)
        generator = torch.Generator(device=device)

        input_image = input_image.convert('RGB')
        batch = preprocess(input_image).unsqueeze(0)
        prediction = resnet(batch).squeeze(0).softmax(0)
        class_id = prediction.argmax().item()
        score = prediction[class_id].item()
        category_name = weights.meta["categories"][class_id]
        if score >= 0.1:
            prompt += f"{category_name}" if prompt=='' else f", {category_name}"

        prompt = a_prompt if prompt=='' else f"{prompt}, {a_prompt}"

        ori_width, ori_height = input_image.size
        resize_flag = False

        rscale = upscale
        input_image = input_image.resize((input_image.size[0]*rscale, input_image.size[1]*rscale))
        
        #if min(validation_image.size) < process_size:
        #    validation_image = resize_preproc(validation_image)

        input_image = input_image.resize((input_image.size[0]//8*8, input_image.size[1]//8*8))
        width, height = input_image.size
        resize_flag = True #

        try:
            image = validation_pipeline(
                    None, prompt, input_image, num_inference_steps=denoise_steps, generator=generator, height=height, width=width, guidance_scale=cfg, 
                    negative_prompt=n_prompt, conditioning_scale=alpha, eta=0.0,
                ).images[0]
            
            if True: #alpha<1.0:
                image = wavelet_color_fix(image, input_image)
        
            if resize_flag: 
                image = image.resize((ori_width*rscale, ori_height*rscale))
        except Exception as e:
            print(e)
            image = Image.new(mode="RGB", size=(512, 512))
    
    # Convert and save the image as JPEG
    image.save(f'result_{timestamp}.jpg', 'JPEG')

    # Convert and save the image as JPEG
    input_image.save(f'input_{timestamp}.jpg', 'JPEG')
    
    return (f"input_{timestamp}.jpg", f"result_{timestamp}.jpg"), f"result_{timestamp}.jpg"

title = "Pixel-Aware Stable Diffusion for Real-ISR"
description = "Gradio Demo for PASD Real-ISR. To use it, simply upload your image, or click one of the examples to load them."
article = "<a href='https://github.com/yangxy/PASD' target='_blank'>Github Repo Pytorch</a>"
#examples=[['samples/27d38eeb2dbbe7c9.png'],['samples/629e4da70703193b.png']]

css = """
#col-container{
    margin: 0 auto;
    max-width: 720px;
}
#project-links{
    margin: 0 0 12px !important;
    column-gap: 8px;
    display: flex;
    justify-content: center;
    flex-wrap: nowrap;
    flex-direction: row;
    align-items: center;
}
"""

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(f"""
        <h2 style="text-align: center;">
            PASD Magnify
        </h2>
        <p style="text-align: center;">
            Pixel-Aware Stable Diffusion for Realistic Image Super-resolution and Personalized Stylization
        </p>
        <p id="project-links" align="center">
            <a href='https://github.com/yangxy/PASD'><img src='https://img.shields.io/badge/Project-Page-Green'></a> <a href='https://huggingface.co/papers/2308.14469'><img src='https://img.shields.io/badge/Paper-Arxiv-red'></a>
        </p> 
        <p style="margin:12px auto;display: flex;justify-content: center;">
            <a href="https://huggingface.co/spaces/fffiloni/PASD?duplicate=true"><img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-lg.svg" alt="Duplicate this Space"></a>
        </p>
        
        """)
        with gr.Row():
            with gr.Column():
                input_image = gr.Image(type="filepath", sources=["upload"], value="samples/frog.png")
                prompt_in = gr.Textbox(label="Prompt", value="Frog")
                with gr.Accordion(label="Advanced settings", open=False):
                    added_prompt = gr.Textbox(label="Added Prompt", value='clean, high-resolution, 8k, best quality, masterpiece')
                    neg_prompt = gr.Textbox(label="Negative Prompt",value='dotted, noise, blur, lowres, oversmooth, longbody, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality')
                    denoise_steps = gr.Slider(label="Denoise Steps", minimum=10, maximum=50, value=20, step=1)
                    upsample_scale = gr.Slider(label="Upsample Scale", minimum=1, maximum=4, value=2, step=1)
                    condition_scale = gr.Slider(label="Conditioning Scale", minimum=0.5, maximum=1.5, value=1.1, step=0.1)
                    classifier_free_guidance = gr.Slider(label="Classier-free Guidance", minimum=0.1, maximum=10.0, value=7.5, step=0.1)
                    seed = gr.Slider(label="Seed", minimum=-1, maximum=2147483647, step=1, randomize=True)
                submit_btn = gr.Button("Submit")
            with gr.Column():
                b_a_slider = ImageSlider(label="B/A result", position=0.5)
                file_output = gr.File(label="Downloadable image result")
                
    submit_btn.click(
        fn = inference,
        inputs = [
            input_image, prompt_in,
            added_prompt, neg_prompt,
            denoise_steps,
            upsample_scale, condition_scale,
            classifier_free_guidance, seed
        ],
        outputs = [
            b_a_slider,
            file_output
        ]
    )
demo.queue(max_size=20).launch()