File size: 4,778 Bytes
05ca552
 
d0ce882
05ca552
 
bcc70ee
7e594de
f990ec0
946ae28
a051380
108ebf6
05ca552
a051380
05ca552
3d7ab9b
05ca552
108ebf6
3d7ab9b
05ca552
 
 
82f8486
05ca552
 
007e73b
b475ab0
82f8486
ba5d2e8
 
 
 
 
 
 
20d74db
ede6809
05ca552
b0f2516
 
4c97218
b0f2516
05ca552
b0f2516
 
05ca552
b297a29
 
 
 
 
 
 
 
 
 
d104ddb
b297a29
 
 
2408667
b297a29
5517957
04a826a
b297a29
d104ddb
16e5121
 
 
 
 
25e159d
b297a29
 
 
 
e3e69da
a624675
5517957
a624675
e3e69da
 
b297a29
05ca552
bdca464
 
 
e3e69da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bdca464
 
 
 
 
 
 
9365df7
007e73b
bdca464
578e047
bdca464
9365df7
b0f2516
bdca464
50d06b5
bdca464
b0f2516
bdca464
9cd8b4d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import gradio as gr
from PIL import Image
from io import BytesIO
import torch
import os

#os.system("pip install git+https://github.com/fffiloni/diffusers")

from diffusers import DiffusionPipeline, DDIMScheduler
from imagic import ImagicStableDiffusionPipeline

# Pick the compute device from what is actually available instead of assuming
# a GPU, so importing this module does not crash on a CUDA-less machine.
# NOTE(review): the Imagic training loop may be impractically slow (or not
# supported at all) on CPU -- confirm before relying on the fallback.
has_cuda = torch.cuda.is_available()
device = "cuda" if has_cuda else "cpu"

# Stable Diffusion v1.4 weights loaded into the Imagic pipeline, with the
# safety checker disabled and a DDIM scheduler configured explicitly.
pipe = ImagicStableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    safety_checker=None,
    scheduler=DDIMScheduler(
        beta_start=0.00085,
        beta_end=0.012,
        beta_schedule="scaled_linear",
        clip_sample=False,
        set_alpha_to_one=False,
    ),
).to(device)

# Random generator seeded with 0, created on the same device as the pipeline.
# It is shared by every call to infer().
generator = torch.Generator(device).manual_seed(0)

def infer(prompt, init_image):
    """Fine-tune the pipeline on one image/prompt pair and return the edit.

    Args:
        prompt: Target text handed to ``pipe.train``.
        init_image: Path (or file object) of the image to edit, as accepted
            by ``PIL.Image.open``.

    Returns:
        The first image of the result produced by ``pipe(alpha=1)``.

    Raises:
        gr.Error: If no input image was provided.
    """
    # Without an upload the value is None; fail with a readable message
    # instead of an opaque error from Image.open(None).
    if init_image is None:
        raise gr.Error("Please upload an input image before training.")

    # convert() returns a new, fully loaded image, so the source file can be
    # closed as soon as the with-block ends.
    with Image.open(init_image) as source_image:
        init_image = source_image.convert("RGB").resize((512, 512))

    # The return value of train() is not used; the call is made for its
    # effect on `pipe`, which is then invoked below without a prompt.
    pipe.train(
        prompt,
        init_image,
        guidance_scale=7.5,
        num_inference_steps=50,
        generator=generator,
        text_embedding_optimization_steps=500,
        model_fine_tuning_optimization_steps=500,
    )

    # Release cached GPU memory left over from training before generating.
    torch.cuda.empty_cache()

    # Generation needs no gradients.
    with torch.no_grad():
        res = pipe(alpha=1)

    return res.images[0]

# HTML header shown at the top of the page (rendered with gr.HTML(title)):
# demo name, a short description linking to the paper, a teaser image and a
# "Duplicate Space" badge.
title = """
    <div style="text-align: center; max-width: 650px; margin: 0 auto;">
        <div
        style="
            display: inline-flex;
            align-items: center;
            gap: 0.8rem;
            font-size: 1.75rem;
        "
        >
        <h1 style="font-weight: 900; margin-top: 7px;">
            Imagic Stable Diffusion • Community Pipeline
        </h1>
        </div>
         <p style="margin-top: 10px; font-size: 94%">
        Text-Based Real Image Editing with Diffusion Models
        <br />This pipeline aims to implement <a href="https://arxiv.org/abs/2210.09276" target="_blank">this paper</a> to Stable Diffusion, allowing for real-world image editing.
        
        </p>
        <br /><img src="https://user-images.githubusercontent.com/788417/196388568-4ee45edd-e990-452c-899f-c25af32939be.png" style="margin:7px 0 20px;"/>
       
        <p style="font-size: 94%">
            You can skip the queue by duplicating this space: 
            <a style="display: flex;align-items: center;justify-content: center;height: 30px;" href="https://huggingface.co/spaces/fffiloni/imagic-stable-diffusion?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=&logoWidth=14" alt="Duplicate Space"></a>       
        </p>

    </div>
"""

# HTML footer with credits for the pipeline and the demo (rendered with
# gr.HTML(article)); styled by the ".footer" rules in `css`.
article = """
    <div class="footer">
        <p><a href="https://github.com/huggingface/diffusers/tree/main/examples/community#imagic-stable-diffusion" target="_blank">Community pipeline</a> 
        baked by <a href="https://github.com/MarkRich" style="text-decoration: underline;" target="_blank">Mark Rich</a> - 
        Gradio Demo by 🤗 <a href="https://twitter.com/fffiloni" target="_blank">Sylvain Filoni</a>
        </p>
    </div>
"""

# Custom stylesheet passed to gr.Blocks(css=css): limits and centers the
# "#col-container" column, underlines links, and styles the ".footer" block
# with separate colors under the ".dark" class.
css = '''
    #col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
    a {text-decoration-line: underline; font-weight: 600;}
    .footer {
            margin-bottom: 45px;
            margin-top: 35px;
            text-align: center;
            border-bottom: 1px solid #e5e5e5;
        }
        .footer>p {
            font-size: .8rem;
            display: inline-block;
            padding: 0 10px;
            transform: translateY(10px);
            background: white;
        }
        .dark .footer {
            border-color: #303030;
        }
        .dark .footer>p {
            background: #0b0f19;
        }
'''


# Build the demo page: a single column containing the header, the two inputs,
# the trigger button, the output image and the footer credits.
with gr.Blocks(css=css) as block:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)

        # Inputs handed to infer(): the target text and the uploaded image.
        # The image component is declared with type="filepath"; infer() opens
        # the received value with PIL's Image.open.
        prompt_input = gr.Textbox(label="Target text", placeholder="Describe the image with what you want to change about the subject")
        image_init = gr.Image(source="upload", type="filepath",label="Input Image")

        submit_btn = gr.Button("Train")

        # Displays the image returned by infer().
        image_output = gr.Image(label="Edited image")
        #text_output = gr.Image(label="trained status")

        gr.HTML(article)

    # Clicking the button runs infer(prompt, image) and shows its return value
    # in the output image component.
    submit_btn.click(fn=infer, inputs=[prompt_input,image_init], outputs=[image_output])

# Enable the request queue with max_size=12 and start the app with
# show_api=False.
block.queue(max_size=12).launch(show_api=False)