import PIL
import requests
import torch
import gradio as gr
import random
import datetime
from PIL import Image
import os
import time
from diffusers import (
    StableDiffusionInstructPix2PixPipeline,
    EulerAncestralDiscreteScheduler,
)

# Loading from Diffusers Library
model_id = "timbrooks/instruct-pix2pix"
pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
    model_id, torch_dtype=torch.float16, revision="fp16"
)  # , safety_checker=None)
pipe.to("cuda")
pipe.enable_xformers_memory_efficient_attention()
pipe.unet.to(memory_format=torch.channels_last)
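# Note: EulerAncestralDiscreteScheduler is imported above but is not wired in here.
# The Diffusers docs show it can optionally be swapped in for this pipeline, e.g.:
# pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)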

counter = 0


help_text = """ Note: Functionality to revert your changes to previous/original image can be released in future versions. For now only forward editing is available.

Some notes from the official [instruct-pix2pix](https://huggingface.co/spaces/timbrooks/instruct-pix2pix) Space by the authors  
and from the official [Diffusers docs](https://huggingface.co/docs/diffusers/main/en/api/pipelines/stable_diffusion/pix2pix) -

If you're not getting what you want, there may be a few reasons:
1. Is the image not changing enough? Your guidance_scale may be too low. It should be >1. A higher guidance scale encourages the model to generate 
images that are closely linked to the text `prompt`, usually at the expense of lower image quality. This value dictates how strongly the edit should 
follow the instruction. This pipeline requires a value of at least `1`. It's possible your edit requires larger changes from the original image. 

2. Alternatively, you can adjust image_guidance_scale, which pushes the generated image towards the initial image. Image guidance scale is enabled 
by setting `image_guidance_scale > 1`. A higher image guidance scale encourages the model to generate images that are closely linked to the source 
image `image`, usually at the expense of lower image quality.  

3. I have observed that rephrasing the instruction sometimes improves results (e.g., "turn him into a dog" vs. "make him a dog" vs. "as a dog").

4. Increasing the number of steps sometimes improves results.

5. Do faces look weird? The Stable Diffusion autoencoder has a hard time with faces that are small in the image. Try:
    * Cropping the image so the face takes up a larger portion of the frame.
"""


def chat(
    image_in,
    in_steps,
    in_guidance_scale,
    in_img_guidance_scale,
    image_hid,
    img_name,
    counter_out,
    prompt,
    history,
    progress=gr.Progress(track_tqdm=True),
):
    start = datetime.datetime.now()
    progress(0, desc="Starting...")
    # if message == "revert":  (revert functionality to be added later)
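    # counter_out tracks how many edits have been applied so far: after the first edit we keep
    # chaining edits onto the previously generated image (image_hid) instead of the original upload.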
    if counter_out > 0:
        edited_image = pipe(
            prompt,
            image=image_hid,
            num_inference_steps=int(in_steps),
            guidance_scale=float(in_guidance_scale),
            image_guidance_scale=float(in_img_guidance_scale),
        ).images[0]
        if os.path.exists(img_name):
            os.remove(img_name)
        temp_img_name = img_name[:-4] + str(int(time.time())) + ".png"
        # Write the edited image to the new timestamped file
        with open(temp_img_name, "wb") as fp:
            edited_image.save(fp)
        # Keep the saved file's name for the chatbot response below
        saved_image_name = fp.name
        counter_out += 1
    else:
        seed = random.randint(0, 1000000)
        img_name = f"./edited_image_{seed}.png"
        edited_image = pipe(
            prompt,
            image=image_in,
            num_inference_steps=int(in_steps),
            guidance_scale=float(in_guidance_scale),
            image_guidance_scale=float(in_img_guidance_scale),
        ).images[0]
        if os.path.exists(img_name):
            os.remove(img_name)
        with open(img_name, "wb") as fp:
            edited_image.save(fp)
        # Keep the saved file's name for the chatbot response below
        saved_image_name2 = fp.name
    print(f"Ran in {datetime.datetime.now() - start}")
    history = history or []
    # Pick a short supportive message to accompany the edited image in the chat response
    add_text_list = [
        "There you go",
        "Enjoy your image!",
        "Nice work! Wonder what you gonna do next!",
        "Way to go!",
        "Does this work for you?",
        "Something like this?",
    ]
    if counter_out > 0:
        response = (
            random.choice(add_text_list) + '<img src="/file=' + saved_image_name + '">'
        )
        history.append((prompt, response))
        return history, history, edited_image, temp_img_name, counter_out
    else:
        response = (
            random.choice(add_text_list) + '<img src="/file=' + saved_image_name2 + '">'
        )  # IMG_NAME
        history.append((prompt, response))
        counter_out += 1
        return history, history, edited_image, img_name, counter_out


with gr.Blocks() as demo:
    gr.Markdown(
        """<h1><center> Chat Interface with InstructPix2Pix: Give Image Editing Instructions</h1></center>
    <p>For faster inference without waiting in the queue, you may duplicate the space and upgrade to GPU in settings.<br/>
    <a href="https://huggingface.co/spaces/ysharma/InstructPix2Pix_Chatbot?duplicate=true">
    <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
    **Note: Please be advised that a safety checker has been implemented in this public space. 
    Any attempts to generate inappropriate or NSFW images will result in the display of a black screen 
    as a precautionary measure for the protection of all users. We appreciate your cooperation in 
    maintaining a safe and appropriate environment for all members of our community.**
    <p/>"""
    )
    with gr.Row():
        with gr.Column():
            image_in = gr.Image(type="pil", label="Original Image")
            text_in = gr.Textbox()
            state_in = gr.State()
            b1 = gr.Button("Edit the image!")
            with gr.Accordion(
                "Advance settings for Training and Inference", open=False
            ):
                gr.Markdown(
                    "Advance settings for - Number of Inference steps, Guidanace scale, and Image guidance scale."
                )
                in_steps = gr.Number(
                    label="Enter the number of Inference steps", value=20
                )
                in_guidance_scale = gr.Slider(
                    1, 10, step=0.5, label="Set Guidance scale", value=7.5
                )
                in_img_guidance_scale = gr.Slider(
                    1, 10, step=0.5, label="Set Image Guidance scale", value=1.5
                )
                image_hid = gr.Image(type="pil", visible=False)
                img_name_temp_out = gr.Textbox(visible=False)
                counter_out = gr.Number(visible=False, value=0, precision=0)
        chatbot = gr.Chatbot()
    b1.click(
        chat,
        [
            image_in,
            in_steps,
            in_guidance_scale,
            in_img_guidance_scale,
            image_hid,
            img_name_temp_out,
            counter_out,
            text_in,
            state_in,
        ],
        [chatbot, state_in, image_hid, img_name_temp_out, counter_out],
    )  # , queue=True)
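    # The outputs feed back into the hidden components (image_hid, img_name_temp_out, counter_out)
    # so that the next click continues editing from the previously edited image.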
    gr.Markdown(help_text)

demo.queue(concurrency_count=10)
demo.launch(debug=True, width="80%", height=2000)