import PIL import requests import torch import gradio as gr import random from PIL import Image import os import time from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler #Loading from Diffusers Library model_id = "timbrooks/instruct-pix2pix" pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(model_id, torch_dtype=torch.float16, revision="fp16") #, safety_checker=None) pipe.to("cuda") #pipe.enable_attention_slicing() pipe.enable_xformers_memory_efficient_attention() pipe.unet.to(memory_format=torch.channels_last) help_text = """ **Note: Please be advised that a safety checker has been implemented in this public space. Any attempts to generate inappropriate or NSFW images will result in the display of a black screen as a precautionary measure to protect all users. We appreciate your cooperation in maintaining a safe and appropriate environment for all members of our community.** New features and bug-fixes: 1. Chat style interface 2. Now use **'reverse'** as prompt to get back the previous image after an unwanted edit 3. Use **'restart'** as prompt to get back to original image and start over! 4. Now you can load larger image files (~5 mb) as well Some notes from the official [instruct-pix2pix](https://huggingface.co/spaces/timbrooks/instruct-pix2pix) Space by the authors and from the official [Diffusers docs](https://huggingface.co/docs/diffusers/main/en/api/pipelines/stable_diffusion/pix2pix) - If you're not getting what you want, there may be a few reasons: 1. Is the image not changing enough? Your guidance_scale may be too low. It should be >1. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, usually at the expense of lower image quality. This value dictates how similar the output should be to the input. This pipeline requires a value of at least `1`. It's possible your edit requires larger changes from the original image. 2. Alternatively, you can toggle image_guidance_scale. Image guidance scale is to push the generated image towards the inital image. Image guidance scale is enabled by setting `image_guidance_scale > 1`. Higher image guidance scale encourages to generate images that are closely linked to the source image `image`, usually at the expense of lower image quality. 3. I have observed that rephrasing the instruction sometimes improves results (e.g., "turn him into a dog" vs. "make him a dog" vs. "as a dog"). 4. Increasing the number of steps sometimes improves results. 5. Do faces look weird? The Stable Diffusion autoencoder has a hard time with faces that are small in the image. Try: * Cropping the image so the face takes up a larger portion of the frame. """ css = """ #col-container {max-width: 580px; margin-left: auto; margin-right: auto;} a {text-decoration-line: underline; font-weight: 600;} .footer { margin-bottom: 45px; margin-top: 10px; text-align: center; border-bottom: 1px solid #e5e5e5; } .footer>p { font-size: .8rem; display: inline-block; padding: 0 10px; transform: translateY(10px); background: white; } .dark .footer { border-color: #303030; } .dark .footer>p { background: #0b0f19; } .animate-spin { animation: spin 1s linear infinite; } @keyframes spin { from { transform: rotate(0deg); } to { transform: rotate(360deg); } } """ def previous(image): return image def upload_image(file): return Image.open(file) def upload_button_config(): return gr.update(visible=False) def upload_textbox_config(text_in): return gr.update(visible=True) def chat(btn_upload, image_in, in_steps, in_guidance_scale, in_img_guidance_scale, image_hid, img_name, counter_out, image_oneup, prompt, history, progress=gr.Progress(track_tqdm=True)): progress(0, desc="Starting...") if prompt != '' and prompt.lower() == 'reverse' : #--to add revert functionality later history = history or [] temp_img_name = img_name[:-4]+str(int(time.time()))+'.png' image_oneup.save(temp_img_name) response = 'Reverted to the last image ' + '' history.append((prompt, response)) return history, history, image_oneup, temp_img_name, counter_out if prompt != '' and prompt.lower() == 'restart' : #--to add revert functionality later history = history or [] temp_img_name = img_name[:-4]+str(int(time.time()))+'.png' #Resizing the image basewidth = 512 wpercent = (basewidth/float(image_in.size[0])) hsize = int((float(image_in.size[1])*float(wpercent))) image_in = image_in.resize((basewidth,hsize), Image.Resampling.LANCZOS) image_in.save(temp_img_name) response = 'Reverted to the last image ' + '' history.append((prompt, response)) return history, history, image_in, temp_img_name, counter_out #adding supportive sample text add_text_list = ["There you go", "Enjoy your image!", "Nice work! Wonder what you gonna do next!", "Way to go!", "Does this work for you?", "Something like this?"] if counter_out == 0: t1 = time.time() print(f"Time at start = {t1}") #convert file object to image image_in = Image.open(btn_upload) #Resizing the image basewidth = 512 wpercent = (basewidth/float(image_in.size[0])) hsize = int((float(image_in.size[1])*float(wpercent))) image_in = image_in.resize((basewidth,hsize), Image.Resampling.LANCZOS) # Save the image to the file-like object seed = random.randint(0, 1000000) img_name = f"./edited_image_{seed}.png" image_in.save(img_name) #add state history = history or [] response = '' history.append((prompt, response)) counter_out += 1 t2 = time.time() print(f"Time at end = {t2}") time_diff = t2-t1 print(f"Time taken = {time_diff}") return history, history, image_in, img_name, counter_out elif counter_out == 1: #instruct-pix2pix inference edited_image = pipe(prompt, image=image_in, num_inference_steps=int(in_steps), guidance_scale=float(in_guidance_scale), image_guidance_scale=float(in_img_guidance_scale)).images[0] if os.path.exists(img_name): os.remove(img_name) temp_img_name = img_name[:-4]+str(int(time.time()))[-4:] +'.png' with open(temp_img_name, "wb") as fp: # Save the image to the file-like object edited_image.save(fp) #Get the name of the saved image saved_image_name1 = fp.name history = history or [] response = random.choice(add_text_list) + '' #IMG_NAME history.append((prompt, response)) counter_out += 1 return history, history, edited_image, temp_img_name, counter_out elif counter_out > 1: edited_image = pipe(prompt, image=image_hid, num_inference_steps=int(in_steps), guidance_scale=float(in_guidance_scale), image_guidance_scale=float(in_img_guidance_scale)).images[0] if os.path.exists(img_name): os.remove(img_name) temp_img_name = img_name[:-4]+str(int(time.time()))[-4:]+'.png' # Create a file-like object with open(temp_img_name, "wb") as fp: # Save the image to the file-like object edited_image.save(fp) #Get the name of the saved image saved_image_name2 = fp.name #edited_image.save(temp_img_name) #, overwrite=True) history = history or [] response = random.choice(add_text_list) + '' history.append((prompt, response)) counter_out += 1 return history, history, edited_image, temp_img_name, counter_out #Blocks layout with gr.Blocks(css="style.css") as demo: with gr.Column(elem_id="col-container"): gr.HTML("""

ChatPix2Pix: Image Editing by Instructions

Hi, I'm a photoshop expert bot, start by uploading your image using the upload button, and then tell me what changes you want to make to it.
Duplicate SpaceDuplicate Space with GPU Upgrade for fast Inference & no queue
Based on Diffusers implementation of InstructPix2Pix.

""") with gr.Accordion("Advance settings for Training and Inference", open=False): image_in = gr.Image(visible=False,type='pil', label="Original Image") gr.Markdown("Advance settings for - Number of Inference steps, Guidanace scale, and Image guidance scale.") in_steps = gr.Number(label="Enter the number of Inference steps", value = 20) in_guidance_scale = gr.Slider(1,10, step=0.5, label="Set Guidance scale", value=7.5) in_img_guidance_scale = gr.Slider(1,10, step=0.5, label="Set Image Guidance scale", value=1.5) image_hid = gr.Image(type='pil', visible=False) image_oneup = gr.Image(type='pil', visible=False) img_name_temp_out = gr.Textbox(visible=False) counter_out = gr.Number(visible=False, value=0, precision=0) #with gr.Row(): text_in = gr.Textbox(value='', placeholder="Type your instructions here and press enter", elem_id = "input_prompt", visible=False, label='Great! Now you can edit your image with Instructions') btn_upload = gr.UploadButton("Upload image to start editing", file_types=["image"], file_count="single", elem_id="upload_button") chatbot = gr.Chatbot(elem_id = 'chatbot-component', label='Conversational editing for Images') state_in = gr.State() #Using Event Listeners btn_upload.upload(chat, [btn_upload, image_in, in_steps, in_guidance_scale, in_img_guidance_scale, image_hid, img_name_temp_out,counter_out, image_oneup, text_in, state_in], [chatbot, state_in, image_in, img_name_temp_out, counter_out]) btn_upload.upload(fn = upload_textbox_config, inputs=text_in, outputs = text_in) text_in.submit(chat,[btn_upload, image_in, in_steps, in_guidance_scale, in_img_guidance_scale, image_hid, img_name_temp_out,counter_out, image_oneup, text_in, state_in], [chatbot, state_in, image_hid, img_name_temp_out, counter_out]) text_in.submit(previous, [image_hid], [image_oneup]) chatbot.change(fn = upload_button_config, outputs=btn_upload) #, scroll_to_output = True) text_in.submit(None, [], [], _js = "() => document.getElementById('#chatbot-component').scrollTop = document.getElementById('#chatbot-component').scrollHeight") #with gr.Accordion("Release Notes", open=False): gr.Markdown(help_text) demo.queue(concurrency_count=10) demo.launch(debug=True, width="80%", height=2000)