"""Gradio chat-style demo for instruction-based image editing (InstructPix2Pix).

The user uploads an image and then types edit instructions; each instruction
is run through the Diffusers InstructPix2Pix pipeline. The special prompts
'reverse' and 'restart' step back one edit or return to the uploaded image.
"""
import os
import random
import time

import PIL
import gradio as gr
import requests
import torch
from PIL import Image
from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler

# Loading from Diffusers Library
model_id = "timbrooks/instruct-pix2pix"
# NOTE(review): help_text below tells users a safety checker is active, but
# the pipeline is loaded with safety_checker=None — confirm which is intended.
pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
    model_id, torch_dtype=torch.float16, revision="fp16", safety_checker=None
)
pipe.to("cuda")
pipe.enable_xformers_memory_efficient_attention()
pipe.unet.to(memory_format=torch.channels_last)

help_text = """
**Note: Please be advised that a safety checker has been implemented in this public space. Any attempts to generate inappropriate or NSFW images will result in the display of a black screen as a precautionary measure to protect all users. We appreciate your cooperation in maintaining a safe and appropriate environment for all members of our community.**

New features and bug-fixes:
1. Chat style interface
2. Now use **'reverse'** as prompt to get back the previous image after an unwanted edit
3. Use **'restart'** as prompt to get back to original image and start over!
4. Now you can load larger image files (~5 mb) as well

Some notes from the official [instruct-pix2pix](https://huggingface.co/spaces/timbrooks/instruct-pix2pix) Space by the authors and from the official [Diffusers docs](https://huggingface.co/docs/diffusers/main/en/api/pipelines/stable_diffusion/pix2pix) -

If you're not getting what you want, there may be a few reasons:
1. Is the image not changing enough? Your guidance_scale may be too low. It should be >1. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, usually at the expense of lower image quality. This value dictates how similar the output should be to the input. This pipeline requires a value of at least `1`. It's possible your edit requires larger changes from the original image.
2. Alternatively, you can toggle image_guidance_scale. Image guidance scale is to push the generated image towards the inital image. Image guidance scale is enabled by setting `image_guidance_scale > 1`. Higher image guidance scale encourages to generate images that are closely linked to the source image `image`, usually at the expense of lower image quality.
3. I have observed that rephrasing the instruction sometimes improves results (e.g., "turn him into a dog" vs. "make him a dog" vs. "as a dog").
4. Increasing the number of steps sometimes improves results.
5. Do faces look weird? The Stable Diffusion autoencoder has a hard time with faces that are small in the image. Try:
    * Cropping the image so the face takes up a larger portion of the frame.
"""

# Canned chatbot replies; one is picked at random after each successful edit.
_REPLIES = [
    "There you go",
    "Enjoy your image!",
    "Nice work! Wonder what you gonna do next!",
    "Way to go!",
    "Does this work for you?",
    "Something like this?",
]


def previous(image):
    """Return `image` unchanged (used to copy one hidden image slot into another)."""
    return image


def upload_image(file):
    """Open `file` (a path or file object) as a PIL image."""
    return Image.open(file)


def upload_button_config():
    """Return a Gradio update that hides the target component."""
    return gr.update(visible=False)


def upload_textbox_config(text_in):
    """Return a Gradio update that shows the target component; `text_in` is ignored."""
    return gr.update(visible=True)


def dummy_fn():
    """Return the placeholder string 'dummy'."""
    return 'dummy'


def _resize_to_width(image, base_width=512):
    """Return `image` scaled to `base_width` pixels wide, keeping its aspect ratio."""
    scale = base_width / float(image.size[0])
    # Clamp so an extremely wide image cannot round down to a zero-pixel height.
    height = max(1, int(float(image.size[1]) * scale))
    return image.resize((base_width, height), Image.Resampling.LANCZOS)


def _timestamped_name(img_name, digits=None):
    """Build a '.png' file name from `img_name`'s stem plus the current Unix time.

    When `digits` is given, only the last `digits` characters of the timestamp
    are appended.
    """
    stamp = str(int(time.time()))
    if digits is not None:
        stamp = stamp[-digits:]
    return os.path.splitext(img_name)[0] + stamp + '.png'


def chat(btn_upload, image_in, in_steps, in_guidance_scale, in_img_guidance_scale,
         image_hid, img_name, counter_out, image_oneup, prompt, history,
         progress=gr.Progress(track_tqdm=True)):
    """Handle one chat turn: the initial upload, an edit, 'reverse', or 'restart'.

    Args:
        btn_upload: Uploaded file; opened with PIL on the first call.
        image_in: Resized original image (PIL).
        in_steps: Number of inference steps.
        in_guidance_scale: Text guidance scale.
        in_img_guidance_scale: Image guidance scale.
        image_hid: Most recent edited image (PIL), or None before the first edit.
        img_name: File name of the most recently saved image.
        counter_out: 0 before the upload is processed, 1 before the first
            edit, greater than 1 afterwards.
        image_oneup: Image from one step earlier, restored by 'reverse'.
        prompt: The user's instruction text.
        history: List of (prompt, response) pairs, or None.
        progress: Gradio progress tracker.

    Returns:
        (history, history, image, image_file_name, counter_out). History is
        returned twice because it feeds both the chatbot and the state slot.
    """
    progress(0, desc="Starting...")
    history = history or []
    command = (prompt or '').lower()

    if command == 'reverse':
        if image_oneup is None:
            # Nothing has been edited yet, so there is no earlier image to restore.
            history.append((prompt, 'There is no previous image to revert to yet.'))
            return history, history, image_hid, img_name, counter_out
        temp_img_name = _timestamped_name(img_name)
        image_oneup.save(temp_img_name)
        history.append((prompt, 'Reverted to the last image '))
        return history, history, image_oneup, temp_img_name, counter_out

    if command == 'restart':
        temp_img_name = _timestamped_name(img_name)
        image_in = _resize_to_width(image_in)
        image_in.save(temp_img_name)
        history.append((prompt, 'Reverted to the original image '))
        return history, history, image_in, temp_img_name, counter_out

    if counter_out == 0:
        # First call: triggered by the upload itself, no inference is run.
        t1 = time.time()
        print(f"Time at start = {t1}")
        seed = random.randint(0, 1000000)
        img_name = f"./edited_image_{seed}.png"
        image_in = _resize_to_width(Image.open(btn_upload))
        image_in.save(img_name)
        history.append((prompt, ''))
        counter_out += 1
        t2 = time.time()
        print(f"Time at end = {t2}")
        time_diff = t2 - t1
        print(f"Time taken = {time_diff}")
        return history, history, image_in, img_name, counter_out

    # instruct-pix2pix inference: the first edit starts from the uploaded
    # image, every later edit starts from the previous result.
    source_image = image_in if counter_out == 1 else image_hid
    edited_image = pipe(
        prompt,
        image=source_image,
        num_inference_steps=int(in_steps),
        guidance_scale=float(in_guidance_scale),
        image_guidance_scale=float(in_img_guidance_scale),
    ).images[0]
    # Only the newest file is kept on disk.
    if os.path.exists(img_name):
        os.remove(img_name)
    temp_img_name = _timestamped_name(img_name, digits=4)
    edited_image.save(temp_img_name)
    history.append((prompt, random.choice(_REPLIES)))
    counter_out += 1
    return history, history, edited_image, temp_img_name, counter_out


# Blocks layout
with gr.Blocks(css="style.css") as demo:
    with gr.Column(elem_id="col-container") as main_col:
        gr.HTML("""

ChatPix2Pix: Image Editing by Instructions

For faster inference without waiting in the queue, you may duplicate the space and upgrade to GPU in settings Duplicate Space Diffusers implementation of instruct-pix2pix - InstructPix2Pix: Learning to Follow Image Editing Instructions!

""")
        with gr.Accordion("Advance settings for Training and Inference", open=False):
            image_in = gr.Image(visible=False, type='pil', label="Original Image")
            gr.Markdown("Advance settings for - Number of Inference steps, Guidanace scale, and Image guidance scale.")
            in_steps = gr.Number(label="Enter the number of Inference steps", value=20)
            in_guidance_scale = gr.Slider(1, 10, step=0.5, label="Set Guidance scale", value=7.5)
            in_img_guidance_scale = gr.Slider(1, 10, step=0.5, label="Set Image Guidance scale", value=1.5)
            # Hidden components that carry state between chat turns.
            image_hid = gr.Image(type='pil', visible=False)
            image_oneup = gr.Image(type='pil', visible=False)
            img_name_temp_out = gr.Textbox(visible=False)
            counter_out = gr.Number(visible=False, value=0, precision=0)
            dummy_num = gr.Number(visible=False)

        # The instruction box stays hidden until an image has been uploaded.
        text_in = gr.Textbox(
            value='',
            placeholder="Type your instructions here and press enter",
            elem_id="input_prompt",
            visible=False,
            label='Great! Now you can edit your image with Instructions',
        )
        btn_upload = gr.UploadButton("Upload image", file_types=["image"], file_count="single", elem_id="upload_button")
        chatbot = gr.Chatbot(elem_id='chatbot-component')
        state_in = gr.State()
        element_dummy = gr.HTML(visible=False, elem_id='dummy_elem')

        # Using Event Listeners
        chat_inputs = [
            btn_upload, image_in, in_steps, in_guidance_scale, in_img_guidance_scale,
            image_hid, img_name_temp_out, counter_out, image_oneup, text_in, state_in,
        ]
        # On upload the returned image is stored as the original (image_in);
        # on a text submit it is stored as the latest edit (image_hid).
        btn_upload.upload(chat, chat_inputs, [chatbot, state_in, image_in, img_name_temp_out, counter_out])
        btn_upload.upload(fn=upload_textbox_config, inputs=text_in, outputs=text_in)
        text_in.submit(chat, chat_inputs, [chatbot, state_in, image_hid, img_name_temp_out, counter_out])
        text_in.submit(previous, [image_hid], [image_oneup])
        chatbot.change(fn=upload_button_config, outputs=btn_upload)
        # getElementById takes the bare id (no leading '#').
        text_in.submit(
            None, [], [],
            _js="() => { const box = document.getElementById('chatbot-component'); if (box) { box.scrollTop = box.scrollHeight; } }",
        )

        gr.Markdown(help_text, elem_id='help_text')

demo.queue(concurrency_count=3)
demo.launch(debug=True)