# Hugging Face Spaces page header captured with the source (status: Running).
import os
from io import BytesIO

import diffusers
import gradio as gr
import numpy as np
import PIL
import requests
import torch
from diffusers import StableDiffusionXLInpaintPipeline, DDIMScheduler, UNet2DConditionModel
from diffusers import (
    AutoencoderKL,
    LCMScheduler,
)
from PIL import Image
from torchvision import transforms

from controlnet import ControlNetModel, ControlNetConditioningEmbedding
from pipeline_controlnet_sd_xl import StableDiffusionXLControlNetPipeline

# Read from the environment; not referenced again in the visible part of this file.
hf_token = os.environ.get("HF_TOKEN")
# Output resolutions the app will generate at, keyed by aspect ratio
# (width / height, rounded to two decimals). get_size() picks the entry whose
# key is nearest to the uploaded image's own ratio.
ratios_map = {
    0.5: {"width": 704, "height": 1408},
    0.57: {"width": 768, "height": 1344},
    0.68: {"width": 832, "height": 1216},
    0.72: {"width": 832, "height": 1152},
    0.78: {"width": 896, "height": 1152},
    0.82: {"width": 896, "height": 1088},
    0.88: {"width": 960, "height": 1088},
    0.94: {"width": 960, "height": 1024},
    1.00: {"width": 1024, "height": 1024},
    1.13: {"width": 1088, "height": 960},
    1.21: {"width": 1088, "height": 896},
    1.29: {"width": 1152, "height": 896},
    1.38: {"width": 1152, "height": 832},
    1.46: {"width": 1216, "height": 832},
    1.67: {"width": 1280, "height": 768},
    1.75: {"width": 1344, "height": 768},
    2.00: {"width": 1408, "height": 704},
}

# The same keys as a 1-D array, so the nearest ratio can be found with argmin.
ratios = np.array(list(ratios_map.keys()))
# Preprocessing applied to the masked PIL image before VAE encoding.
# torchvision's ToTensor yields a channel-first float tensor scaled to [0, 1];
# predict() then rescales it to [-1, 1].
image_transforms = transforms.Compose(
    [
        transforms.ToTensor(),
    ]
)
# Comma-separated negative prompt used both as predict()'s default and as the
# pre-filled value of the "negative_prompt" textbox.
default_negative_prompt = (
    "Logo,Watermark,Text,Ugly,Morbid,Extra fingers,Poorly drawn hands,"
    "Mutation,Blurry,Extra limbs,Gross proportions,Missing arms,"
    "Mutated hands,Long neck,Duplicate,Mutilated,Mutilated hands,"
    "Poorly drawn face,Deformed,Bad anatomy,Cloned face,Malformed limbs,"
    "Missing legs,Too many fingers"
)
def get_masked_image(image, image_mask, width, height):
    """Grey out the region of ``image`` selected by ``image_mask``.

    Args:
        image: PIL image. It is not resized here, so it must already be
            ``width`` x ``height``.
        image_mask: PIL mask; pixels brighter than 50 % (white) mark the area
            to inpaint. It is resized to ``(width, height)``.
        width: Target mask width in pixels.
        height: Target mask height in pixels.

    Returns:
        A 3-tuple ``(masked_image, image_mask_pil, masked_image_to_present)``:
        the image with the masked area set to mid-grey, the resized mask as a
        PIL image, and a second grey-masked copy intended for display.

    Raises:
        ValueError: If the image and the resized mask differ in height or width.
    """
    image_mask_pil = image_mask.resize((width, height))

    # Work in float [0, 1] so the grey fill and the 0.5 threshold are exact.
    pixels = np.array(image.convert("RGB")).astype(np.float32) / 255.0
    mask = np.array(image_mask_pil.convert("L")).astype(np.float32) / 255.0

    # Compare both spatial dimensions; checking the height alone would let a
    # width mismatch through to an opaque boolean-index error below.
    if pixels.shape[:2] != mask.shape[:2]:
        raise ValueError("image and image_mask must have the same image size")

    to_inpaint = mask > 0.5

    masked_image_to_present = pixels.copy()
    masked_image_to_present[to_inpaint] = (0.5, 0.5, 0.5)  # set as masked pixel
    pixels[to_inpaint] = 0.5  # set as masked pixel - s.t. will be grey

    masked_image = Image.fromarray((pixels * 255.0).astype(np.uint8))
    masked_image_to_present = Image.fromarray(
        (masked_image_to_present * 255.0).astype(np.uint8)
    )
    return masked_image, image_mask_pil, masked_image_to_present
def get_size(init_image):
    """Return the supported ``(width, height)`` closest in aspect ratio to ``init_image``.

    Args:
        init_image: Object exposing ``.size`` as ``(width, height)``, e.g. a
            PIL image.

    Returns:
        The ``(width, height)`` pair from ``ratios_map`` whose aspect-ratio key
        is nearest to ``width / height`` of the input.
    """
    w, h = init_image.size
    curr_ratio = w / h
    nearest = np.argmin(np.abs(curr_ratio - ratios))
    # Convert the numpy scalar back to a plain float for the dict lookup.
    chosen_ratio = ratios_map[float(ratios[nearest])]
    return chosen_ratio["width"], chosen_ratio["height"]
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load, init model.
# from_pretrained is called on the class itself: instantiating ControlNetModel()
# first would build a throwaway default-config model only to discard it.
# (Alternative checkpoint previously tried: "briaai/DEV-ControlNetInpaintingFast".)
controlnet = ControlNetModel.from_pretrained("briaai/DEV-GenerativeFill", torch_dtype=torch.float16)
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "briaai/BRIA-2.3",
    controlnet=controlnet.to(dtype=torch.float16),
    torch_dtype=torch.float16,
    vae=vae,
)

# Few-step sampling: LCM scheduler plus the fused "FAST" LoRA weights.
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
pipe.load_lora_weights("briaai/BRIA-2.3-FAST-LORA")
pipe.fuse_lora()

pipe = pipe.to(device)
# predict() encodes with the module-level `vae`; rebind it to the pipeline's VAE.
vae = pipe.vae
# NOTE(review): diffusers advises against moving a pipeline to CUDA before
# enabling model CPU offload; the original order is kept here - confirm
# whether the pipe.to(device) call above is still wanted.
pipe.enable_model_cpu_offload()
def read_content(file_path: str) -> str:
    """Return the full text of ``file_path``, decoded as UTF-8.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file's content as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()
    return content
def predict(dict, prompt="", negative_prompt=default_negative_prompt, guidance_scale=1.2, steps=12, seed=123456):
    """Inpaint the masked area of an uploaded image and return the result.

    Args:
        dict: Mapping with PIL images under the keys ``"image"`` and ``"mask"``
            (the name shadows the builtin but is kept for compatibility).
        prompt: Text describing what should appear in the masked area.
        negative_prompt: Text describing what to avoid; an empty string is
            passed to the pipeline as ``None``.
        guidance_scale: Guidance scale forwarded to the pipeline.
        steps: Number of inference steps (cast to ``int``).
        seed: Seed for the sampling generator (cast to ``int``).

    Returns:
        The first image of the pipeline output.
    """
    if negative_prompt == "":
        negative_prompt = None

    init_image = dict["image"].convert("RGB")
    mask = dict["mask"].convert("L")

    # Snap to the nearest supported resolution and bring both inputs to it.
    width, height = get_size(init_image)
    init_image = init_image.resize((width, height))
    mask = mask.resize((width, height))

    # The third return value (display copy) is not needed here.
    masked_image, image_mask, _ = get_masked_image(init_image, mask, width, height)

    # [0, 1] -> [-1, 1], then add a batch dimension.
    masked_image_tensor = image_transforms(masked_image)
    masked_image_tensor = (masked_image_tensor - 0.5) / 0.5
    masked_image_tensor = masked_image_tensor.unsqueeze(0).to(device=device)

    # Inference only: no autograd graph is needed for the VAE encode.
    with torch.no_grad():
        control_latents = vae.encode(
            masked_image_tensor[:, :3, :, :].to(vae.dtype)
        ).latent_dist.sample()
    control_latents = control_latents * vae.config.scaling_factor

    # Binarize the mask to {0.0, 1.0} (pixel values above 128 count as masked).
    mask_tensor = torch.tensor(np.array(image_mask), dtype=torch.float32)[None, ...]
    mask_tensor = (mask_tensor > 128.0).to(torch.float32)
    mask_tensor = mask_tensor.to(device=device)

    # Downsample the mask to the latent grid and append it as an extra channel.
    mask_resized = torch.nn.functional.interpolate(
        mask_tensor[None, ...],
        size=(control_latents.shape[2], control_latents.shape[3]),
        mode='nearest',
    )
    masked_image = torch.cat([control_latents, mask_resized], dim=1)

    generator = torch.Generator(device=device).manual_seed(int(seed))

    output = pipe(
        prompt=prompt,
        width=width,
        height=height,
        negative_prompt=negative_prompt,
        image=masked_image,  # control image
        init_image=init_image,
        mask_image=mask_tensor,
        guidance_scale=guidance_scale,
        num_inference_steps=int(steps),
        generator=generator,
        # Was misspelled "controlnet_conditioning_sale", so the value never
        # reached the intended parameter.
        controlnet_conditioning_scale=1.0,
    )

    # Actually call empty_cache; the bare attribute reference was a no-op.
    torch.cuda.empty_cache()
    return output.images[0]
# Custom stylesheet handed to gr.Blocks(css=...). Selectors target the
# elem_id values assigned to the widgets in the UI definition.
css = '''
.gradio-container{max-width: 1100px !important}
#image_upload{min-height:400px}
#image_upload [data-testid="image"], #image_upload [data-testid="image"] > div{min-height: 400px}
#mask_radio .gr-form{background:transparent; border: none}
#word_mask{margin-top: .75em !important}
#word_mask textarea:disabled{opacity: 0.3}
.footer {margin-bottom: 45px;margin-top: 35px;text-align: center;border-bottom: 1px solid #e5e5e5}
.footer>p {font-size: .8rem; display: inline-block; padding: 0 10px;transform: translateY(10px);background: white}
.dark .footer {border-color: #303030}
.dark .footer>p {background: #0b0f19}
.acknowledgments h4{margin: 1.25em 0 .25em 0;font-weight: bold;font-size: 115%}
#image_upload .touch-none{display: flex}
@keyframes spin {
from {
transform: rotate(0deg);
}
to {
transform: rotate(360deg);
}
}
#share-btn-container {padding-left: 0.5rem !important; padding-right: 0.5rem !important; background-color: #000000; justify-content: center; align-items: center; border-radius: 9999px !important; max-width: 13rem; margin-left: auto;}
div#share-btn-container > div {flex-direction: row;background: black;align-items: center}
#share-btn-container:hover {background-color: #060606}
#share-btn {all: initial; color: #ffffff;font-weight: 600; cursor:pointer; font-family: 'IBM Plex Sans', sans-serif; margin-left: 0.5rem !important; padding-top: 0.5rem !important; padding-bottom: 0.5rem !important;right:0;}
#share-btn * {all: unset}
#share-btn-container div:nth-child(-n+2){width: auto !important;min-height: 0px !important;}
#share-btn-container .wrap {display: none !important}
#share-btn-container.hidden {display: none!important}
#prompt input{width: calc(100% - 160px);border-top-right-radius: 0px;border-bottom-right-radius: 0px;}
#run_button{position:absolute;margin-top: 11px;right: 0;margin-right: 0.8em;border-bottom-left-radius: 0px;
border-top-left-radius: 0px;}
#prompt-container{margin-top:-18px;}
#prompt-container .form{border-top-left-radius: 0;border-top-right-radius: 0}
#image_upload{border-bottom-left-radius: 0px;border-bottom-right-radius: 0px}
'''
# Gradio UI: upload + sketch-mask input on the left, generated image on the
# right. Both the button and pressing Enter in the prompt box call predict().
# NOTE(review): the nesting of the `with` blocks below was reconstructed from
# the layout calls because the original indentation was lost - confirm the
# placement of the header column, the negative-prompt box and the footer.
image_blocks = gr.Blocks(css=css, elem_id="total-container")
with image_blocks as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("## BRIA GenerativeFill")
        gr.HTML('''
            <p style="margin-bottom: 10px; font-size: 94%">
            This is a demo for
            <a href="https://huggingface.co/briaai/DEV-GenerativeFill" target="_blank">BRIA Generative Fill </a>.
            BRIA Generative Fill improve the inpainting task for Modify/Add/Replace compared to BRIA 2.3 inpainting while still trained on licensed data, and so provide full legal liability coverage for copyright and privacy infringement.
            </p>
        ''')
    with gr.Row():
        with gr.Column():
            # NOTE(review): `tool='sketch'` is the Gradio 3.x sketch API while
            # `sources=[...]` is the Gradio 4.x spelling - confirm against the
            # Gradio version this Space pins.
            image = gr.Image(sources=['upload'], elem_id="image_upload", tool='sketch', type="pil", label="Upload", height=400)
            with gr.Row(elem_id="prompt-container", equal_height=True):
                with gr.Row():
                    prompt = gr.Textbox(placeholder="Your prompt (what you want in place of what is erased)", show_label=False, elem_id="prompt")
                    btn = gr.Button("Inpaint!", elem_id="run_button")
            with gr.Accordion(label="Advanced Settings", open=False):
                with gr.Row(equal_height=True):
                    guidance_scale = gr.Number(value=1.2, minimum=0.8, maximum=2.5, step=0.1, label="guidance_scale")
                    steps = gr.Number(value=12, minimum=6, maximum=20, step=1, label="steps")
                    seed = gr.Number(value=123456, minimum=0, maximum=999999, step=1, label="seed")
                    negative_prompt = gr.Textbox(label="negative_prompt", value=default_negative_prompt, placeholder=default_negative_prompt, info="what you don't want to see in the image")
        with gr.Column():
            image_out = gr.Image(label="Output", elem_id="output-img", height=400)

    # Input order must match predict()'s positional parameters.
    btn.click(fn=predict, inputs=[image, prompt, negative_prompt, guidance_scale, steps, seed], outputs=[image_out], api_name='run')
    prompt.submit(fn=predict, inputs=[image, prompt, negative_prompt, guidance_scale, steps, seed], outputs=[image_out])

    gr.HTML(
        """
            <div class="footer">
            <p>Model by <a href="https://huggingface.co/diffusers" style="text-decoration: underline;" target="_blank">Diffusers</a> - Gradio Demo by 🤗 Hugging Face
            </p>
            </div>
        """
    )

# Queue up to 25 pending requests; the programmatic API is closed and hidden.
image_blocks.queue(max_size=25,api_open=False).launch(show_api=False)