multimodalart's picture
Update app.py
cafc712 verified
raw
history blame
3.93 kB
import gradio as gr
import torch
import spaces
from diffusers import FluxInpaintPipeline
from PIL import Image, ImageFile
import numpy as np
# Disabled: uncommenting this sets PIL's LOAD_TRUNCATED_IMAGES flag.
#ImageFile.LOAD_TRUNCATED_IMAGES = True

# Initialize the pipeline once at import time: load the FLUX.1-dev inpainting
# pipeline with bfloat16 weights.
pipe = FluxInpaintPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    torch_dtype=torch.bfloat16
)
# Move the pipeline to the CUDA device before attaching the LoRA weights.
pipe.to("cuda")
# Attach the "visual-identity-design" weights from the In-Context-LoRA repo.
pipe.load_lora_weights(
    "ali-vilab/In-Context-LoRA",
    weight_name="visual-identity-design.safetensors"
)
def square_center_crop(img, target_size=768):
    """Center-crop ``img`` to a square and resize it to ``target_size``.

    Args:
        img: PIL image of any mode and aspect ratio.
        target_size: Edge length, in pixels, of the returned square image.

    Returns:
        An RGB PIL image of size ``(target_size, target_size)``.
    """
    # Normalize every non-RGB mode (RGBA, P, L, LA, CMYK, ...) rather than only
    # RGBA/P, so the result always has exactly three channels.
    if img.mode != 'RGB':
        img = img.convert('RGB')

    width, height = img.size
    crop_size = min(width, height)

    # Center the square window on the image; integer division biases the
    # window toward the top-left by at most one pixel.
    left = (width - crop_size) // 2
    top = (height - crop_size) // 2
    box = (left, top, left + crop_size, top + crop_size)

    return img.crop(box).resize(
        (target_size, target_size), Image.Resampling.LANCZOS
    )
def duplicate_horizontally(img):
    """Return a new image made of two copies of ``img`` placed side by side.

    Args:
        img: Square PIL image.

    Returns:
        A PIL image with the same height as ``img`` and twice its width.

    Raises:
        ValueError: If ``img`` is not square.
    """
    side_w, side_h = img.size
    if side_w != side_h:
        raise ValueError(f"Input image must be square, got {side_w}x{side_h}")

    # Convert the PIL image to a numpy array and stack it next to itself
    # along the column (horizontal) axis.
    pixels = np.array(img)
    return Image.fromarray(np.hstack((pixels, pixels)))
# Load the mask image once at import time; generate() passes it to the
# pipeline as ``mask_image`` on every call. The path is relative to the
# process working directory.
mask = Image.open("mask_square.png")
def crop_input(image):
    """Turn the uploaded logo into the square image used for generation.

    Args:
        image: PIL image from the upload component, or ``None`` when nothing
            has been uploaded.

    Returns:
        The result of ``square_center_crop(image)``.

    Raises:
        gr.Error: If no image was provided, so the UI shows a readable
            message instead of an ``AttributeError`` on ``None``.
    """
    if image is None:
        raise gr.Error("Please upload a logo image first.")
    return square_center_crop(image)
@spaces.GPU
def generate(image, prompt_user, progress=gr.Progress(track_tqdm=True)):
    """Render the logo applied to the scene described by ``prompt_user``.

    The logo is duplicated side by side into a two-panel image, which is
    passed to the inpainting pipeline together with the module-level ``mask``
    and a prompt that describes both panels.

    Args:
        image: Square PIL image of the logo. ``duplicate_horizontally``
            rejects non-square input.
        prompt_user: Text describing what the logo should be applied to;
            ``None`` is treated as an empty string.
        progress: Gradio progress tracker. It is not used directly in the
            body; it is declared with ``track_tqdm=True`` in the signature.

    Returns:
        A tuple ``(right_panel, full_output)``: the right half of the
        generated image and the complete two-panel image.

    Raises:
        gr.Error: If ``image`` is ``None``.
        ValueError: If ``image`` is not square.
    """
    if image is None:
        raise gr.Error("Please upload a logo image first.")

    prompt_structure = "The two-panel image showcases the logo of a brand, [LEFT] the left panel is showing the logo [RIGHT] the right panel has this logo applied to "
    # Guard against a missing prompt so string concatenation cannot fail.
    prompt = prompt_structure + (prompt_user or "")

    image = duplicate_horizontally(image)

    # NOTE(review): the mask presumably selects the right panel for
    # repainting -- confirm against mask_square.png.
    out = pipe(
        prompt=prompt,
        image=image,
        mask_image=mask,
        guidance_scale=3.75,
        height=768,
        width=1536,
        num_inference_steps=28,
        max_sequence_length=256,
        strength=1
    ).images[0]

    # Split off the right half, which is returned on its own as well.
    width, height = out.size
    half_width = width // 2
    image_2 = out.crop((half_width, 0, width, height))
    return image_2, out
def _generate_from_example(image, prompt_user, progress=gr.Progress(track_tqdm=True)):
    """Run an example through the same crop-then-generate path as the button."""
    return generate(crop_input(image), prompt_user, progress)


# Build the UI: logo upload and prompt on the left, results on the right.
with gr.Blocks() as demo:
    gr.Markdown("# Logo in Context")
    gr.Markdown("### In-Context LoRA + Image-to-Image, apply your logo to anything")

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(
                label="Upload Logo Image",
                type="pil"
            )
            # Hidden holder for the cropped logo handed from the crop step to
            # the generation step.
            cropped_image = gr.Image(
                visible=False,
                type="pil"
            )
            prompt_input = gr.Textbox(
                label="Where should the logo be applied?",
                placeholder="e.g., a coffee cup on a wooden table"
            )
            generate_btn = gr.Button("Generate Application", variant="primary")

        with gr.Column():
            output_image = gr.Image(label="Generated Application")
            output_side = gr.Image(label="Side by side")

    # Examples feed the raw example image, so they go through the wrapper
    # that crops first; calling generate directly would skip the crop that
    # the button flow applies.
    gr.Examples(
        examples=[
            ["huggingface.png", "A hat"],
            ["awesome.png", "A tattoo on a leg"],
            ["dvd_logo.png", "a flower pot"]
        ],
        inputs=[input_image, prompt_input],
        outputs=[output_image, output_side],
        fn=_generate_from_example,
        cache_examples="lazy"
    )

    with gr.Row():
        # The string body is kept flush-left so its text stays unchanged.
        gr.Markdown("""
### Instructions:
1. Upload a logo image (preferably square)
2. Describe where you'd like to see the logo applied
3. Click 'Generate Application' and wait for the result
Note: The generation process might take a few moments.
""")

    # Set up the click event: crop first, then generate only if the crop
    # succeeded. With .then() the second step would also run after a failed
    # crop, using whatever the hidden image held before.
    generate_btn.click(
        fn=crop_input,
        inputs=[input_image],
        outputs=[cropped_image]
    ).success(
        fn=generate,
        inputs=[cropped_image, prompt_input],
        outputs=[output_image, output_side]
    )

demo.launch()