|
|
import gc
from pathlib import Path

import numpy as np
import torch
from diffusers import (
    QwenImageInpaintPipeline,
    QwenImagePipeline,
    QwenImageTransformer2DModel,
)
from optimum.quanto import freeze, qint8, quantize
from PIL import Image
from transformers import Qwen2_5_VLForConditionalGeneration
|
|
|
|
|
|
|
|
# --- Prompting -------------------------------------------------------------
# Adjacent string literals are concatenated into one prompt string.
prompt = (
    "equirectangular, a woman and a man sitting at a cafe, the woman has red hair "
    "and she's wearing purple sweater with a black scarf and a white hat, the man "
    "is sitting on the other side of the table and he's wearing a white shirt with "
    "a purple scarf and red hat, both of them are sipping their coffee while in the "
    "table there's some cake slices on their respective plates, each with forks and "
    "knives at each side."
)
negative_prompt = ""

# --- Output and sampling ---------------------------------------------------
output_filename = "qwen_int8.png"
width = 2048
height = 1024
true_cfg_scale = 4.0
num_inference_steps = 25
seed = 42

# --- Models ----------------------------------------------------------------
# LoRA repository and the weight file loaded from it.
lora_model_id = "ProGamerGov/qwen-360-diffusion"
lora_filename = "qwen-360-diffusion-int8-bf16-v1.safetensors"

# Base model, plus the dtype and device used when loading and sampling.
model_id = "Qwen/Qwen-Image"
torch_dtype = torch.bfloat16
device = "cuda"

# --- Seam repair -----------------------------------------------------------
# When enabled, a second inpainting pass is run over the wrap-around seam.
fix_seam = True
inpaint_strength = 0.5
# Despite the name this is a fraction of the image width, not a pixel count.
seam_width = 0.10
|
|
|
|
|
|
|
|
def shift_equirect(img):
    """Roll an image horizontally by half its width, wrapping around.

    The columns wrap, so the left/right border of the input ends up at the
    horizontal centre of the result. For an even width, applying the
    function twice restores the original column order.

    Args:
        img: Image to shift; it is converted with ``np.asarray`` and may be
            single-channel ``(H, W)`` or multi-channel ``(H, W, C)``.

    Returns:
        A new PIL image built from the rolled pixel array.
    """
    pixels = np.asarray(img)
    # Roll the raw pixel values along the width axis only. Working on the
    # array as-is avoids a float round trip and does not require a channel
    # axis, so grayscale images are handled too.
    rolled = np.roll(pixels, pixels.shape[1] // 2, axis=1)
    return Image.fromarray(rolled)
|
|
|
|
|
|
|
|
def create_seam_mask(w, h, frac=0.10):
    """Create a mask with a white vertical band at the horizontal centre.

    Args:
        w: Mask width in pixels.
        h: Mask height in pixels.
        frac: Band width as a fraction of ``w``. The band is always at
            least one pixel wide and never wider than the mask.

    Returns:
        An 8-bit single-channel PIL image of size ``(w, h)`` that is 255
        inside the band and 0 elsewhere.
    """
    seam_w = max(1, int(w * frac))
    # Derive the right edge from the left one so the band is exactly
    # ``seam_w`` columns wide. A symmetric ``c - seam_w // 2 : c + seam_w // 2``
    # slice would lose a column for odd widths and be empty for seam_w == 1.
    # Clamping keeps the slice inside the image when frac >= 1.
    left = max(0, w // 2 - seam_w // 2)
    right = min(w, left + seam_w)

    mask = np.zeros((h, w), dtype=np.uint8)
    mask[:, left:right] = 255
    # A 2-D uint8 array is interpreted by Pillow as a grayscale ("L") image.
    return Image.fromarray(mask)
|
|
|
|
|
|
|
|
def load_pipeline(text_encoder, transformer, mode="t2i"):
    """Build a Qwen-Image pipeline around already-loaded components.

    Args:
        text_encoder: Text encoder instance passed to ``from_pretrained``.
        transformer: Transformer instance passed to ``from_pretrained``.
        mode: ``"t2i"`` selects ``QwenImagePipeline``; any other value
            selects ``QwenImageInpaintPipeline``.

    Returns:
        The pipeline, with the LoRA weights loaded and with model CPU
        offload and VAE tiling enabled.
    """
    if mode == "t2i":
        pipeline_cls = QwenImagePipeline
    else:
        pipeline_cls = QwenImageInpaintPipeline

    # Reuse the caller's components instead of loading them from the hub.
    components = {"transformer": transformer, "text_encoder": text_encoder}
    pipeline = pipeline_cls.from_pretrained(
        model_id,
        torch_dtype=torch_dtype,
        use_safetensors=True,
        low_cpu_mem_usage=True,
        **components,
    )

    pipeline.load_lora_weights(lora_model_id, weight_name=lora_filename)
    pipeline.enable_model_cpu_offload()
    pipeline.enable_vae_tiling()
    return pipeline
|
|
|
|
|
|
|
|
def _load_quantized_transformer():
    """Load the base model's transformer and quantize its weights to int8."""
    transformer = QwenImageTransformer2DModel.from_pretrained(
        model_id,
        subfolder="transformer",
        torch_dtype=torch_dtype,
        low_cpu_mem_usage=True,
    )
    quantize(transformer, weights=qint8)
    freeze(transformer)
    return transformer


def _load_quantized_text_encoder():
    """Load the base model's text encoder on CPU and quantize its weights to int8."""
    text_encoder = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        model_id,
        subfolder="text_encoder",
        torch_dtype=torch_dtype,
        low_cpu_mem_usage=True,
        device_map={"": "cpu"},
    )
    quantize(text_encoder, weights=qint8)
    freeze(text_encoder)
    return text_encoder


def _seamfix_filename(filename):
    """Return ``filename`` with ``_seamfix`` inserted before its extension."""
    path = Path(filename)
    return str(path.with_name(f"{path.stem}_seamfix{path.suffix}"))


def main():
    """Generate the panorama and, if ``fix_seam`` is set, repair its seam.

    The first pass runs the text-to-image pipeline and saves the result to
    ``output_filename``. The optional second pass rolls the image by half
    its width so the wrap-around seam sits in the centre, inpaints a
    vertical band there, rolls the result back, and saves it next to the
    first output with a ``_seamfix`` suffix.
    """
    transformer = _load_quantized_transformer()
    text_encoder = _load_quantized_text_encoder()

    # The same generator object is used for both passes, so the second pass
    # continues from the random state left by the first one.
    generator = torch.Generator(device=device).manual_seed(seed)

    # Arguments common to the text-to-image and the inpainting call.
    shared_kwargs = {
        "prompt": prompt,
        "negative_prompt": negative_prompt,
        "width": width,
        "height": height,
        "num_inference_steps": num_inference_steps,
        "true_cfg_scale": true_cfg_scale,
        "generator": generator,
    }

    pipe = load_pipeline(text_encoder, transformer, mode="t2i")
    image = pipe(**shared_kwargs).images[0]
    image.save(output_filename)

    if not fix_seam:
        return

    # Drop the first pipeline and collect garbage before asking CUDA to
    # release cached blocks; empty_cache() cannot free memory that is still
    # referenced from Python.
    del pipe
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    shifted = shift_equirect(image)
    mask = create_seam_mask(width, height, frac=seam_width)

    # Any mode other than "t2i" makes load_pipeline build the inpaint pipeline.
    # NOTE(review): load_pipeline calls load_lora_weights again on the same
    # transformer instance - confirm the LoRA is not applied a second time.
    pipe = load_pipeline(text_encoder, transformer, mode="i2i")
    image_fixed = pipe(
        image=shifted,
        mask_image=mask,
        strength=inpaint_strength,
        **shared_kwargs,
    ).images[0]

    # Undo the half-width roll so the output lines up with the first image.
    image_fixed = shift_equirect(image_fixed)
    # Build the name from the path's stem and suffix so a non-".png" output
    # name cannot make this save overwrite the first image.
    image_fixed.save(_seamfix_filename(output_filename))


if __name__ == "__main__":
    main()
|
|
|