Gemini899 committed
Commit b1bb2b0 · verified · 1 Parent(s): 4499056

Update flux1_img2img.py

Files changed (1):
  1. flux1_img2img.py +30 -18
flux1_img2img.py CHANGED
@@ -4,6 +4,15 @@ from PIL import Image
 import sys
 import spaces
 
+def resize_image(image, max_res=512):
+    w, h = image.size
+    ratio = min(max_res / w, max_res / h)
+    if ratio < 1.0:
+        new_w = int(w * ratio)
+        new_h = int(h * ratio)
+        image = image.resize((new_w, new_h), Image.LANCZOS)
+    return image
+
 @spaces.GPU
 def process_image(
     image,
@@ -19,27 +28,32 @@ def process_image(
         print("empty input image returned")
         return None
 
-    # 1) Use float16 (T4 doesn't have native bf16 support)
-    # 2) low_cpu_mem_usage=True for more efficient loading
-    # 3) Optionally enable xFormers
+    # Try resizing input to reduce VRAM usage
+    image = resize_image(image, 512)
+
+    # Load with float16
     pipe = FluxImg2ImgPipeline.from_pretrained(
         model_id,
-        torch_dtype=torch.float16,
-        revision="fp16",  # sometimes needed if the repo has an FP16 branch
-        low_cpu_mem_usage=True
-    )
-
-    # Move to GPU
-    pipe.to("cuda")
+        torch_dtype=torch.float16
+    ).to("cuda")
 
-    # If you have xFormers installed (pip install xformers):
+    # If xFormers installed, enable memory efficient attention
     try:
         pipe.enable_xformers_memory_efficient_attention()
         print("Enabled xFormers memory efficient attention.")
     except Exception as e:
-        print("xFormers not available:", e)
+        print("Could not enable xFormers:", e)
+
+    # Enable CPU offload to reduce VRAM usage
+    # (Pick either model_cpu_offload or sequential_cpu_offload)
+    try:
+        pipe.enable_model_cpu_offload()
+    except Exception as e:
+        print("Could not enable model_cpu_offload:", e)
+
+    # Optional: enable VAE slicing
+    pipe.enable_vae_slicing()
 
-    # Create a reproducible generator
     generator = torch.Generator("cuda").manual_seed(seed)
 
     print(f"Prompt: {prompt}")
@@ -48,18 +62,16 @@ def process_image(
         image=image,
         generator=generator,
         strength=strength,
-        guidance_scale=0,  # same as your original code
+        guidance_scale=0,
         num_inference_steps=num_inference_steps,
         max_sequence_length=256
     )
 
-    # TODO: support mask if needed
     return output.images[0]
 
 if __name__ == "__main__":
-    # Usage: python img2img.py input_image.png input_mask.png output.png
     image = Image.open(sys.argv[1]).convert("RGB")
-    mask = Image.open(sys.argv[2]).convert("RGB")  # currently unused
+    mask = Image.open(sys.argv[2]).convert("RGB")  # unused
     result = process_image(image, mask)
-    if result is not None:
+    if result:
         result.save(sys.argv[3])
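
For reference, the command-line entry point is unchanged by this commit: the __main__ block still reads an input image, a mask (parsed but unused), and an output path from sys.argv, so the script is invoked along the lines of

    python flux1_img2img.py input_image.png input_mask.png output.png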
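
The memory-saving calls this commit introduces (float16 weights, xFormers attention, model CPU offload, VAE slicing) can also be exercised outside the Space. Below is a minimal standalone sketch of the same setup, not the Space's exact code: the model id "black-forest-labs/FLUX.1-schnell", the prompt, the file names, and the sampler settings are illustrative placeholders, and every enable_* call is wrapped so a missing dependency or an older diffusers build degrades gracefully.

    import torch
    from PIL import Image
    from diffusers import FluxImg2ImgPipeline

    # Placeholder model id; the Space defines its own model_id elsewhere in the script
    model_id = "black-forest-labs/FLUX.1-schnell"

    # Load in float16 and move to the GPU, mirroring the committed code
    pipe = FluxImg2ImgPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda")

    # Optional memory savers, same calls as in the diff above
    try:
        pipe.enable_xformers_memory_efficient_attention()  # requires xformers to be installed
    except Exception as e:
        print("Could not enable xFormers:", e)
    try:
        pipe.enable_model_cpu_offload()  # keeps idle sub-models on the CPU between steps
    except Exception as e:
        print("Could not enable model_cpu_offload:", e)
    try:
        pipe.enable_vae_slicing()  # decode latents in slices to lower peak VRAM
    except Exception as e:
        print("Could not enable VAE slicing:", e)

    # Illustrative img2img call with the same arguments the Space passes
    init = Image.open("input_image.png").convert("RGB")
    result = pipe(
        prompt="a watercolor landscape",
        image=init,
        strength=0.6,
        guidance_scale=0,
        num_inference_steps=4,
        max_sequence_length=256,
        generator=torch.Generator("cuda").manual_seed(42),
    ).images[0]
    result.save("output.png")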