Jordan Legg committed
Commit 8a31b39
Parent(s): 4e6d911
Files changed (1):
  1. app.py +34 -10

app.py CHANGED
@@ -5,10 +5,25 @@ import spaces
 import torch
 from diffusers import DiffusionPipeline
 
-dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
-pipe = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell", torch_dtype=dtype).to(device)
+# Load the model in FP16
+pipe = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell", torch_dtype=torch.float16)
+
+# Move the pipeline to GPU
+pipe = pipe.to(device)
+
+# Convert text encoders to full precision
+pipe.text_encoder = pipe.text_encoder.to(torch.float32)
+if hasattr(pipe, 'text_encoder_2'):
+    pipe.text_encoder_2 = pipe.text_encoder_2.to(torch.float32)
+
+# Enable memory efficient attention if available
+if hasattr(pipe, 'enable_xformers_memory_efficient_attention'):
+    pipe.enable_xformers_memory_efficient_attention()
+
+# Compile the UNet for potential speedups
+pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
 
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 2048
@@ -17,15 +32,24 @@ MAX_IMAGE_SIZE = 2048
 def infer(prompt, seed=42, randomize_seed=False, width=1024, height=1024, num_inference_steps=4, progress=gr.Progress(track_tqdm=True)):
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
-    generator = torch.Generator().manual_seed(seed)
-    image = pipe(
-        prompt = prompt,
-        width = width,
-        height = height,
-        num_inference_steps = num_inference_steps,
-        generator = generator,
+    generator = torch.Generator(device=device).manual_seed(seed)
+
+    # Use full precision for text encoding
+    with torch.no_grad():
+        text_inputs = pipe.tokenizer(prompt, return_tensors="pt").to(device)
+        text_embeddings = pipe.text_encoder(text_inputs.input_ids)[0]
+
+    # Use mixed precision for the rest of the pipeline
+    with torch.inference_mode(), torch.autocast(device_type='cuda', dtype=torch.float16):
+        image = pipe(
+            prompt_embeds=text_embeddings,
+            width=width,
+            height=height,
+            num_inference_steps=num_inference_steps,
+            generator=generator,
         guidance_scale=0.0
-    ).images[0]
+        ).images[0]
+
     return image, seed
 
 examples = [
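
Review notes on this diff follow; the sketches are suggestions, not part of commit 8a31b39.

Note 1: the hasattr guard around xformers always passes, because enable_xformers_memory_efficient_attention is defined on the DiffusionPipeline base class whether or not xformers is installed. It is the call itself that raises when the package is missing, so a try/except expresses "enable if available" more accurately. A minimal sketch, assuming only that the failed call raises:

    # Guard the call, not the attribute: the method exists on every
    # DiffusionPipeline and raises only when xformers is unavailable.
    try:
        pipe.enable_xformers_memory_efficient_attention()
    except Exception as exc:
        print(f"xformers unavailable, using default attention: {exc}")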
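
Note 2: FLUX.1-schnell loads as a FluxPipeline, which exposes its denoiser as pipe.transformer rather than pipe.unet, so the torch.compile line would raise AttributeError at startup. A hedged sketch that compiles whichever attribute the loaded pipeline actually exposes (attribute names as in current diffusers; verify against the installed version):

    # Compile the denoiser under its real attribute name.
    # FLUX pipelines use `transformer`; UNet-based pipelines use `unet`.
    name = "transformer" if hasattr(pipe, "transformer") else "unet"
    setattr(pipe, name, torch.compile(getattr(pipe, name),
                                      mode="reduce-overhead", fullgraph=True))

fullgraph=True is itself optimistic: any graph break in the model makes compilation fail outright, so dropping that flag is the safer default.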
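
Note 3: the manual text-encoding path in infer() is unlikely to run as written. FluxPipeline expects both prompt_embeds (a T5 sequence embedding) and pooled_prompt_embeds (a pooled CLIP embedding), while pipe.tokenizer/pipe.text_encoder here produce only CLIP hidden states. A sketch that keeps the commit's fp32-encode / fp16-denoise split but lets the pipeline build the tensors it needs; it assumes FluxPipeline.encode_prompt returns (prompt_embeds, pooled_prompt_embeds, text_ids), as in recent diffusers releases, so check the installed version:

    # Encode once in full precision with the pipeline's own helper ...
    with torch.no_grad():
        prompt_embeds, pooled_prompt_embeds, _ = pipe.encode_prompt(
            prompt=prompt, prompt_2=prompt, device=device
        )
    # ... then denoise under fp16 autocast, as the commit intends.
    with torch.inference_mode(), torch.autocast(device_type="cuda", dtype=torch.float16):
        image = pipe(
            prompt_embeds=prompt_embeds,
            pooled_prompt_embeds=pooled_prompt_embeds,
            width=width,
            height=height,
            num_inference_steps=num_inference_steps,
            generator=generator,
            guidance_scale=0.0,
        ).images[0]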