Need Help in Python Script for Running FP8 Model on 8GB VRAM
Hello, thank you for providing such an amazing model. I am trying to run the FP8 model on 8 GB of VRAM and 32 GB of RAM. Currently I am using the code below, which gives a CUDA out-of-memory error when using model offloading, and a TypeError when using sequential offloading.
Any form of help or guidance would be appreciated
My code:
from optimum.quanto import freeze, qfloat8, quantize
from diffusers import FlowMatchEulerDiscreteScheduler, AutoencoderKL
from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel
from diffusers.pipelines.flux.pipeline_flux import FluxPipeline
from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
import torch

# Generate one image with FLUX.1 quantized to FP8 weights (optimum-quanto)
# on a low-VRAM (8 GB) GPU with 32 GB of system RAM.
dtype = torch.bfloat16
bfl_repo = "./model"  # local FLUX.1 checkpoint directory

scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(bfl_repo, subfolder="scheduler")
# FIX: the CLIP text encoder was previously loaded in default fp32, doubling
# its memory footprint relative to every other bfloat16 component.
text_encoder = CLIPTextModel.from_pretrained("./openai", torch_dtype=dtype)
tokenizer = CLIPTokenizer.from_pretrained("./openai")
text_encoder_2 = T5EncoderModel.from_pretrained(bfl_repo, subfolder="text_encoder_2", torch_dtype=dtype)
# NOTE: torch_dtype is meaningless for a tokenizer, so it is no longer passed here.
tokenizer_2 = T5TokenizerFast.from_pretrained(bfl_repo, subfolder="tokenizer_2")
vae = AutoencoderKL.from_pretrained(bfl_repo, subfolder="vae", torch_dtype=dtype)
transformer = FluxTransformer2DModel.from_pretrained(bfl_repo, subfolder="transformer", torch_dtype=dtype)

# Quantize the two largest components (DiT transformer and T5 encoder) to FP8
# weights, then freeze to materialize the quantized tensors before inference.
quantize(transformer, weights=qfloat8)
freeze(transformer)
quantize(text_encoder_2, weights=qfloat8)
freeze(text_encoder_2)

pipeline = FluxPipeline(
    scheduler=scheduler,
    text_encoder=text_encoder,
    tokenizer=tokenizer,
    text_encoder_2=text_encoder_2,
    tokenizer_2=tokenizer_2,
    vae=vae,
    transformer=transformer,
)

# Model-level offload: only the sub-model currently running sits on the GPU;
# everything else stays in system RAM.
# NOTE(review): enable_sequential_cpu_offload() is NOT used — accelerate's
# per-parameter dispatch is incompatible with quanto's QBytesTensor wrapper
# (it raises "QBytesTensor.__new__() missing 5 required positional arguments"),
# hence the TypeError seen previously. Stay with model-level offload.
pipeline.enable_model_cpu_offload()
# FIX for the CUDA OOM: the full-resolution VAE decode at the end of sampling
# is the peak-VRAM step. Slicing (per-batch-item) and tiling (per-spatial-tile)
# decode the latent in chunks so the decode fits within 8 GB.
pipeline.vae.enable_slicing()
pipeline.vae.enable_tiling()

prompt = "A cat holding a sign that says hello world"
print('Image Generation Started')
image = pipeline(
    prompt,
    guidance_scale=3.5,
    output_type="pil",
    num_inference_steps=20,
    # CPU generator keeps the run reproducible regardless of offload device moves.
    generator=torch.Generator("cpu").manual_seed(0),
).images[0]
print('Image Generation Ended')
image.save("flux-fp8-dev.png")