# Stable Diffusion 3.5
Stable Diffusion 3.5 is a Multimodal Diffusion Transformer (MMDiT) text-to-image model with greatly improved image quality, typography, complex-prompt understanding, and resource efficiency.
For more technical details, please refer to the research paper.
Please note: this model is released under the Stability Community License. For an Enterprise License, visit Stability.ai or contact us for commercial licensing details.
The following model checkpoints are provided by the Keras team. Full code examples for each are available below.
| Preset name | Parameters | Description |
|---|---|---|
| `stable_diffusion_3.5_large` | 9.05B | 9 billion parameters, including CLIP L and CLIP G text encoders, MMDiT generative model, and VAE autoencoder. Developed by Stability AI. |
| `stable_diffusion_3.5_large_turbo` | 9.05B | 9 billion parameters, including CLIP L and CLIP G text encoders, MMDiT generative model, and VAE autoencoder. A timestep-distilled version that eliminates classifier-free guidance and uses fewer steps for generation (see the sketch below). Developed by Stability AI. |
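The turbo preset is loaded through the same task classes shown in the examples below. As a minimal sketch (the step count and guidance scale here are illustrative values for a timestep-distilled model, not tuned settings):

# Timestep-distilled turbo preset: same API, far fewer steps, and a
# guidance scale of 1.0 since classifier-free guidance is eliminated.
# (Sketch; the values below are illustrative.)
text_to_image_turbo = keras_hub.models.StableDiffusion3TextToImage.from_preset(
    "stable_diffusion_3.5_large_turbo", height=512, width=512
)
text_to_image_turbo.generate(
    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
    num_steps=4,
    guidance_scale=1.0,
)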
## Example Usage

!pip install -U keras-hub
!pip install -U keras

import keras_hub
import numpy as np
# Pretrained Stable Diffusion 3.5 model.
model = keras_hub.models.StableDiffusion3Backbone.from_preset(
"stable_diffusion_3.5_large"
)
# Randomly initialized Stable Diffusion 3 model with custom config.
vae = keras_hub.models.VAEBackbone(...)
clip_l = keras_hub.models.CLIPTextEncoder(...)
clip_g = keras_hub.models.CLIPTextEncoder(...)
model = keras_hub.models.StableDiffusion3Backbone(
mmdit_patch_size=2,
mmdit_num_heads=4,
mmdit_hidden_dim=256,
mmdit_depth=4,
mmdit_position_size=192,
vae=vae,
clip_l=clip_l,
clip_g=clip_g,
)
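Note that the custom configuration above is randomly initialized: the `...` placeholders stand for the VAE and CLIP encoder constructor arguments, which are left unspecified here, and the small MMDiT dimensions make this variant suited to testing and debugging rather than image generation.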
# Image-to-image example
image_to_image = keras_hub.models.StableDiffusion3ImageToImage.from_preset(
"stable_diffusion_3.5_large", height=512, width=512
)
image_to_image.generate(
{
"images": np.ones((512, 512, 3), dtype="float32"),
"prompts": "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
}
)
# Generate with batched prompts.
image_to_image.generate(
{
"images": np.ones((2, 512, 512, 3), dtype="float32"),
"prompts": ["cute wallpaper art of a cat", "cute wallpaper art of a dog"],
}
)
# Generate with different `num_steps`, `guidance_scale` and `strength`.
image_to_image.generate(
{
"images": np.ones((512, 512, 3), dtype="float32"),
"prompts": "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
},
num_steps=50,
guidance_scale=5.0,
strength=0.6,
)
# Generate with `negative_prompts`.
image_to_image.generate(
{
"images": np.ones((512, 512, 3), dtype="float32"),
"prompts": "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
"negative_prompts": "green color",
}
)
# Inpainting example
reference_image = np.ones((1024, 1024, 3), dtype="float32")
reference_mask = np.ones((1024, 1024), dtype="float32")
inpaint = keras_hub.models.StableDiffusion3Inpaint.from_preset(
"stable_diffusion_3.5_large", height=512, width=512
)
inpaint.generate(
reference_image,
reference_mask,
"Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
)
# Generate with batched prompts.
# The batched masks must match the spatial shape of the batched images.
reference_images = np.ones((2, 512, 512, 3), dtype="float32")
reference_mask = np.ones((2, 512, 512), dtype="float32")
inpaint.generate(
reference_images,
reference_mask,
["cute wallpaper art of a cat", "cute wallpaper art of a dog"]
)
# Generate with different `num_steps`, `guidance_scale` and `strength`.
inpaint.generate(
reference_image,
reference_mask,
"Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
num_steps=50,
guidance_scale=5.0,
strength=0.6,
)
# Text-to-image example
text_to_image = keras_hub.models.StableDiffusion3TextToImage.from_preset(
"stable_diffusion_3.5_large", height=512, width=512
)
text_to_image.generate(
"Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
)
# Generate with batched prompts.
text_to_image.generate(
["cute wallpaper art of a cat", "cute wallpaper art of a dog"]
)
# Generate with different `num_steps` and `guidance_scale`.
text_to_image.generate(
"Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
num_steps=50,
guidance_scale=5.0,
)
# Generate with `negative_prompts`.
text_to_image.generate(
{
"prompts": "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
"negative_prompts": "green color",
}
)
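To inspect or persist results, the generated output can be written to an image file. A minimal sketch, assuming `generate()` returns the image as a `uint8` NumPy array as in the KerasHub examples (the Pillow conversion and filename are illustrative, not part of the KerasHub API):

# Hypothetical post-processing: save a generated image with Pillow.
# Assumes `generate()` returns a (height, width, 3) uint8 NumPy array.
from PIL import Image

image = text_to_image.generate(
    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
)
Image.fromarray(np.asarray(image, dtype="uint8")).save("astronaut.png")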
## Example Usage with Hugging Face URI

The same presets can be loaded directly from Hugging Face by prefixing the handle with `hf://keras/`.

!pip install -U keras-hub
!pip install -U keras

import keras_hub
import numpy as np
# Pretrained Stable Diffusion 3.5 model.
model = keras_hub.models.StableDiffusion3Backbone.from_preset(
"hf://keras/stable_diffusion_3.5_large"
)
# Randomly initialized Stable Diffusion 3 model with custom config.
vae = keras_hub.models.VAEBackbone(...)
clip_l = keras_hub.models.CLIPTextEncoder(...)
clip_g = keras_hub.models.CLIPTextEncoder(...)
model = keras_hub.models.StableDiffusion3Backbone(
mmdit_patch_size=2,
mmdit_num_heads=4,
mmdit_hidden_dim=256,
mmdit_depth=4,
mmdit_position_size=192,
vae=vae,
clip_l=clip_l,
clip_g=clip_g,
)
# Image-to-image example
image_to_image = keras_hub.models.StableDiffusion3ImageToImage.from_preset(
"hf://keras/stable_diffusion_3.5_large", height=512, width=512
)
image_to_image.generate(
{
"images": np.ones((512, 512, 3), dtype="float32"),
"prompts": "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
}
)
# Generate with batched prompts.
image_to_image.generate(
{
"images": np.ones((2, 512, 512, 3), dtype="float32"),
"prompts": ["cute wallpaper art of a cat", "cute wallpaper art of a dog"],
}
)
# Generate with different `num_steps`, `guidance_scale` and `strength`.
image_to_image.generate(
{
"images": np.ones((512, 512, 3), dtype="float32"),
"prompts": "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
},
num_steps=50,
guidance_scale=5.0,
strength=0.6,
)
# Generate with `negative_prompts`.
image_to_image.generate(
{
"images": np.ones((512, 512, 3), dtype="float32"),
"prompts": "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
"negative_prompts": "green color",
}
)
# Inpainting example
reference_image = np.ones((1024, 1024, 3), dtype="float32")
reference_mask = np.ones((1024, 1024), dtype="float32")
inpaint = keras_hub.models.StableDiffusion3Inpaint.from_preset(
"hf://keras/stable_diffusion_3.5_large", height=512, width=512
)
inpaint.generate(
reference_image,
reference_mask,
"Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
)
# Generate with batched prompts.
# The batched masks must match the spatial shape of the batched images.
reference_images = np.ones((2, 512, 512, 3), dtype="float32")
reference_mask = np.ones((2, 512, 512), dtype="float32")
inpaint.generate(
reference_images,
reference_mask,
["cute wallpaper art of a cat", "cute wallpaper art of a dog"]
)
# Generate with different `num_steps`, `guidance_scale` and `strength`.
inpaint.generate(
reference_image,
reference_mask,
"Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
num_steps=50,
guidance_scale=5.0,
strength=0.6,
)
# Text-to-image example
text_to_image = keras_hub.models.StableDiffusion3TextToImage.from_preset(
"hf://keras/stable_diffusion_3.5_large", height=512, width=512
)
text_to_image.generate(
"Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
)
# Generate with batched prompts.
text_to_image.generate(
["cute wallpaper art of a cat", "cute wallpaper art of a dog"]
)
# Generate with different `num_steps` and `guidance_scale`.
text_to_image.generate(
"Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
num_steps=50,
guidance_scale=5.0,
)
# Generate with `negative_prompts`.
text_to_image.generate(
{
"prompts": "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
"negative_prompts": "green color",
}
)