|
from diffusers import DiffusionPipeline |
|
from diffusers.utils import pt_to_pil |
|
import gradio as gr |
|
import torch |
|
import numpy as np |
|
|
|
|
|
stage_1 = DiffusionPipeline.from_pretrained( |
|
"DeepFloyd/IF-I-M-v1.0", variant="fp16", torch_dtype=torch.float16 |
|
) |
|
stage_1.enable_xformers_memory_efficient_attention() |
|
stage_1.enable_model_cpu_offload() |
|
stage_2 = DiffusionPipeline.from_pretrained( |
|
"DeepFloyd/IF-II-M-v1.0", |
|
text_encoder=None, |
|
variant="fp16", |
|
torch_dtype=torch.float16, |
|
) |
|
stage_2.enable_xformers_memory_efficient_attention() |
|
stage_2.enable_model_cpu_offload() |
|
|
|
|
|
safety_modules = { |
|
"feature_extractor": stage_1.feature_extractor, |
|
"safety_checker": stage_1.safety_checker, |
|
"watermarker": stage_1.watermarker, |
|
} |
|
stage_3 = DiffusionPipeline.from_pretrained( |
|
"stabilityai/stable-diffusion-x4-upscaler", |
|
**safety_modules, |
|
torch_dtype=torch.float16 |
|
) |
|
stage_3.enable_xformers_memory_efficient_attention() |
|
stage_3.enable_model_cpu_offload() |
|
|
|
|
|
def predict(prompt): |
|
prompt_embeds, negative_embeds = stage_1.encode_prompt(prompt) |
|
generator = torch.manual_seed(0) |
|
image = stage_1( |
|
prompt_embeds=prompt_embeds, |
|
negative_prompt_embeds=negative_embeds, |
|
generator=generator, |
|
output_type="pt", |
|
).images |
|
image = stage_2( |
|
image=image, |
|
prompt_embeds=prompt_embeds, |
|
negative_prompt_embeds=negative_embeds, |
|
generator=generator, |
|
output_type="pt", |
|
).images |
|
image = stage_3( |
|
prompt=prompt, image=image, generator=generator, noise_level=100 |
|
).images[0] |
|
return image |
|
|
|
|
|
gradio_app = gr.Interface( |
|
fn=predict, |
|
inputs="text", |
|
outputs="image", |
|
title="Text to Image Generator", |
|
description="Enter a text string to generate an image.", |
|
) |
|
|
|
if __name__ == "__main__": |
|
gradio_app.launch(server_name="0.0.0.0") |
|
|