# audio_img / text_to_img.py
# pengdaqian
# add more
# 171f55b
import torch
from diffusers import UnCLIPScheduler, DDPMScheduler, StableUnCLIPPipeline
from diffusers.models import PriorTransformer
from transformers import CLIPTokenizer, CLIPTextModelWithProjection
def init_text2img_pipe():
    """Assemble the Stable unCLIP text-to-image pipeline and place it on a device.

    The pipeline combines the Karlo prior (transformer and scheduler config),
    the OpenAI CLIP ViT-L/14 tokenizer and text encoder used by that prior,
    and the Stable Diffusion 2.1 unCLIP-small checkpoint.

    When CUDA is available the weights are loaded as float16 and the pipeline
    is moved to ``"cuda"``; otherwise float32 on ``"cpu"`` is used.

    Returns:
        The result of calling ``.to(device)`` on the loaded
        ``StableUnCLIPPipeline``.
    """
    # Pick the target device and its matching precision in one place.
    if torch.cuda.is_available():
        target_device, weights_dtype = "cuda", torch.float16
    else:
        target_device, weights_dtype = "cpu", torch.float32

    karlo_repo = "kakaobrain/karlo-v1-alpha"
    clip_repo = "openai/clip-vit-large-patch14"
    unclip_repo = "stabilityai/stable-diffusion-2-1-unclip-small"

    prior_transformer = PriorTransformer.from_pretrained(
        karlo_repo,
        subfolder="prior",
        torch_dtype=weights_dtype,
    )

    tokenizer = CLIPTokenizer.from_pretrained(clip_repo)
    text_encoder = CLIPTextModelWithProjection.from_pretrained(
        clip_repo,
        torch_dtype=weights_dtype,
    )

    # Only the configuration of Karlo's UnCLIP scheduler is reused; the
    # scheduler actually handed to the pipeline is a DDPMScheduler built
    # from that configuration.
    scheduler = DDPMScheduler.from_config(
        UnCLIPScheduler.from_pretrained(
            karlo_repo, subfolder="prior_scheduler"
        ).config
    )

    pipeline_kwargs = {
        "torch_dtype": weights_dtype,
        "variant": "fp16",
        "prior_tokenizer": tokenizer,
        "prior_text_encoder": text_encoder,
        "prior": prior_transformer,
        "prior_scheduler": scheduler,
    }
    unclip_pipeline = StableUnCLIPPipeline.from_pretrained(
        unclip_repo, **pipeline_kwargs
    )
    return unclip_pipeline.to(target_device)
def predict(
    prompt: str,
    negative_prompt: str,
    pipeline,
    *,
    height: int = 600,
    width: int = 400,
    num_inference_steps: int = 60,
):
    """Run a text-to-image pipeline once and return the images it produced.

    Args:
        prompt: Text describing what the image should contain.
        negative_prompt: Text describing what the image should avoid.
        pipeline: Callable accepting ``prompt``, ``negative_prompt``,
            ``height``, ``width`` and ``num_inference_steps`` keyword
            arguments and returning an object with an ``images`` attribute
            (e.g. the pipeline built by ``init_text2img_pipe``).
        height: Requested output height, forwarded to the pipeline.
            Defaults to 600.
        width: Requested output width, forwarded to the pipeline.
            Defaults to 400.
        num_inference_steps: Number of inference steps forwarded to the
            pipeline. Defaults to 60.

    Returns:
        The ``images`` attribute of the pipeline's result.
    """
    result = pipeline(
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=height,
        width=width,
        num_inference_steps=num_inference_steps,
    )
    return result.images
if __name__ == "__main__":
    # Demo run: build the pipeline, generate images for a fixed prompt pair,
    # and write each result to disk as a numbered PNG.
    demo_pipeline = init_text2img_pipe()
    generated = predict("a dog", "a cat", demo_pipeline)
    for position, picture in enumerate(generated):
        picture.save(f"/root/autodl-tmp/image_{position}.png")