Spaces:

next-social
/

audio_img

Build error

audio_img / text_to_img.py

pengdaqian

add more

171f55b over 1 year ago

1.82 kB

	import torch
	from diffusers import UnCLIPScheduler, DDPMScheduler, StableUnCLIPPipeline
	from diffusers.models import PriorTransformer
	from transformers import CLIPTokenizer, CLIPTextModelWithProjection


	def init_text2img_pipe():
	    """Assemble the Stable unCLIP text-to-image pipeline and place it on a device.

	    The pipeline combines three pretrained sources:

	    * the prior transformer and prior scheduler config from
	      ``kakaobrain/karlo-v1-alpha``,
	    * the CLIP tokenizer and text encoder from
	      ``openai/clip-vit-large-patch14``,
	    * the decoder pipeline from
	      ``stabilityai/stable-diffusion-2-1-unclip-small``.

	    When CUDA is available the models are loaded as ``float16`` and the
	    pipeline is moved to ``"cuda"``; otherwise ``float32`` and ``"cpu"``
	    are used.

	    Returns:
	        The result of ``StableUnCLIPPipeline.from_pretrained(...).to(device)``.
	    """
	    # Pick the device and the matching tensor precision together.
	    if torch.cuda.is_available():
	        target, precision = "cuda", torch.float16
	    else:
	        target, precision = "cpu", torch.float32

	    karlo_repo = "kakaobrain/karlo-v1-alpha"
	    prior_net = PriorTransformer.from_pretrained(
	        karlo_repo, subfolder="prior", torch_dtype=precision
	    )

	    clip_repo = "openai/clip-vit-large-patch14"
	    tokenizer = CLIPTokenizer.from_pretrained(clip_repo)
	    text_encoder = CLIPTextModelWithProjection.from_pretrained(
	        clip_repo, torch_dtype=precision
	    )

	    # The stored prior scheduler is an UnCLIPScheduler; only its config is
	    # kept, and a DDPMScheduler is instantiated from that config instead.
	    scheduler = DDPMScheduler.from_config(
	        UnCLIPScheduler.from_pretrained(karlo_repo, subfolder="prior_scheduler").config
	    )

	    # NOTE(review): the "fp16" weight variant is requested on the CPU/float32
	    # path as well -- confirm this is intended.
	    prior_parts = {
	        "prior_tokenizer": tokenizer,
	        "prior_text_encoder": text_encoder,
	        "prior": prior_net,
	        "prior_scheduler": scheduler,
	    }
	    unclip = StableUnCLIPPipeline.from_pretrained(
	        "stabilityai/stable-diffusion-2-1-unclip-small",
	        torch_dtype=precision,
	        variant="fp16",
	        **prior_parts,
	    )
	    return unclip.to(target)


	def predict(
	    prompt: str,
	    negative_prompt: str,
	    pipeline,
	    *,
	    height: int = 600,
	    width: int = 400,
	    num_inference_steps: int = 60,
	):
	    """Run ``pipeline`` on a text prompt and return the generated images.

	    Args:
	        prompt: Text passed to the pipeline as ``prompt``.
	        negative_prompt: Text passed to the pipeline as ``negative_prompt``.
	        pipeline: Callable accepting the keyword arguments ``prompt``,
	            ``negative_prompt``, ``height``, ``width`` and
	            ``num_inference_steps``, and returning an object with an
	            ``images`` attribute (for example the pipeline built by
	            ``init_text2img_pipe``).
	        height: Requested image height, forwarded unchanged. Defaults to 600.
	        width: Requested image width, forwarded unchanged. Defaults to 400.
	        num_inference_steps: Step count forwarded unchanged. Defaults to 60.

	    Returns:
	        The ``images`` attribute of the pipeline's result.
	    """
	    # The size and step count were previously fixed at 600x400 / 60; they
	    # stay the defaults so existing three-argument calls behave the same.
	    result = pipeline(
	        prompt=prompt,
	        negative_prompt=negative_prompt,
	        height=height,
	        width=width,
	        num_inference_steps=num_inference_steps,
	    )
	    return result.images


	if __name__ == "__main__":
	    # Manual smoke run: build the pipeline, generate images for a fixed
	    # prompt pair, and write each image to a numbered PNG file.
	    pipe = init_text2img_pipe()
	    for number, picture in enumerate(predict("a dog", "a cat", pipe)):
	        picture.save(f"/root/autodl-tmp/image_{number}.png")