Spaces:

Gosula
/

Stable_diffusion_model

Sleeping

App Files Files Community

Stable_diffusion_model / device.py

Gosula

Update device.py

75d975a 9 months ago

raw

history blame

No virus

1.7 kB

	from base64 import b64encode
	import numpy
	import torch
	from diffusers import AutoencoderKL, LMSDiscreteScheduler, UNet2DConditionModel
	from huggingface_hub import notebook_login

	# For video display:
	from IPython.display import HTML
	from matplotlib import pyplot as plt
	from pathlib import Path
	from PIL import Image
	from torch import autocast
	from torchvision import transforms as tfms
	from tqdm.auto import tqdm
	from transformers import CLIPTextModel, CLIPTokenizer, logging
	import os
	# Choose the compute backend in order of preference: CUDA first, then
	# Apple MPS, then plain CPU. To force CPU, reassign torch_device = "cpu"
	# right after this block.
	if torch.cuda.is_available():
	    torch_device = "cuda"
	elif torch.backends.mps.is_available():
	    torch_device = "mps"
	else:
	    torch_device = "cpu"


	# Stable Diffusion components. Each model is moved to torch_device as soon
	# as it has been loaded.

	# Autoencoder used to decode the latents into image space.
	vae = AutoencoderKL.from_pretrained(
	    "CompVis/stable-diffusion-v1-4", subfolder="vae"
	).to(torch_device)

	# Tokenizer and text encoder used to tokenize and encode the text.
	tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
	text_encoder = CLIPTextModel.from_pretrained(
	    "openai/clip-vit-large-patch14"
	).to(torch_device)

	# UNet that generates the latents.
	unet = UNet2DConditionModel.from_pretrained(
	    "CompVis/stable-diffusion-v1-4", subfolder="unet"
	).to(torch_device)

	# Noise scheduler.
	scheduler = LMSDiscreteScheduler(
	    beta_start=0.00085,
	    beta_end=0.012,
	    beta_schedule="scaled_linear",
	    num_train_timesteps=1000,
	)


	# Pull the token and position embedding layers out of the text encoder.
	_text_embeddings = text_encoder.text_model.embeddings
	token_emb_layer = _text_embeddings.token_embedding
	pos_emb_layer = _text_embeddings.position_embedding

	# Look up the position embeddings for the first 77 position ids.
	# NOTE(review): 77 is presumably the CLIP context length — confirm.
	position_ids = _text_embeddings.position_ids[:, :77]
	position_embeddings = pos_emb_layer(position_ids)