metadata
library_name: diffusers
Model Card for DAVINCI-DIFF
SAFETENSORS VERSION >>> https://civitai.com/models/130664
Model Details
Model Description
This is the model card of a 🧨 diffusers model that has been pushed on the Hub. This model card has been automatically generated.
- Developed by: Abdallah Alswaiti
- Model type: SDXL
Uses
Using with Diffusers on low-VRAM GPUs
🧨 Diffusers
Make sure to upgrade diffusers to >= 0.28.0:
pip install -U diffusers
In addition, make sure to install
transformers, safetensors, sentencepiece and accelerate
(gc is part of the Python standard library and does not need to be installed):
pip install transformers accelerate safetensors sentencepiece
import torch
from diffusers import StableDiffusionXLPipeline
import gc
from transformers import CLIPTokenizer, CLIPTextModel, CLIPTextModelWithProjection
# from onediffx import compile_pipe, save_pipe, load_pipe
from PIL import Image
def encode_prompt(prompts, tokenizers, text_encoders, device='cuda'):
    """Encode a prompt with several tokenizer/text-encoder pairs.

    Each ``(prompt, tokenizer, text_encoder)`` triple is processed in order:
    the prompt is tokenized to ``tokenizer.model_max_length`` tokens (padded
    and truncated), run through the encoder, and the penultimate hidden state
    is kept. The per-encoder hidden states are then concatenated along the
    last dimension.

    Args:
        prompts: One prompt per encoder (the same text is normally repeated).
        tokenizers: Tokenizers, aligned with ``prompts``.
        text_encoders: Encoders, aligned with ``prompts``. Each is called with
            ``output_hidden_states=True``; its output must expose
            ``hidden_states`` and item ``0`` is used as the pooled embedding.
        device: Device the token ids are moved to before encoding. It has to
            match the device the encoders live on. Defaults to ``'cuda'``.

    Returns:
        A 4-tuple ``(prompt_embeds, negative_prompt_embeds,
        pooled_prompt_embeds, negative_pooled_prompt_embeds)``. The pooled
        embedding comes from the last encoder in the list. Both negative
        tensors are all zeros and shaped like their positive counterparts.

    Raises:
        ValueError: If no prompt/tokenizer/encoder triple is supplied.
    """
    hidden_states = []
    pooled_prompt_embeds = None
    for prompt, tokenizer, text_encoder in zip(prompts, tokenizers, text_encoders):
        cond_input = tokenizer(
            prompt,
            max_length=tokenizer.model_max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt',
        )
        encoder_output = text_encoder(
            cond_input.input_ids.to(device), output_hidden_states=True
        )
        # Overwritten on every pass: only the last encoder's pooled output
        # survives the loop.
        pooled_prompt_embeds = encoder_output[0]
        hidden_states.append(encoder_output.hidden_states[-2])

    if not hidden_states:
        raise ValueError(
            'encode_prompt needs at least one prompt/tokenizer/text_encoder triple'
        )

    prompt_embeds = torch.concat(hidden_states, dim=-1)
    batch_size = prompt_embeds.shape[0]
    # Flatten everything after the batch axis, as the original reshape did.
    pooled_prompt_embeds = pooled_prompt_embeds.reshape(batch_size, -1)

    # zeros_like keeps the batch dimension intact; the previous hard-coded
    # view(1, seq_len, -1) folded the batch into the feature axis whenever
    # more than one prompt was encoded at once.
    negative_prompt_embeds = torch.zeros_like(prompt_embeds)
    negative_pooled_prompt_embeds = torch.zeros_like(pooled_prompt_embeds)

    return (
        prompt_embeds,
        negative_prompt_embeds,
        pooled_prompt_embeds,
        negative_pooled_prompt_embeds,
    )
def get_user_input():
    """Interactively collect the settings for one image generation.

    Every question accepts an empty reply, which selects the default shown in
    the question text. Numeric questions are asked again when the reply is
    not a valid number, instead of aborting the session with ``ValueError``.

    Returns:
        A tuple ``(prompt, negative_prompt, cfg_scale, steps, width, height,
        seed)`` where ``cfg_scale`` is a float and ``steps``, ``width``,
        ``height`` and ``seed`` are ints.
    """

    def _ask_number(question, cast, default):
        # Repeat the question until the reply is blank or parses with `cast`.
        while True:
            reply = input(question).strip()
            if not reply:
                return default
            try:
                return cast(reply)
            except ValueError:
                print(f"Invalid value {reply!r}; please enter a number.")

    prompt = input("Enter prompt: ") or '3/4 shot, candid photograph of a beautiful 30 year old redhead woman with messy dark hair, peacefully sleeping in her bed, night, dark, light from window, dark shadows, masterpiece, uhd, moody'
    negative_prompt = input("Enter negative prompt: ") or ""
    cfg_scale = _ask_number("Enter CFG scale (default 7.5): ", float, 7.5)
    steps = _ask_number("Enter number of steps (default 50): ", int, 50)
    width = _ask_number("Enter image width (default 512): ", int, 512)
    height = _ask_number("Enter image height (default 512): ", int, 512)
    seed = _ask_number("Enter seed (default 42): ", int, 42)
    return prompt, negative_prompt, cfg_scale, steps, width, height, seed
# Interactive low-VRAM generation loop.
#
# The pipeline is created without tokenizers/text encoders; those are loaded
# only while a request's prompts are encoded and are released right afterwards.
MODEL_ID = 'ABDALLALSWAITI/DAVINCI-DIFF'

pipe = StableDiffusionXLPipeline.from_pretrained(
    MODEL_ID,
    use_safetensors=True,
    torch_dtype=torch.float16,
    tokenizer=None,
    text_encoder=None,
    tokenizer_2=None,
    text_encoder_2=None,
).to('cuda')
# Optional onediffx acceleration (needs the commented-out import above):
# pipe = compile_pipe(pipe)
# load_pipe(pipe, dir="cached_pipe")

# Running counter so that images from later passes do not overwrite
# image_1.png (the queue is rebuilt on every pass).
image_count = 0

while True:
    prompt, negative_prompt, cfg_scale, steps, width, height, seed = get_user_input()
    queue = [{
        'prompt': prompt,
        'negative_prompt': negative_prompt,
        'cfg_scale': cfg_scale,
        'steps': steps,
        'width': width,
        'height': height,
        'seed': seed,
    }]

    # --- Stage 1: text encoding -------------------------------------------
    tokenizer = CLIPTokenizer.from_pretrained(MODEL_ID, subfolder='tokenizer')
    text_encoder = CLIPTextModel.from_pretrained(
        MODEL_ID, subfolder='text_encoder', use_safetensors=True, torch_dtype=torch.float16
    ).to('cuda')
    tokenizer_2 = CLIPTokenizer.from_pretrained(MODEL_ID, subfolder='tokenizer_2')
    text_encoder_2 = CLIPTextModelWithProjection.from_pretrained(
        MODEL_ID, subfolder='text_encoder_2', use_safetensors=True, torch_dtype=torch.float16
    ).to('cuda')

    with torch.no_grad():
        for generation in queue:
            embeddings = encode_prompt(
                [generation['prompt'], generation['prompt']],
                [tokenizer, tokenizer_2],
                [text_encoder, text_encoder_2],
            )
            if generation['negative_prompt']:
                # The negative prompt used to be collected but never encoded.
                # Encode it the same way and use its embeddings in place of
                # the all-zero placeholders; an empty negative prompt keeps
                # the zero embeddings.
                negative = encode_prompt(
                    [generation['negative_prompt'], generation['negative_prompt']],
                    [tokenizer, tokenizer_2],
                    [text_encoder, text_encoder_2],
                )
                embeddings = (embeddings[0], negative[0], embeddings[2], negative[2])
            generation['embeddings'] = embeddings

    # Release the text encoders before denoising.
    del tokenizer, text_encoder, tokenizer_2, text_encoder_2
    gc.collect()
    torch.cuda.empty_cache()

    # --- Stage 2: denoising ------------------------------------------------
    # The UNet is parked on the CPU after each pass (see below), so bring it
    # back; on the first pass it is already on the GPU.
    pipe.unet.to('cuda')
    generator = torch.Generator(device='cuda')
    for generation in queue:
        generator.manual_seed(generation['seed'])
        generation['latents'] = pipe(
            prompt_embeds=generation['embeddings'][0],
            negative_prompt_embeds=generation['embeddings'][1],
            pooled_prompt_embeds=generation['embeddings'][2],
            negative_pooled_prompt_embeds=generation['embeddings'][3],
            generator=generator,
            output_type='latent',
            guidance_scale=generation['cfg_scale'],
            num_inference_steps=generation['steps'],
            height=generation['height'],
            width=generation['width'],
        ).images

    # Free the UNet's VRAM for decoding. It is moved to the CPU rather than
    # deleted: `del pipe.unet` left the pipeline without a UNet, so answering
    # "y" below made the next pass fail.
    pipe.unet.to('cpu')
    gc.collect()
    torch.cuda.empty_cache()

    # --- Stage 3: VAE decoding ---------------------------------------------
    # load_pipe(pipe, dir="cached_pipe")
    pipe.upcast_vae()
    with torch.no_grad():
        for generation in queue:
            # Match the latents to the VAE's parameter dtype after upcasting.
            vae_dtype = next(iter(pipe.vae.post_quant_conv.parameters())).dtype
            generation['latents'] = generation['latents'].to(vae_dtype)
            image = pipe.vae.decode(
                generation['latents'] / pipe.vae.config.scaling_factor, return_dict=False
            )[0]
            image = pipe.image_processor.postprocess(image, output_type='pil')[0]
            image_count += 1
            image_path = f'image_{image_count}.png'
            image.save(image_path)
            print(f"Image saved at: {image_path}")
    # save_pipe(pipe, dir="cached_pipe")

    if input("Do you want to create another image? (y/n): ").lower() != 'y':
        break