Disty0 committed on
Commit
cd47cca
1 Parent(s): 721f70c

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +86 -0
README.md CHANGED
@@ -33,6 +33,92 @@ An anime diffusion model finetuned on Würstchen V3.
33
  <img class="image" src="https://cdn-uploads.huggingface.co/production/uploads/6456af6195082f722d178522/uua4L9aaqJ0LI8gYv4xmC.png" width="320">
34
  </table>
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  ## Training:
37
 
38
  **GPU used**: 7x Nvidia H100 80GB SXM5
 
33
  <img class="image" src="https://cdn-uploads.huggingface.co/production/uploads/6456af6195082f722d178522/uua4L9aaqJ0LI8gYv4xmC.png" width="320">
34
  </table>
35
 
36
+
37
+ # Code Example
38
+
39
+ ```shell
40
+ pip install diffusers
41
+ ```
42
+
43
+ ```python
44
+ import torch
45
+ import diffusers
46
+
47
+ device = "cuda"
48
+ dtype = torch.float16
49
+
50
+ model_path = "Disty0/sotediffusion-v2"
51
+
52
+ def get_timestep_ratio_conditioning(t, alphas_cumprod):
53
+ s = torch.tensor([0.008]) # diffusers uses 0.003 while the original is 0.008
54
+ clamp_range = [0, 1]
55
+ min_var = torch.cos(s / (1 + s) * torch.pi * 0.5) ** 2
56
+ var = alphas_cumprod[t]
57
+ var = var.clamp(*clamp_range)
58
+ s, min_var = s.to(var.device), min_var.to(var.device)
59
+ ratio = (((var * min_var) ** 0.5).acos() / (torch.pi * 0.5)) * (1 + s) - s
60
+ return ratio
61
+
62
+ pipe = diffusers.AutoPipelineForText2Image.from_pretrained(model_path, text_encoder=None, torch_dtype=dtype)
63
+
64
+ # Work around diffusers bugs
65
+ pipe.prior_pipe.get_timestep_ratio_conditioning = get_timestep_ratio_conditioning
66
+ pipe.prior_pipe.scheduler.config.clip_sample = False
67
+
68
+ # de-dupe
69
+ pipe.decoder_pipe.text_encoder = pipe.text_encoder = None # nothing uses this
70
+ del pipe.decoder_pipe.text_encoder
71
+ del pipe.prior_prior
72
+ del pipe.prior_text_encoder
73
+ del pipe.prior_tokenizer
74
+ del pipe.prior_scheduler
75
+ del pipe.prior_feature_extractor
76
+ del pipe.prior_image_encoder
77
+
78
+ pipe = pipe.to(device, dtype=dtype)
79
+ pipe.prior_pipe = pipe.prior_pipe.to(device, dtype=dtype)
80
+
81
+
82
+ prompt = "1girl, solo, looking at viewer, open mouth, blue eyes, medium breasts, blonde hair, gloves, dress, bow, hair between eyes, bare shoulders, upper body, hair bow, indoors, elbow gloves, hand on own chest, bridal gauntlets, candlestand, smile, rim lighting, from side, castle interior, looking side,"
83
+ quality_prompt = "very aesthetic, best quality, newest"
84
+ negative_prompt = "very displeasing, displeasing, worst quality, bad quality, low quality, realistic, monochrome, comic, sketch, oldest, early, artist name, signature, blurry, simple background, upside down,"
85
+
86
+ num_images_per_prompt=1
87
+
88
+ # Encode prompts and quality prompts separately:
89
+ # device, batch_size, num_images_per_prompt, cfg, prompt
90
+ prompt_embeds, prompt_embeds_pooled, _, _ = pipe.prior_pipe.encode_prompt(device, 1, num_images_per_prompt, False, prompt=prompt)
91
+ quality_prompt_embeds, _, _, _ = pipe.prior_pipe.encode_prompt(device, 1, num_images_per_prompt, False, prompt=quality_prompt)
92
+
93
+ negative_prompt_embeds, negative_prompt_embeds_pooled, _, _ = pipe.prior_pipe.encode_prompt(device, 1, num_images_per_prompt, False, prompt=negative_prompt)
94
+ empty_prompt_embeds, _, _, _ = pipe.prior_pipe.encode_prompt(device, 1, num_images_per_prompt, False, prompt="")
95
+ empty_prompt_embeds = torch.nn.functional.normalize(empty_prompt_embeds)
96
+
97
+ prompt_embeds = torch.cat([prompt_embeds, quality_prompt_embeds], dim=1)
98
+ negative_prompt_embeds = torch.cat([negative_prompt_embeds, empty_prompt_embeds], dim=1)
99
+
100
+ pipe.prior_pipe.maybe_free_model_hooks()
101
+
102
+ output = pipe(
103
+ width=1024,
104
+ height=1536,
105
+ decoder_guidance_scale=1.0,
106
+ prior_guidance_scale=7.0,
107
+ prior_num_inference_steps=30,
108
+ num_inference_steps=10,
109
+ output_type="pil",
110
+ prompt=prompt + " " + quality_prompt,
111
+ negative_prompt=negative_prompt,
112
+ prompt_embeds=prompt_embeds,
113
+ prompt_embeds_pooled=prompt_embeds_pooled,
114
+ negative_prompt_embeds=negative_prompt_embeds,
115
+ negative_prompt_embeds_pooled=negative_prompt_embeds_pooled,
116
+ num_images_per_prompt=num_images_per_prompt,
117
+ ).images[0]
118
+
119
+ display(output)
120
+ ```
121
+
122
  ## Training:
123
 
124
  **GPU used**: 7x Nvidia H100 80GB SXM5