Commit: 2e95318
Parent(s): 0dd9989
Author: p

diffusers format
README.md ADDED
@@ -0,0 +1,37 @@
+ ---
+ license: creativeml-openrail-m
+ ---
+ <b>This model is available on <a href="https://www.mage.space/">Mage.Space</a> (main sponsor)</b><br>
+ <b>You can support me directly on Boosty - https://boosty.to/sg_161222</b><br>
+
+ <b>Please read this!</b><br>
+ This is not yet the full version of the model (see the <b>"Model Description"</b> section).<br>
+ For version 6.0 it is recommended to use the model with a VAE to improve generation quality and avoid artifacts: https://huggingface.co/stabilityai/sd-vae-ft-mse-original<br>
+
+ <b>Model Description</b><br>
+ Realistic Vision V6.0 "New Vision" is a global update of the Realistic Vision model that will be released gradually in several beta versions until the full release. The model is aimed at realism and photorealism.<br>
+ CivitAI Page: https://civitai.com/models/4201/realistic-vision-v60-b1?modelVersionId=245598
+
+ <b>Resolutions (use a lower resolution if you get many mutations or similar artifacts)</b><br>
+ - Face Portrait: 896x896<br>
+ - Portrait: 896x896, 768x1024<br>
+ - Half Body: 768x1024, 640x1152<br>
+ - Full Body: 896x896, 768x1024, 640x1152, 1024x768, 1152x640<br>
+
+ <b>Improvements</b>
+ - Increased generation resolution to 896x896, 768x1024, 640x1152, 1024x768, and 1152x640 (note: in some cases there may still be mutations, duplications, etc.; these will be fixed in future versions).<br>
+ - Improved SFW and NSFW male and female anatomy (note: not all poses work correctly at such large resolutions; this will be fixed in future versions).<br>
+
+ <b>Recommended Workflow</b><br>
+ Images can be generated with or without Hires.Fix, but using it significantly improves generation quality. Hires.Fix is strongly recommended when generating full-body and half-body images (note: you can also use Restore Faces or ADetailer).<br>
+
+ <b>Recommended Generation Parameters</b><br>
+ Sampler: DPM++ SDE Karras (25+ steps) / DPM++ 2M SDE (50+ steps)<br>
+ Negative Prompt: (deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime), text, cropped, out of frame, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck<br>
+
+ <b>Recommended Hires.Fix Parameters</b><br>
+ Sampler: DPM++ SDE Karras or DPM++ 2M SDE<br>
+ Denoising steps: 10+ (DPM++ SDE Karras) / 20+ (DPM++ 2M SDE) (note: the lower the Hires steps value for a given sampler, the stronger the skin texture and the higher the chance of artifacts)<br>
+ Denoising strength: 0.1-0.3<br>
+ Upscaler: 4x-UltraSharp / 4x_NMKD-Superscale-SP_178000_G or another<br>
+ Upscale by: 1.1-2.0+<br>
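
The card above describes an A1111-style workflow; for the diffusers layout this commit introduces, the sketch below shows one way to follow its recommendations. Assumptions are marked: the repo id `SG161222/Realistic_Vision_V6.0_B1_noVAE` and the prompt are illustrative, `stabilityai/sd-vae-ft-mse` is the diffusers-format counterpart of the linked VAE, and `DPMSolverMultistepScheduler` with `algorithm_type="sde-dpmsolver++"` approximates the recommended DPM++ 2M SDE sampler (Hires.Fix itself is a WebUI feature with no direct diffusers one-liner).

```python
# Minimal sketch, not the author's official snippet; see assumptions above.
import torch
from diffusers import AutoencoderKL, DPMSolverMultistepScheduler, StableDiffusionPipeline

# diffusers-format build of the recommended sd-vae-ft-mse VAE
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16)

pipe = StableDiffusionPipeline.from_pretrained(
    "SG161222/Realistic_Vision_V6.0_B1_noVAE",  # assumed repo id for this commit
    vae=vae,
    torch_dtype=torch.float16,
).to("cuda")

# Approximation of the recommended "DPM++ 2M SDE" sampler (50+ steps)
pipe.scheduler = DPMSolverMultistepScheduler.from_config(
    pipe.scheduler.config, algorithm_type="sde-dpmsolver++"
)

negative = (
    "(deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, "
    "cartoon, drawing, anime), text, cropped, out of frame, worst quality, "
    "low quality, jpeg artifacts"  # abbreviated; full list in the card above
)

image = pipe(
    "RAW photo, close-up portrait of a woman, natural window light",  # illustrative prompt
    negative_prompt=negative,
    width=896,
    height=896,  # recommended face-portrait resolution
    num_inference_steps=50,
).images[0]
image.save("portrait.png")
```
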
feature_extractor/preprocessor_config.json CHANGED
@@ -1,21 +1,4 @@
  {
- "_valid_processor_keys": [
- "images",
- "do_resize",
- "size",
- "resample",
- "do_center_crop",
- "crop_size",
- "do_rescale",
- "rescale_factor",
- "do_normalize",
- "image_mean",
- "image_std",
- "do_convert_rgb",
- "return_tensors",
- "data_format",
- "input_data_format"
- ],
  "crop_size": {
  "height": 224,
  "width": 224
@@ -25,6 +8,7 @@
  "do_normalize": true,
  "do_rescale": true,
  "do_resize": true,
+ "feature_extractor_type": "CLIPFeatureExtractor",
  "image_mean": [
  0.48145466,
  0.4578275,
model_index.json CHANGED
@@ -1,6 +1,6 @@
  {
  "_class_name": "StableDiffusionPipeline",
- "_diffusers_version": "0.28.0.dev0",
+ "_diffusers_version": "0.24.0",
  "feature_extractor": [
  "transformers",
  "CLIPFeatureExtractor"
safety_checker/config.json CHANGED
@@ -1,4 +1,5 @@
  {
+ "_commit_hash": "cb41f3a270d63d454d385fc2e4f571c487c253c5",
  "_name_or_path": "CompVis/stable-diffusion-safety-checker",
  "architectures": [
  "StableDiffusionSafetyChecker"
@@ -8,21 +9,160 @@
  "model_type": "clip",
  "projection_dim": 768,
  "text_config": {
+ "_name_or_path": "",
+ "add_cross_attention": false,
+ "architectures": null,
+ "attention_dropout": 0.0,
+ "bad_words_ids": null,
+ "begin_suppress_tokens": null,
+ "bos_token_id": 0,
+ "chunk_size_feed_forward": 0,
+ "cross_attention_hidden_size": null,
+ "decoder_start_token_id": null,
+ "diversity_penalty": 0.0,
+ "do_sample": false,
  "dropout": 0.0,
+ "early_stopping": false,
+ "encoder_no_repeat_ngram_size": 0,
+ "eos_token_id": 2,
+ "exponential_decay_length_penalty": null,
+ "finetuning_task": null,
+ "forced_bos_token_id": null,
+ "forced_eos_token_id": null,
+ "hidden_act": "quick_gelu",
  "hidden_size": 768,
+ "id2label": {
+ "0": "LABEL_0",
+ "1": "LABEL_1"
+ },
+ "initializer_factor": 1.0,
+ "initializer_range": 0.02,
  "intermediate_size": 3072,
+ "is_decoder": false,
+ "is_encoder_decoder": false,
+ "label2id": {
+ "LABEL_0": 0,
+ "LABEL_1": 1
+ },
+ "layer_norm_eps": 1e-05,
+ "length_penalty": 1.0,
+ "max_length": 20,
+ "max_position_embeddings": 77,
+ "min_length": 0,
  "model_type": "clip_text_model",
- "num_attention_heads": 12
+ "no_repeat_ngram_size": 0,
+ "num_attention_heads": 12,
+ "num_beam_groups": 1,
+ "num_beams": 1,
+ "num_hidden_layers": 12,
+ "num_return_sequences": 1,
+ "output_attentions": false,
+ "output_hidden_states": false,
+ "output_scores": false,
+ "pad_token_id": 1,
+ "prefix": null,
+ "problem_type": null,
+ "projection_dim": 512,
+ "pruned_heads": {},
+ "remove_invalid_values": false,
+ "repetition_penalty": 1.0,
+ "return_dict": true,
+ "return_dict_in_generate": false,
+ "sep_token_id": null,
+ "suppress_tokens": null,
+ "task_specific_params": null,
+ "temperature": 1.0,
+ "tf_legacy_loss": false,
+ "tie_encoder_decoder": false,
+ "tie_word_embeddings": true,
+ "tokenizer_class": null,
+ "top_k": 50,
+ "top_p": 1.0,
+ "torch_dtype": null,
+ "torchscript": false,
+ "transformers_version": "4.30.2",
+ "typical_p": 1.0,
+ "use_bfloat16": false,
+ "vocab_size": 49408
  },
  "torch_dtype": "float32",
- "transformers_version": "4.39.0.dev0",
+ "transformers_version": null,
  "vision_config": {
+ "_name_or_path": "",
+ "add_cross_attention": false,
+ "architectures": null,
+ "attention_dropout": 0.0,
+ "bad_words_ids": null,
+ "begin_suppress_tokens": null,
+ "bos_token_id": null,
+ "chunk_size_feed_forward": 0,
+ "cross_attention_hidden_size": null,
+ "decoder_start_token_id": null,
+ "diversity_penalty": 0.0,
+ "do_sample": false,
  "dropout": 0.0,
+ "early_stopping": false,
+ "encoder_no_repeat_ngram_size": 0,
+ "eos_token_id": null,
+ "exponential_decay_length_penalty": null,
+ "finetuning_task": null,
+ "forced_bos_token_id": null,
+ "forced_eos_token_id": null,
+ "hidden_act": "quick_gelu",
  "hidden_size": 1024,
+ "id2label": {
+ "0": "LABEL_0",
+ "1": "LABEL_1"
+ },
+ "image_size": 224,
+ "initializer_factor": 1.0,
+ "initializer_range": 0.02,
  "intermediate_size": 4096,
+ "is_decoder": false,
+ "is_encoder_decoder": false,
+ "label2id": {
+ "LABEL_0": 0,
+ "LABEL_1": 1
+ },
+ "layer_norm_eps": 1e-05,
+ "length_penalty": 1.0,
+ "max_length": 20,
+ "min_length": 0,
  "model_type": "clip_vision_model",
+ "no_repeat_ngram_size": 0,
  "num_attention_heads": 16,
+ "num_beam_groups": 1,
+ "num_beams": 1,
+ "num_channels": 3,
  "num_hidden_layers": 24,
- "patch_size": 14
+ "num_return_sequences": 1,
+ "output_attentions": false,
+ "output_hidden_states": false,
+ "output_scores": false,
+ "pad_token_id": null,
+ "patch_size": 14,
+ "prefix": null,
+ "problem_type": null,
+ "projection_dim": 512,
+ "pruned_heads": {},
+ "remove_invalid_values": false,
+ "repetition_penalty": 1.0,
+ "return_dict": true,
+ "return_dict_in_generate": false,
+ "sep_token_id": null,
+ "suppress_tokens": null,
+ "task_specific_params": null,
+ "temperature": 1.0,
+ "tf_legacy_loss": false,
+ "tie_encoder_decoder": false,
+ "tie_word_embeddings": true,
+ "tokenizer_class": null,
+ "top_k": 50,
+ "top_p": 1.0,
+ "torch_dtype": null,
+ "torchscript": false,
+ "transformers_version": "4.30.2",
+ "typical_p": 1.0,
+ "use_bfloat16": false
  }
  }
safety_checker/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ec45b3c657b2756ae6392a8696cea33e4b31827916fc1ee2d23eb979738f5e43
- size 1216062778
+ oid sha256:16d28f2b37109f222cdc33620fdd262102ac32112be0352a7f77e9614b35a394
+ size 1216064769
scheduler/scheduler_config.json CHANGED
@@ -1,6 +1,6 @@
  {
  "_class_name": "PNDMScheduler",
- "_diffusers_version": "0.28.0.dev0",
+ "_diffusers_version": "0.24.0",
  "beta_end": 0.012,
  "beta_schedule": "scaled_linear",
  "beta_start": 0.00085,
text_encoder/config.json CHANGED
@@ -19,6 +19,6 @@
  "pad_token_id": 1,
  "projection_dim": 768,
  "torch_dtype": "float32",
- "transformers_version": "4.39.0.dev0",
+ "transformers_version": "4.30.2",
  "vocab_size": 49408
  }
text_encoder/model.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:417b995bb5dd72e4509ebe013359076984ab2c0d4523d0a75b9b9902274556d6
- size 492265168
text_encoder/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:056a9e82568bee336cb338076d242f71e242f3a737b1b5c12b9e578c76cfb6a4
- size 492306586
+ oid sha256:9a9ce8416debe549369349713a653af9493baa1cfdd9fb1f3cc9cc7699396a41
+ size 492307041
tokenizer/special_tokens_map.json CHANGED
@@ -9,21 +9,15 @@
  "eos_token": {
  "content": "<|endoftext|>",
  "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
- "pad_token": {
- "content": "<|endoftext|>",
- "lstrip": false,
- "normalized": false,
+ "normalized": true,
  "rstrip": false,
  "single_word": false
  },
+ "pad_token": "<|endoftext|>",
  "unk_token": {
  "content": "<|endoftext|>",
  "lstrip": false,
- "normalized": false,
+ "normalized": true,
  "rstrip": false,
  "single_word": false
  }
tokenizer/tokenizer_config.json CHANGED
@@ -1,30 +1,33 @@
  {
  "add_prefix_space": false,
- "added_tokens_decoder": {
- "49406": {
- "content": "<|startoftext|>",
- "lstrip": false,
- "normalized": true,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "49407": {
- "content": "<|endoftext|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- }
+ "bos_token": {
+ "__type": "AddedToken",
+ "content": "<|startoftext|>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
  },
- "bos_token": "<|startoftext|>",
  "clean_up_tokenization_spaces": true,
  "do_lower_case": true,
- "eos_token": "<|endoftext|>",
+ "eos_token": {
+ "__type": "AddedToken",
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
  "errors": "replace",
  "model_max_length": 77,
  "pad_token": "<|endoftext|>",
  "tokenizer_class": "CLIPTokenizer",
- "unk_token": "<|endoftext|>"
+ "unk_token": {
+ "__type": "AddedToken",
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ }
  }
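
The tokenizer rewrite above swaps the newer `added_tokens_decoder` serialization for the older `"__type": "AddedToken"` form, matching the `"transformers_version": "4.30.2"` recorded elsewhere in this commit. A quick round-trip check that the special tokens survive the change, again assuming the repo id used above:

```python
from transformers import CLIPTokenizer

# Load the tokenizer from its subfolder and print the special tokens
tok = CLIPTokenizer.from_pretrained(
    "SG161222/Realistic_Vision_V6.0_B1_noVAE", subfolder="tokenizer"
)
print(tok.bos_token, tok.eos_token, tok.pad_token, tok.unk_token)
# -> <|startoftext|> <|endoftext|> <|endoftext|> <|endoftext|>
```
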
unet/config.json CHANGED
@@ -1,6 +1,6 @@
  {
  "_class_name": "UNet2DConditionModel",
- "_diffusers_version": "0.28.0.dev0",
+ "_diffusers_version": "0.24.0",
  "act_fn": "silu",
  "addition_embed_type": null,
  "addition_embed_type_num_heads": 64,
@@ -62,6 +62,6 @@
  "CrossAttnUpBlock2D",
  "CrossAttnUpBlock2D"
  ],
- "upcast_attention": false,
+ "upcast_attention": null,
  "use_linear_projection": false
  }
unet/diffusion_pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9c36a01e5f958aad17608d10cfdbc69490edfc00a3605e65fa8b333f600f4ae9
- size 3438366838
+ oid sha256:9d95f59174bb2d41cf4195e197fd2806022a7cbafa3cc7c20294e08052b18ab2
+ size 3438366373
unet/diffusion_pytorch_model.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:35cf18d43a1baf4a537b0cfdb5e40b7bfb300f30153838001fa3b68beb4bdd55
- size 3438167536
vae/config.json CHANGED
@@ -1,6 +1,6 @@
  {
  "_class_name": "AutoencoderKL",
- "_diffusers_version": "0.28.0.dev0",
+ "_diffusers_version": "0.24.0",
  "act_fn": "silu",
  "block_out_channels": [
  128,
@@ -17,8 +17,6 @@
  "force_upcast": true,
  "in_channels": 3,
  "latent_channels": 4,
- "latents_mean": null,
- "latents_std": null,
  "layers_per_block": 2,
  "norm_num_groups": 32,
  "out_channels": 3,
vae/diffusion_pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:66d3d118c9806bd5dfc3da5c75f95e71dc57510fd7a3d34b7e8f510fcba3243e
- size 334712578
+ oid sha256:251086e7c7793410070d11a421db3886a7b2a7ff27cdea006a67a4cd76a7a899
+ size 334712113
vae/diffusion_pytorch_model.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:2aa1f43011b553a4cba7f37456465cdbd48aab7b54b9348b890e8058ea7683ec
- size 334643268