.gitattributes CHANGED
@@ -32,4 +32,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
  trinart_characters_it4_v1.ckpt filter=lfs diff=lfs merge=lfs -text
  autoencoder_kl-f8-trinart_characters.ckpt filter=lfs diff=lfs merge=lfs -text
- autoencoder_fix_kl-f8-trinart_characters.ckpt filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -7,15 +7,6 @@ tags:
  license: creativeml-openrail-m
  ---
 
-
-
- ## Note
-
- A newer version of this model has been released:
- https://huggingface.co/naclbit/trinart_derrida_characters_v2_stable_diffusion
-
-
-
  ## Stable Diffusion TrinArt Characters model v1
 
  trinart_characters_19.2m_stable_diffusion_v1 is a Stable Diffusion v1-based model trained on roughly 19.2M anime/manga-style images (including pre-rolled augmented images), plus final finetuning on about 50,000 images. This model seeks a sweet spot between artistic style versatility and anatomical quality within the given model spec of SDv1.
@@ -28,8 +19,6 @@ This is the same version 1 model that was released in AI Novelist/TrinArt servic
 
  #### Custom autoencoder
 
- *Note: There was a wrong checkpoint uploaded before 5 Nov 2022. The file has been replaced with the latest checkpoint.*
-
  We also provide a separate checkpoint for the custom KL autoencoder. As suggested in the Latent Diffusion paper, we found that training the autoencoder and the latent diffusion model separately improves the result. Since the official Stable Diffusion script does not support loading a different VAE, you'll need to override the state_dict of first_stage_model to use it in your own script.
 
  The popular WebUI includes a script for loading separate first_stage_model parameters.
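For reference, a minimal sketch of that state_dict override, assuming the CompVis stable-diffusion repo layout (`ldm.util.instantiate_from_config`, the `v1-inference.yaml` config) and that both checkpoints keep their weights under a `state_dict` key; paths and names are illustrative, not the project's own loading code.

```python
import torch
from omegaconf import OmegaConf
from ldm.util import instantiate_from_config  # CompVis stable-diffusion repo

# Hypothetical local paths; adjust to wherever the checkpoints were downloaded.
model_ckpt_path = "trinart_characters_it4_v1.ckpt"
vae_ckpt_path = "autoencoder_kl-f8-trinart_characters.ckpt"

# Build the LatentDiffusion model and load the main checkpoint, roughly as the
# official txt2img script does.
config = OmegaConf.load("configs/stable-diffusion/v1-inference.yaml")
model = instantiate_from_config(config.model)
model.load_state_dict(
    torch.load(model_ckpt_path, map_location="cpu")["state_dict"], strict=False
)

# Override only first_stage_model (the KL autoencoder) with the custom VAE.
vae_ckpt = torch.load(vae_ckpt_path, map_location="cpu")
vae_sd = vae_ckpt.get("state_dict", vae_ckpt)
# strict=False tolerates extra keys (e.g. training-time loss weights) that the
# inference-time autoencoder does not define.
model.first_stage_model.load_state_dict(vae_sd, strict=False)
```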
autoencoder_fix_kl-f8-trinart_characters.ckpt → autoencoder_kl-f8-trinart_characters.ckpt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2453b80bc1716bc3f94496d4e56be891e267051dc43c5144f384b66a73ac8295
+ oid sha256:d2dd1c82220e31a72bd9958dda249ed7f94faf875d5123ae3aab7a1950a82a8f
  size 404661793
feature_extractor/preprocessor_config.json DELETED
@@ -1,28 +0,0 @@
- {
-   "crop_size": {
-     "height": 224,
-     "width": 224
-   },
-   "do_center_crop": true,
-   "do_convert_rgb": true,
-   "do_normalize": true,
-   "do_rescale": true,
-   "do_resize": true,
-   "feature_extractor_type": "CLIPFeatureExtractor",
-   "image_mean": [
-     0.48145466,
-     0.4578275,
-     0.40821073
-   ],
-   "image_processor_type": "CLIPFeatureExtractor",
-   "image_std": [
-     0.26862954,
-     0.26130258,
-     0.27577711
-   ],
-   "resample": 3,
-   "rescale_factor": 0.00392156862745098,
-   "size": {
-     "shortest_edge": 224
-   }
- }
model_index.json DELETED
@@ -1,33 +0,0 @@
- {
-   "_class_name": "StableDiffusionPipeline",
-   "_diffusers_version": "0.12.0.dev0",
-   "feature_extractor": [
-     "transformers",
-     "CLIPImageProcessor"
-   ],
-   "requires_safety_checker": true,
-   "safety_checker": [
-     "stable_diffusion",
-     "StableDiffusionSafetyChecker"
-   ],
-   "scheduler": [
-     "diffusers",
-     "PNDMScheduler"
-   ],
-   "text_encoder": [
-     "transformers",
-     "CLIPTextModel"
-   ],
-   "tokenizer": [
-     "transformers",
-     "CLIPTokenizer"
-   ],
-   "unet": [
-     "diffusers",
-     "UNet2DConditionModel"
-   ],
-   "vae": [
-     "diffusers",
-     "AutoencoderKL"
-   ]
- }
safety_checker/config.json DELETED
@@ -1,181 +0,0 @@
- {
-   "_commit_hash": "cb41f3a270d63d454d385fc2e4f571c487c253c5",
-   "_name_or_path": "CompVis/stable-diffusion-safety-checker",
-   "architectures": [
-     "StableDiffusionSafetyChecker"
-   ],
-   "initializer_factor": 1.0,
-   "logit_scale_init_value": 2.6592,
-   "model_type": "clip",
-   "projection_dim": 768,
-   "text_config": {
-     "_name_or_path": "",
-     "add_cross_attention": false,
-     "architectures": null,
-     "attention_dropout": 0.0,
-     "bad_words_ids": null,
-     "begin_suppress_tokens": null,
-     "bos_token_id": 0,
-     "chunk_size_feed_forward": 0,
-     "cross_attention_hidden_size": null,
-     "decoder_start_token_id": null,
-     "diversity_penalty": 0.0,
-     "do_sample": false,
-     "dropout": 0.0,
-     "early_stopping": false,
-     "encoder_no_repeat_ngram_size": 0,
-     "eos_token_id": 2,
-     "exponential_decay_length_penalty": null,
-     "finetuning_task": null,
-     "forced_bos_token_id": null,
-     "forced_eos_token_id": null,
-     "hidden_act": "quick_gelu",
-     "hidden_size": 768,
-     "id2label": {
-       "0": "LABEL_0",
-       "1": "LABEL_1"
-     },
-     "initializer_factor": 1.0,
-     "initializer_range": 0.02,
-     "intermediate_size": 3072,
-     "is_decoder": false,
-     "is_encoder_decoder": false,
-     "label2id": {
-       "LABEL_0": 0,
-       "LABEL_1": 1
-     },
-     "layer_norm_eps": 1e-05,
-     "length_penalty": 1.0,
-     "max_length": 20,
-     "max_position_embeddings": 77,
-     "min_length": 0,
-     "model_type": "clip_text_model",
-     "no_repeat_ngram_size": 0,
-     "num_attention_heads": 12,
-     "num_beam_groups": 1,
-     "num_beams": 1,
-     "num_hidden_layers": 12,
-     "num_return_sequences": 1,
-     "output_attentions": false,
-     "output_hidden_states": false,
-     "output_scores": false,
-     "pad_token_id": 1,
-     "prefix": null,
-     "problem_type": null,
-     "projection_dim": 512,
-     "pruned_heads": {},
-     "remove_invalid_values": false,
-     "repetition_penalty": 1.0,
-     "return_dict": true,
-     "return_dict_in_generate": false,
-     "sep_token_id": null,
-     "suppress_tokens": null,
-     "task_specific_params": null,
-     "temperature": 1.0,
-     "tf_legacy_loss": false,
-     "tie_encoder_decoder": false,
-     "tie_word_embeddings": true,
-     "tokenizer_class": null,
-     "top_k": 50,
-     "top_p": 1.0,
-     "torch_dtype": null,
-     "torchscript": false,
-     "transformers_version": "4.26.0.dev0",
-     "typical_p": 1.0,
-     "use_bfloat16": false,
-     "vocab_size": 49408
-   },
-   "text_config_dict": {
-     "hidden_size": 768,
-     "intermediate_size": 3072,
-     "num_attention_heads": 12,
-     "num_hidden_layers": 12
-   },
-   "torch_dtype": "float32",
-   "transformers_version": null,
-   "vision_config": {
-     "_name_or_path": "",
-     "add_cross_attention": false,
-     "architectures": null,
-     "attention_dropout": 0.0,
-     "bad_words_ids": null,
-     "begin_suppress_tokens": null,
-     "bos_token_id": null,
-     "chunk_size_feed_forward": 0,
-     "cross_attention_hidden_size": null,
-     "decoder_start_token_id": null,
-     "diversity_penalty": 0.0,
-     "do_sample": false,
-     "dropout": 0.0,
-     "early_stopping": false,
-     "encoder_no_repeat_ngram_size": 0,
-     "eos_token_id": null,
-     "exponential_decay_length_penalty": null,
-     "finetuning_task": null,
-     "forced_bos_token_id": null,
-     "forced_eos_token_id": null,
-     "hidden_act": "quick_gelu",
-     "hidden_size": 1024,
-     "id2label": {
-       "0": "LABEL_0",
-       "1": "LABEL_1"
-     },
-     "image_size": 224,
-     "initializer_factor": 1.0,
-     "initializer_range": 0.02,
-     "intermediate_size": 4096,
-     "is_decoder": false,
-     "is_encoder_decoder": false,
-     "label2id": {
-       "LABEL_0": 0,
-       "LABEL_1": 1
-     },
-     "layer_norm_eps": 1e-05,
-     "length_penalty": 1.0,
-     "max_length": 20,
-     "min_length": 0,
-     "model_type": "clip_vision_model",
-     "no_repeat_ngram_size": 0,
-     "num_attention_heads": 16,
-     "num_beam_groups": 1,
-     "num_beams": 1,
-     "num_channels": 3,
-     "num_hidden_layers": 24,
-     "num_return_sequences": 1,
-     "output_attentions": false,
-     "output_hidden_states": false,
-     "output_scores": false,
-     "pad_token_id": null,
-     "patch_size": 14,
-     "prefix": null,
-     "problem_type": null,
-     "projection_dim": 512,
-     "pruned_heads": {},
-     "remove_invalid_values": false,
-     "repetition_penalty": 1.0,
-     "return_dict": true,
-     "return_dict_in_generate": false,
-     "sep_token_id": null,
-     "suppress_tokens": null,
-     "task_specific_params": null,
-     "temperature": 1.0,
-     "tf_legacy_loss": false,
-     "tie_encoder_decoder": false,
-     "tie_word_embeddings": true,
-     "tokenizer_class": null,
-     "top_k": 50,
-     "top_p": 1.0,
-     "torch_dtype": null,
-     "torchscript": false,
-     "transformers_version": "4.26.0.dev0",
-     "typical_p": 1.0,
-     "use_bfloat16": false
-   },
-   "vision_config_dict": {
-     "hidden_size": 1024,
-     "intermediate_size": 4096,
-     "num_attention_heads": 16,
-     "num_hidden_layers": 24,
-     "patch_size": 14
-   }
- }
safety_checker/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:16d28f2b37109f222cdc33620fdd262102ac32112be0352a7f77e9614b35a394
- size 1216064769
scheduler/scheduler_config.json DELETED
@@ -1,14 +0,0 @@
- {
-   "_class_name": "PNDMScheduler",
-   "_diffusers_version": "0.12.0.dev0",
-   "beta_end": 0.012,
-   "beta_schedule": "scaled_linear",
-   "beta_start": 0.00085,
-   "clip_sample": false,
-   "num_train_timesteps": 1000,
-   "prediction_type": "epsilon",
-   "set_alpha_to_one": false,
-   "skip_prk_steps": true,
-   "steps_offset": 1,
-   "trained_betas": null
- }
text_encoder/config.json DELETED
@@ -1,25 +0,0 @@
- {
-   "_name_or_path": "openai/clip-vit-large-patch14",
-   "architectures": [
-     "CLIPTextModel"
-   ],
-   "attention_dropout": 0.0,
-   "bos_token_id": 0,
-   "dropout": 0.0,
-   "eos_token_id": 2,
-   "hidden_act": "quick_gelu",
-   "hidden_size": 768,
-   "initializer_factor": 1.0,
-   "initializer_range": 0.02,
-   "intermediate_size": 3072,
-   "layer_norm_eps": 1e-05,
-   "max_position_embeddings": 77,
-   "model_type": "clip_text_model",
-   "num_attention_heads": 12,
-   "num_hidden_layers": 12,
-   "pad_token_id": 1,
-   "projection_dim": 768,
-   "torch_dtype": "float32",
-   "transformers_version": "4.26.0.dev0",
-   "vocab_size": 49408
- }
text_encoder/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:aad0e7cec126b7ee2a36e52fef25ffc4a8c41ff0b2c7a1cd07f5e693680edab5
- size 492307041
tokenizer/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
tokenizer/special_tokens_map.json DELETED
@@ -1,24 +0,0 @@
- {
-   "bos_token": {
-     "content": "<|startoftext|>",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false
-   },
-   "eos_token": {
-     "content": "<|endoftext|>",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false
-   },
-   "pad_token": "<|endoftext|>",
-   "unk_token": {
-     "content": "<|endoftext|>",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false
-   }
- }
tokenizer/tokenizer_config.json DELETED
@@ -1,34 +0,0 @@
- {
-   "add_prefix_space": false,
-   "bos_token": {
-     "__type": "AddedToken",
-     "content": "<|startoftext|>",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false
-   },
-   "do_lower_case": true,
-   "eos_token": {
-     "__type": "AddedToken",
-     "content": "<|endoftext|>",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false
-   },
-   "errors": "replace",
-   "model_max_length": 77,
-   "name_or_path": "openai/clip-vit-large-patch14",
-   "pad_token": "<|endoftext|>",
-   "special_tokens_map_file": "./special_tokens_map.json",
-   "tokenizer_class": "CLIPTokenizer",
-   "unk_token": {
-     "__type": "AddedToken",
-     "content": "<|endoftext|>",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false
-   }
- }
tokenizer/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
unet/config.json DELETED
@@ -1,44 +0,0 @@
- {
-   "_class_name": "UNet2DConditionModel",
-   "_diffusers_version": "0.12.0.dev0",
-   "act_fn": "silu",
-   "attention_head_dim": 8,
-   "block_out_channels": [
-     320,
-     640,
-     1280,
-     1280
-   ],
-   "center_input_sample": false,
-   "class_embed_type": null,
-   "cross_attention_dim": 768,
-   "down_block_types": [
-     "CrossAttnDownBlock2D",
-     "CrossAttnDownBlock2D",
-     "CrossAttnDownBlock2D",
-     "DownBlock2D"
-   ],
-   "downsample_padding": 1,
-   "dual_cross_attention": false,
-   "flip_sin_to_cos": true,
-   "freq_shift": 0,
-   "in_channels": 4,
-   "layers_per_block": 2,
-   "mid_block_scale_factor": 1,
-   "mid_block_type": "UNetMidBlock2DCrossAttn",
-   "norm_eps": 1e-05,
-   "norm_num_groups": 32,
-   "num_class_embeds": null,
-   "only_cross_attention": false,
-   "out_channels": 4,
-   "resnet_time_scale_shift": "default",
-   "sample_size": 64,
-   "up_block_types": [
-     "UpBlock2D",
-     "CrossAttnUpBlock2D",
-     "CrossAttnUpBlock2D",
-     "CrossAttnUpBlock2D"
-   ],
-   "upcast_attention": false,
-   "use_linear_projection": false
- }
unet/diffusion_pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:22975eb114b3a1b085d0e4f199210aad32a6ec1a85547d48f2e4a6f4c9410c8b
- size 3438366373
vae/config.json DELETED
@@ -1,30 +0,0 @@
- {
-   "_class_name": "AutoencoderKL",
-   "_diffusers_version": "0.12.0.dev0",
-   "act_fn": "silu",
-   "block_out_channels": [
-     128,
-     256,
-     512,
-     512
-   ],
-   "down_block_types": [
-     "DownEncoderBlock2D",
-     "DownEncoderBlock2D",
-     "DownEncoderBlock2D",
-     "DownEncoderBlock2D"
-   ],
-   "in_channels": 3,
-   "latent_channels": 4,
-   "layers_per_block": 2,
-   "norm_num_groups": 32,
-   "out_channels": 3,
-   "sample_size": 512,
-   "scaling_factor": 0.18215,
-   "up_block_types": [
-     "UpDecoderBlock2D",
-     "UpDecoderBlock2D",
-     "UpDecoderBlock2D",
-     "UpDecoderBlock2D"
-   ]
- }
vae/diffusion_pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:6723bacd3c60b11a2b4e6007338a54c6964c210116c3ccecb3bfc80e218afc8f
- size 334711857