Snowad committed
Commit ef83bc1
1 Parent(s): 266e5c6

test with konosuba model

feature_extractor/preprocessor_config.json ADDED
@@ -0,0 +1,20 @@
+ {
+   "crop_size": 224,
+   "do_center_crop": true,
+   "do_convert_rgb": true,
+   "do_normalize": true,
+   "do_resize": true,
+   "feature_extractor_type": "CLIPFeatureExtractor",
+   "image_mean": [
+     0.48145466,
+     0.4578275,
+     0.40821073
+   ],
+   "image_std": [
+     0.26862954,
+     0.26130258,
+     0.27577711
+   ],
+   "resample": 3,
+   "size": 224
+ }
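These are the stock CLIP preprocessing values (224 px resize and center crop, bicubic resampling, CLIP mean/std normalisation). As a rough sketch, not part of the commit, the same extractor can be built directly from these keyword arguments with transformers; newer transformers releases expose the same functionality as CLIPImageProcessor.

# Sketch only: values copied from feature_extractor/preprocessor_config.json above.
from transformers import CLIPFeatureExtractor

feature_extractor = CLIPFeatureExtractor(
    crop_size=224,
    do_center_crop=True,
    do_convert_rgb=True,
    do_normalize=True,
    do_resize=True,
    image_mean=[0.48145466, 0.4578275, 0.40821073],
    image_std=[0.26862954, 0.26130258, 0.27577711],
    resample=3,  # 3 == PIL bicubic
    size=224,
)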
model_index.json CHANGED
@@ -1,18 +1,17 @@
  {
    "_class_name": "StableDiffusionPipeline",
-   "_diffusers_version": "0.10.2",
+   "_diffusers_version": "0.6.0",
    "feature_extractor": [
-     null,
-     null
+     "transformers",
+     "CLIPFeatureExtractor"
    ],
-   "requires_safety_checker": null,
    "safety_checker": [
-     null,
-     null
+     "stable_diffusion",
+     "StableDiffusionSafetyChecker"
    ],
    "scheduler": [
      "diffusers",
-     "DDPMScheduler"
+     "PNDMScheduler"
    ],
    "text_encoder": [
      "transformers",
safety_checker/config.json ADDED
@@ -0,0 +1,174 @@
+ {
+   "_name_or_path": "CompVis/stable-diffusion-safety-checker",
+   "architectures": [
+     "StableDiffusionSafetyChecker"
+   ],
+   "initializer_factor": 1.0,
+   "logit_scale_init_value": 2.6592,
+   "model_type": "clip",
+   "projection_dim": 768,
+   "text_config": {
+     "_name_or_path": "",
+     "add_cross_attention": false,
+     "architectures": null,
+     "attention_dropout": 0.0,
+     "bad_words_ids": null,
+     "bos_token_id": 0,
+     "chunk_size_feed_forward": 0,
+     "cross_attention_hidden_size": null,
+     "decoder_start_token_id": null,
+     "diversity_penalty": 0.0,
+     "do_sample": false,
+     "dropout": 0.0,
+     "early_stopping": false,
+     "encoder_no_repeat_ngram_size": 0,
+     "eos_token_id": 2,
+     "exponential_decay_length_penalty": null,
+     "finetuning_task": null,
+     "forced_bos_token_id": null,
+     "forced_eos_token_id": null,
+     "hidden_act": "quick_gelu",
+     "hidden_size": 768,
+     "id2label": {
+       "0": "LABEL_0",
+       "1": "LABEL_1"
+     },
+     "initializer_factor": 1.0,
+     "initializer_range": 0.02,
+     "intermediate_size": 3072,
+     "is_decoder": false,
+     "is_encoder_decoder": false,
+     "label2id": {
+       "LABEL_0": 0,
+       "LABEL_1": 1
+     },
+     "layer_norm_eps": 1e-05,
+     "length_penalty": 1.0,
+     "max_length": 20,
+     "max_position_embeddings": 77,
+     "min_length": 0,
+     "model_type": "clip_text_model",
+     "no_repeat_ngram_size": 0,
+     "num_attention_heads": 12,
+     "num_beam_groups": 1,
+     "num_beams": 1,
+     "num_hidden_layers": 12,
+     "num_return_sequences": 1,
+     "output_attentions": false,
+     "output_hidden_states": false,
+     "output_scores": false,
+     "pad_token_id": 1,
+     "prefix": null,
+     "problem_type": null,
+     "pruned_heads": {},
+     "remove_invalid_values": false,
+     "repetition_penalty": 1.0,
+     "return_dict": true,
+     "return_dict_in_generate": false,
+     "sep_token_id": null,
+     "task_specific_params": null,
+     "temperature": 1.0,
+     "tf_legacy_loss": false,
+     "tie_encoder_decoder": false,
+     "tie_word_embeddings": true,
+     "tokenizer_class": null,
+     "top_k": 50,
+     "top_p": 1.0,
+     "torch_dtype": null,
+     "torchscript": false,
+     "transformers_version": "4.21.0",
+     "typical_p": 1.0,
+     "use_bfloat16": false,
+     "vocab_size": 49408
+   },
+   "text_config_dict": {
+     "hidden_size": 768,
+     "intermediate_size": 3072,
+     "num_attention_heads": 12,
+     "num_hidden_layers": 12
+   },
+   "torch_dtype": "float32",
+   "transformers_version": null,
+   "vision_config": {
+     "_name_or_path": "",
+     "add_cross_attention": false,
+     "architectures": null,
+     "attention_dropout": 0.0,
+     "bad_words_ids": null,
+     "bos_token_id": null,
+     "chunk_size_feed_forward": 0,
+     "cross_attention_hidden_size": null,
+     "decoder_start_token_id": null,
+     "diversity_penalty": 0.0,
+     "do_sample": false,
+     "dropout": 0.0,
+     "early_stopping": false,
+     "encoder_no_repeat_ngram_size": 0,
+     "eos_token_id": null,
+     "exponential_decay_length_penalty": null,
+     "finetuning_task": null,
+     "forced_bos_token_id": null,
+     "forced_eos_token_id": null,
+     "hidden_act": "quick_gelu",
+     "hidden_size": 1024,
+     "id2label": {
+       "0": "LABEL_0",
+       "1": "LABEL_1"
+     },
+     "image_size": 224,
+     "initializer_factor": 1.0,
+     "initializer_range": 0.02,
+     "intermediate_size": 4096,
+     "is_decoder": false,
+     "is_encoder_decoder": false,
+     "label2id": {
+       "LABEL_0": 0,
+       "LABEL_1": 1
+     },
+     "layer_norm_eps": 1e-05,
+     "length_penalty": 1.0,
+     "max_length": 20,
+     "min_length": 0,
+     "model_type": "clip_vision_model",
+     "no_repeat_ngram_size": 0,
+     "num_attention_heads": 16,
+     "num_beam_groups": 1,
+     "num_beams": 1,
+     "num_channels": 3,
+     "num_hidden_layers": 24,
+     "num_return_sequences": 1,
+     "output_attentions": false,
+     "output_hidden_states": false,
+     "output_scores": false,
+     "pad_token_id": null,
+     "patch_size": 14,
+     "prefix": null,
+     "problem_type": null,
+     "pruned_heads": {},
+     "remove_invalid_values": false,
+     "repetition_penalty": 1.0,
+     "return_dict": true,
+     "return_dict_in_generate": false,
+     "sep_token_id": null,
+     "task_specific_params": null,
+     "temperature": 1.0,
+     "tf_legacy_loss": false,
+     "tie_encoder_decoder": false,
+     "tie_word_embeddings": true,
+     "tokenizer_class": null,
+     "top_k": 50,
+     "top_p": 1.0,
+     "torch_dtype": null,
+     "torchscript": false,
+     "transformers_version": "4.21.0",
+     "typical_p": 1.0,
+     "use_bfloat16": false
+   },
+   "vision_config_dict": {
+     "hidden_size": 1024,
+     "intermediate_size": 4096,
+     "num_attention_heads": 16,
+     "num_hidden_layers": 24,
+     "patch_size": 14
+   }
+ }
safety_checker/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:193490b58ef62739077262e833bf091c66c29488058681ac25cf7df3d8190974
+ size 1216061799
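The added config and weights are the stock CompVis safety checker (_name_or_path CompVis/stable-diffusion-safety-checker, a CLIP-based NSFW classifier of roughly 1.2 GB). A hedged sketch of opting out of it at load time, again with a placeholder repo id:

# Sketch only; diffusers prints a warning when the checker is disabled.
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "Snowad/konosuba-model",  # placeholder repo id, not from this commit
    safety_checker=None,      # skip the NSFW filter added in this commit
)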
scheduler/scheduler_config.json CHANGED
@@ -1,14 +1,12 @@
  {
-   "_class_name": "DDPMScheduler",
-   "_diffusers_version": "0.10.2",
+   "_class_name": "PNDMScheduler",
+   "_diffusers_version": "0.6.0",
    "beta_end": 0.012,
-   "beta_schedule": "linear",
+   "beta_schedule": "scaled_linear",
    "beta_start": 0.00085,
-   "clip_sample": false,
    "num_train_timesteps": 1000,
-   "prediction_type": "epsilon",
    "set_alpha_to_one": false,
+   "skip_prk_steps": true,
    "steps_offset": 1,
-   "trained_betas": null,
-   "variance_type": "fixed_small"
+   "trained_betas": null
  }
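The default scheduler changes from DDPM to PNDM with the scaled_linear beta schedule that Stable Diffusion checkpoints are trained with; skip_prk_steps: true makes PNDM skip its Runge-Kutta warm-up and go straight to PLMS steps. A sketch of the scheduler these values describe, with constructor arguments copied from the new config:

# Sketch only: equivalent to loading scheduler/scheduler_config.json.
from diffusers import PNDMScheduler

scheduler = PNDMScheduler(
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    num_train_timesteps=1000,
    set_alpha_to_one=False,
    skip_prk_steps=True,
    steps_offset=1,
    trained_betas=None,
)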
text_encoder/config.json CHANGED
@@ -1,5 +1,5 @@
  {
-   "_name_or_path": "logs\\MyneFactoryBase_20230203-010401\\ckpts\\last-MyneFactoryBase-ep19-gs115520",
+   "_name_or_path": "openai/clip-vit-large-patch14",
    "architectures": [
      "CLIPTextModel"
    ],
@@ -20,6 +20,6 @@
    "pad_token_id": 1,
    "projection_dim": 768,
    "torch_dtype": "float32",
-   "transformers_version": "4.25.1",
+   "transformers_version": "4.21.0",
    "vocab_size": 49408
  }
text_encoder/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:0d8898ffbfae0b20762baee0ebbe664d79ed6ebdb1fff17dc55ca5527526007c
- size 492308087
+ oid sha256:389532dada8efa38871eb92d2f8366a927b1b8ea4fd6a7215d23d834c51c80d3
+ size 492305335
tokenizer/tokenizer_config.json CHANGED
@@ -19,7 +19,7 @@
    },
    "errors": "replace",
    "model_max_length": 77,
-   "name_or_path": "logs\\MyneFactoryBase_20230203-010401\\ckpts\\last-MyneFactoryBase-ep19-gs115520",
+   "name_or_path": "openai/clip-vit-large-patch14",
    "pad_token": "<|endoftext|>",
    "special_tokens_map_file": "./special_tokens_map.json",
    "tokenizer_class": "CLIPTokenizer",
@@ -30,6 +30,5 @@
      "normalized": true,
      "rstrip": false,
      "single_word": false
-   },
-   "use_fast": false
+   }
  }
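Both the tokenizer and text encoder configs now report openai/clip-vit-large-patch14 as their origin instead of the local MyneFactoryBase checkpoint path, and the tokenizer config drops the use_fast entry. A sketch of how the pair is typically used to build prompt embeddings, with a placeholder repo id:

# Sketch only; "Snowad/konosuba-model" is a placeholder repo id.
import torch
from transformers import CLIPTextModel, CLIPTokenizer

repo = "Snowad/konosuba-model"
tokenizer = CLIPTokenizer.from_pretrained(repo, subfolder="tokenizer")
text_encoder = CLIPTextModel.from_pretrained(repo, subfolder="text_encoder")

tokens = tokenizer(
    "1girl, konosuba style",
    padding="max_length",
    max_length=tokenizer.model_max_length,  # 77, per the config above
    return_tensors="pt",
)
with torch.no_grad():
    prompt_embeds = text_encoder(tokens.input_ids)[0]  # shape (1, 77, 768)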
unet/config.json CHANGED
@@ -1,7 +1,6 @@
  {
    "_class_name": "UNet2DConditionModel",
-   "_diffusers_version": "0.10.2",
-   "_name_or_path": "logs\\MyneFactoryBase_20230203-010401\\ckpts\\last-MyneFactoryBase-ep19-gs115520",
+   "_diffusers_version": "0.6.0",
    "act_fn": "silu",
    "attention_head_dim": 8,
    "block_out_channels": [
@@ -19,7 +18,6 @@
      "DownBlock2D"
    ],
    "downsample_padding": 1,
-   "dual_cross_attention": false,
    "flip_sin_to_cos": true,
    "freq_shift": 0,
    "in_channels": 4,
@@ -27,16 +25,12 @@
    "mid_block_scale_factor": 1,
    "norm_eps": 1e-05,
    "norm_num_groups": 32,
-   "num_class_embeds": null,
-   "only_cross_attention": false,
    "out_channels": 4,
-   "sample_size": 96,
+   "sample_size": 32,
    "up_block_types": [
      "UpBlock2D",
      "CrossAttnUpBlock2D",
      "CrossAttnUpBlock2D",
      "CrossAttnUpBlock2D"
-   ],
-   "upcast_attention": false,
-   "use_linear_projection": false
+   ]
  }
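The UNet config drops the fields introduced by newer diffusers releases and, more importantly, changes sample_size from 96 to 32. sample_size is measured in latent pixels and the VAE downsamples by a factor of 8, so the new value corresponds to a native output resolution of 256x256 rather than 768x768. A small sketch, placeholder repo id again:

# Sketch only; "Snowad/konosuba-model" is a placeholder repo id.
from diffusers import UNet2DConditionModel

unet = UNet2DConditionModel.from_pretrained("Snowad/konosuba-model", subfolder="unet")
print(unet.config.sample_size * 8)  # implied pixel resolution: 32 * 8 = 256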
unet/diffusion_pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:dcfdc45fd2ed4d8b31f51faa909baf96f58d5942038c267f47603850468445bf
- size 3438364325
+ oid sha256:c2c820f9b296154e60d6581c0bbd62cf95845f0ab2004e36e9ef3faf39a55ae3
+ size 3438354725
vae/config.json CHANGED
@@ -1,7 +1,6 @@
  {
    "_class_name": "AutoencoderKL",
-   "_diffusers_version": "0.10.2",
-   "_name_or_path": "logs\\MyneFactoryBase_20230203-010401\\ckpts\\last-MyneFactoryBase-ep19-gs115520",
+   "_diffusers_version": "0.6.0",
    "act_fn": "silu",
    "block_out_channels": [
      128,
@@ -20,7 +19,7 @@
    "layers_per_block": 2,
    "norm_num_groups": 32,
    "out_channels": 3,
-   "sample_size": 768,
+   "sample_size": 256,
    "up_block_types": [
      "UpDecoderBlock2D",
      "UpDecoderBlock2D",
vae/diffusion_pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:15a996c812c0e5b1a5c5815ab0192244c9876e073a6eda3b576b204a968167c3
- size 167402961
+ oid sha256:6e20f5df1c5196f0caa2f777d2ed840f5517f5775f98c38bd6595c31aaea40c3
+ size 334707217
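The VAE config carries the same version metadata change and drops sample_size from 768 to 256, and the replacement weights are roughly twice the size of the old file, which would be consistent with full-precision rather than half-precision storage, though the commit itself does not say so. A round-trip sketch through the autoencoder, placeholder repo id:

# Sketch only; "Snowad/konosuba-model" is a placeholder repo id.
import torch
from diffusers import AutoencoderKL

vae = AutoencoderKL.from_pretrained("Snowad/konosuba-model", subfolder="vae")
with torch.no_grad():
    image = torch.randn(1, 3, 256, 256)                # stand-in for a real image in [-1, 1]
    latents = vae.encode(image).latent_dist.sample()   # (1, 4, 32, 32): 8x spatial downsampling
    recon = vae.decode(latents).sample                 # (1, 3, 256, 256)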