.gitattributes CHANGED
@@ -32,4 +32,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
  trinart_characters_it4_v1.ckpt filter=lfs diff=lfs merge=lfs -text
  autoencoder_kl-f8-trinart_characters.ckpt filter=lfs diff=lfs merge=lfs -text
- autoencoder_fix_kl-f8-trinart_characters.ckpt filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -7,15 +7,6 @@ tags:
  license: creativeml-openrail-m
  ---
 
-
-
- ## Note
-
- A newer version of this model has been released:
- https://huggingface.co/naclbit/trinart_derrida_characters_v2_stable_diffusion
-
-
-
  ## Stable Diffusion TrinArt Characters model v1
 
  trinart_characters_19.2m_stable_diffusion_v1 is a Stable Diffusion v1-based model trained on roughly 19.2M anime/manga-style images (including pre-rolled augmented images), plus final finetuning on about 50,000 images. This model seeks a sweet spot between artistic style versatility and anatomical quality within the given model spec of SDv1.
@@ -28,8 +19,6 @@ This is the same version 1 model that was released in AI Novelist/TrinArt servic
 
  #### Custom autoencoder
 
- *Note: There was a wrong checkpoint uploaded before 5 Nov 2022. The file has been replaced with the latest checkpoint.*
-
  We also provide a separate checkpoint for the custom KL autoencoder. As suggested in the Latent Diffusion paper, we found that training the autoencoder and the latent diffusion model separately improves the result. Since the official Stable Diffusion script does not support loading a different VAE, you'll need to override the state_dict of first_stage_model to use it in your own script.
 
  The popular WebUI includes a script for loading separate first_stage_model parameters.
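For reference, a minimal sketch of that state_dict override, assuming the CompVis stable-diffusion repo layout (`ldm.util.instantiate_from_config`, the `v1-inference.yaml` config) and that both checkpoints keep their weights under a `state_dict` key; paths and names are illustrative, not the project's own loading code.

```python
import torch
from omegaconf import OmegaConf
from ldm.util import instantiate_from_config  # CompVis stable-diffusion repo

# Hypothetical local paths; adjust to wherever the checkpoints were downloaded.
model_ckpt_path = "trinart_characters_it4_v1.ckpt"
vae_ckpt_path = "autoencoder_kl-f8-trinart_characters.ckpt"

# Build the LatentDiffusion model and load the main checkpoint, roughly as the
# official txt2img script does.
config = OmegaConf.load("configs/stable-diffusion/v1-inference.yaml")
model = instantiate_from_config(config.model)
model.load_state_dict(
    torch.load(model_ckpt_path, map_location="cpu")["state_dict"], strict=False
)

# Override only first_stage_model (the KL autoencoder) with the custom VAE.
vae_ckpt = torch.load(vae_ckpt_path, map_location="cpu")
vae_sd = vae_ckpt.get("state_dict", vae_ckpt)
# strict=False tolerates extra keys (e.g. training-time loss weights) that the
# inference-time autoencoder does not define.
model.first_stage_model.load_state_dict(vae_sd, strict=False)
```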
autoencoder_fix_kl-f8-trinart_characters.ckpt → autoencoder_kl-f8-trinart_characters.ckpt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2453b80bc1716bc3f94496d4e56be891e267051dc43c5144f384b66a73ac8295
+ oid sha256:d2dd1c82220e31a72bd9958dda249ed7f94faf875d5123ae3aab7a1950a82a8f
  size 404661793
feature_extractor/preprocessor_config.json DELETED
@@ -1,28 +0,0 @@
- {
-   "crop_size": {
-     "height": 224,
-     "width": 224
-   },
-   "do_center_crop": true,
-   "do_convert_rgb": true,
-   "do_normalize": true,
-   "do_rescale": true,
-   "do_resize": true,
-   "feature_extractor_type": "CLIPFeatureExtractor",
-   "image_mean": [
-     0.48145466,
-     0.4578275,
-     0.40821073
-   ],
-   "image_processor_type": "CLIPFeatureExtractor",
-   "image_std": [
-     0.26862954,
-     0.26130258,
-     0.27577711
-   ],
-   "resample": 3,
-   "rescale_factor": 0.00392156862745098,
-   "size": {
-     "shortest_edge": 224
-   }
- }
model_index.json DELETED
@@ -1,33 +0,0 @@
- {
-   "_class_name": "StableDiffusionPipeline",
-   "_diffusers_version": "0.12.0.dev0",
-   "feature_extractor": [
-     "transformers",
-     "CLIPImageProcessor"
-   ],
-   "requires_safety_checker": true,
-   "safety_checker": [
-     "stable_diffusion",
-     "StableDiffusionSafetyChecker"
-   ],
-   "scheduler": [
-     "diffusers",
-     "PNDMScheduler"
-   ],
-   "text_encoder": [
-     "transformers",
-     "CLIPTextModel"
-   ],
-   "tokenizer": [
-     "transformers",
-     "CLIPTokenizer"
-   ],
-   "unet": [
-     "diffusers",
-     "UNet2DConditionModel"
-   ],
-   "vae": [
-     "diffusers",
-     "AutoencoderKL"
-   ]
- }
safety_checker/config.json DELETED
@@ -1,181 +0,0 @@
- {
-   "_commit_hash": "cb41f3a270d63d454d385fc2e4f571c487c253c5",
-   "_name_or_path": "CompVis/stable-diffusion-safety-checker",
-   "architectures": [
-     "StableDiffusionSafetyChecker"
-   ],
-   "initializer_factor": 1.0,
-   "logit_scale_init_value": 2.6592,
-   "model_type": "clip",
-   "projection_dim": 768,
-   "text_config": {
-     "_name_or_path": "",
-     "add_cross_attention": false,
-     "architectures": null,
-     "attention_dropout": 0.0,
-     "bad_words_ids": null,
-     "begin_suppress_tokens": null,
-     "bos_token_id": 0,
-     "chunk_size_feed_forward": 0,
-     "cross_attention_hidden_size": null,
-     "decoder_start_token_id": null,
-     "diversity_penalty": 0.0,
-     "do_sample": false,
-     "dropout": 0.0,
-     "early_stopping": false,
-     "encoder_no_repeat_ngram_size": 0,
-     "eos_token_id": 2,
-     "exponential_decay_length_penalty": null,
-     "finetuning_task": null,
-     "forced_bos_token_id": null,
-     "forced_eos_token_id": null,
-     "hidden_act": "quick_gelu",
-     "hidden_size": 768,
-     "id2label": {
-       "0": "LABEL_0",
-       "1": "LABEL_1"
-     },
-     "initializer_factor": 1.0,
-     "initializer_range": 0.02,
-     "intermediate_size": 3072,
-     "is_decoder": false,
-     "is_encoder_decoder": false,
-     "label2id": {
-       "LABEL_0": 0,
-       "LABEL_1": 1
-     },
-     "layer_norm_eps": 1e-05,
-     "length_penalty": 1.0,
-     "max_length": 20,
-     "max_position_embeddings": 77,
-     "min_length": 0,
-     "model_type": "clip_text_model",
-     "no_repeat_ngram_size": 0,
-     "num_attention_heads": 12,
-     "num_beam_groups": 1,
-     "num_beams": 1,
-     "num_hidden_layers": 12,
-     "num_return_sequences": 1,
-     "output_attentions": false,
-     "output_hidden_states": false,
-     "output_scores": false,
-     "pad_token_id": 1,
-     "prefix": null,
-     "problem_type": null,
-     "projection_dim": 512,
-     "pruned_heads": {},
-     "remove_invalid_values": false,
-     "repetition_penalty": 1.0,
-     "return_dict": true,
-     "return_dict_in_generate": false,
-     "sep_token_id": null,
-     "suppress_tokens": null,
-     "task_specific_params": null,
-     "temperature": 1.0,
-     "tf_legacy_loss": false,
-     "tie_encoder_decoder": false,
-     "tie_word_embeddings": true,
-     "tokenizer_class": null,
-     "top_k": 50,
-     "top_p": 1.0,
-     "torch_dtype": null,
-     "torchscript": false,
-     "transformers_version": "4.26.0.dev0",
-     "typical_p": 1.0,
-     "use_bfloat16": false,
-     "vocab_size": 49408
-   },
-   "text_config_dict": {
-     "hidden_size": 768,
-     "intermediate_size": 3072,
-     "num_attention_heads": 12,
-     "num_hidden_layers": 12
-   },
-   "torch_dtype": "float32",
-   "transformers_version": null,
-   "vision_config": {
-     "_name_or_path": "",
-     "add_cross_attention": false,
-     "architectures": null,
-     "attention_dropout": 0.0,
-     "bad_words_ids": null,
-     "begin_suppress_tokens": null,
-     "bos_token_id": null,
-     "chunk_size_feed_forward": 0,
-     "cross_attention_hidden_size": null,
-     "decoder_start_token_id": null,
-     "diversity_penalty": 0.0,
-     "do_sample": false,
-     "dropout": 0.0,
-     "early_stopping": false,
-     "encoder_no_repeat_ngram_size": 0,
-     "eos_token_id": null,
-     "exponential_decay_length_penalty": null,
-     "finetuning_task": null,
-     "forced_bos_token_id": null,
-     "forced_eos_token_id": null,
-     "hidden_act": "quick_gelu",
-     "hidden_size": 1024,
-     "id2label": {
-       "0": "LABEL_0",
-       "1": "LABEL_1"
-     },
-     "image_size": 224,
-     "initializer_factor": 1.0,
-     "initializer_range": 0.02,
-     "intermediate_size": 4096,
-     "is_decoder": false,
-     "is_encoder_decoder": false,
-     "label2id": {
-       "LABEL_0": 0,
-       "LABEL_1": 1
-     },
-     "layer_norm_eps": 1e-05,
-     "length_penalty": 1.0,
-     "max_length": 20,
-     "min_length": 0,
-     "model_type": "clip_vision_model",
-     "no_repeat_ngram_size": 0,
-     "num_attention_heads": 16,
-     "num_beam_groups": 1,
-     "num_beams": 1,
-     "num_channels": 3,
-     "num_hidden_layers": 24,
-     "num_return_sequences": 1,
-     "output_attentions": false,
-     "output_hidden_states": false,
-     "output_scores": false,
-     "pad_token_id": null,
-     "patch_size": 14,
-     "prefix": null,
-     "problem_type": null,
-     "projection_dim": 512,
-     "pruned_heads": {},
-     "remove_invalid_values": false,
-     "repetition_penalty": 1.0,
-     "return_dict": true,
-     "return_dict_in_generate": false,
-     "sep_token_id": null,
-     "suppress_tokens": null,
-     "task_specific_params": null,
-     "temperature": 1.0,
-     "tf_legacy_loss": false,
-     "tie_encoder_decoder": false,
-     "tie_word_embeddings": true,
-     "tokenizer_class": null,
-     "top_k": 50,
-     "top_p": 1.0,
-     "torch_dtype": null,
-     "torchscript": false,
-     "transformers_version": "4.26.0.dev0",
-     "typical_p": 1.0,
-     "use_bfloat16": false
-   },
-   "vision_config_dict": {
-     "hidden_size": 1024,
-     "intermediate_size": 4096,
-     "num_attention_heads": 16,
-     "num_hidden_layers": 24,
-     "patch_size": 14
-   }
- }
safety_checker/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:16d28f2b37109f222cdc33620fdd262102ac32112be0352a7f77e9614b35a394
- size 1216064769
scheduler/scheduler_config.json DELETED
@@ -1,14 +0,0 @@
- {
-   "_class_name": "PNDMScheduler",
-   "_diffusers_version": "0.12.0.dev0",
-   "beta_end": 0.012,
-   "beta_schedule": "scaled_linear",
-   "beta_start": 0.00085,
-   "clip_sample": false,
-   "num_train_timesteps": 1000,
-   "prediction_type": "epsilon",
-   "set_alpha_to_one": false,
-   "skip_prk_steps": true,
-   "steps_offset": 1,
-   "trained_betas": null
- }
text_encoder/config.json DELETED
@@ -1,25 +0,0 @@
- {
-   "_name_or_path": "openai/clip-vit-large-patch14",
-   "architectures": [
-     "CLIPTextModel"
-   ],
-   "attention_dropout": 0.0,
-   "bos_token_id": 0,
-   "dropout": 0.0,
-   "eos_token_id": 2,
-   "hidden_act": "quick_gelu",
-   "hidden_size": 768,
-   "initializer_factor": 1.0,
-   "initializer_range": 0.02,
-   "intermediate_size": 3072,
-   "layer_norm_eps": 1e-05,
-   "max_position_embeddings": 77,
-   "model_type": "clip_text_model",
-   "num_attention_heads": 12,
-   "num_hidden_layers": 12,
-   "pad_token_id": 1,
-   "projection_dim": 768,
-   "torch_dtype": "float32",
-   "transformers_version": "4.26.0.dev0",
-   "vocab_size": 49408
- }
text_encoder/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:aad0e7cec126b7ee2a36e52fef25ffc4a8c41ff0b2c7a1cd07f5e693680edab5
- size 492307041
tokenizer/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
tokenizer/special_tokens_map.json DELETED
@@ -1,24 +0,0 @@
- {
-   "bos_token": {
-     "content": "<|startoftext|>",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false
-   },
-   "eos_token": {
-     "content": "<|endoftext|>",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false
-   },
-   "pad_token": "<|endoftext|>",
-   "unk_token": {
-     "content": "<|endoftext|>",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false
-   }
- }
tokenizer/tokenizer_config.json DELETED
@@ -1,34 +0,0 @@
- {
-   "add_prefix_space": false,
-   "bos_token": {
-     "__type": "AddedToken",
-     "content": "<|startoftext|>",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false
-   },
-   "do_lower_case": true,
-   "eos_token": {
-     "__type": "AddedToken",
-     "content": "<|endoftext|>",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false
-   },
-   "errors": "replace",
-   "model_max_length": 77,
-   "name_or_path": "openai/clip-vit-large-patch14",
-   "pad_token": "<|endoftext|>",
-   "special_tokens_map_file": "./special_tokens_map.json",
-   "tokenizer_class": "CLIPTokenizer",
-   "unk_token": {
-     "__type": "AddedToken",
-     "content": "<|endoftext|>",
-     "lstrip": false,
-     "normalized": true,
-     "rstrip": false,
-     "single_word": false
-   }
- }
tokenizer/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
unet/config.json DELETED
@@ -1,44 +0,0 @@
- {
-   "_class_name": "UNet2DConditionModel",
-   "_diffusers_version": "0.12.0.dev0",
-   "act_fn": "silu",
-   "attention_head_dim": 8,
-   "block_out_channels": [
-     320,
-     640,
-     1280,
-     1280
-   ],
-   "center_input_sample": false,
-   "class_embed_type": null,
-   "cross_attention_dim": 768,
-   "down_block_types": [
-     "CrossAttnDownBlock2D",
-     "CrossAttnDownBlock2D",
-     "CrossAttnDownBlock2D",
-     "DownBlock2D"
-   ],
-   "downsample_padding": 1,
-   "dual_cross_attention": false,
-   "flip_sin_to_cos": true,
-   "freq_shift": 0,
-   "in_channels": 4,
-   "layers_per_block": 2,
-   "mid_block_scale_factor": 1,
-   "mid_block_type": "UNetMidBlock2DCrossAttn",
-   "norm_eps": 1e-05,
-   "norm_num_groups": 32,
-   "num_class_embeds": null,
-   "only_cross_attention": false,
-   "out_channels": 4,
-   "resnet_time_scale_shift": "default",
-   "sample_size": 64,
-   "up_block_types": [
-     "UpBlock2D",
-     "CrossAttnUpBlock2D",
-     "CrossAttnUpBlock2D",
-     "CrossAttnUpBlock2D"
-   ],
-   "upcast_attention": false,
-   "use_linear_projection": false
- }
unet/diffusion_pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:22975eb114b3a1b085d0e4f199210aad32a6ec1a85547d48f2e4a6f4c9410c8b
- size 3438366373
vae/config.json DELETED
@@ -1,30 +0,0 @@
- {
-   "_class_name": "AutoencoderKL",
-   "_diffusers_version": "0.12.0.dev0",
-   "act_fn": "silu",
-   "block_out_channels": [
-     128,
-     256,
-     512,
-     512
-   ],
-   "down_block_types": [
-     "DownEncoderBlock2D",
-     "DownEncoderBlock2D",
-     "DownEncoderBlock2D",
-     "DownEncoderBlock2D"
-   ],
-   "in_channels": 3,
-   "latent_channels": 4,
-   "layers_per_block": 2,
-   "norm_num_groups": 32,
-   "out_channels": 3,
-   "sample_size": 512,
-   "scaling_factor": 0.18215,
-   "up_block_types": [
-     "UpDecoderBlock2D",
-     "UpDecoderBlock2D",
-     "UpDecoderBlock2D",
-     "UpDecoderBlock2D"
-   ]
- }
vae/diffusion_pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:6723bacd3c60b11a2b4e6007338a54c6964c210116c3ccecb3bfc80e218afc8f
- size 334711857