Trained for 0 epochs and 400 steps.

Trained with datasets ['text-embeds-sdxl', 'training-test']
Learning rate 4e-07, batch size 4, and 4 gradient accumulation steps.
Used the DDPM noise scheduler for training with the v_prediction prediction type and rescale_betas_zero_snr=True.
Using 'trailing' timestep spacing.
Base model: ptx0/terminus-xl-velocity-v2
VAE: madebyollin/sdxl-vae-fp16-fix

Files changed (7)

README.md +6 -110
model_index.json +15 -7
scheduler/scheduler_config.json +10 -3
tokenizer/tokenizer_config.json +20 -23
tokenizer_2/tokenizer_config.json +28 -23
unet/config.json +1 -1
vae/config.json +3 -1

README.md CHANGED Viewed

@@ -58,116 +58,12 @@ You may reuse the base model text encoder for inference.
 ## Datasets
-### celebrities
-- Repeats: 0
-- Total number of images: 144
-- Total number of aspect buckets: 3
 - Resolution: 1.0 megapixels
 - Cropped: True
-- Crop style: random
-- Crop aspect: random
-### movieposters
-- Repeats: 0
-- Total number of images: 208
-- Total number of aspect buckets: 3
-- Resolution: 1.0 megapixels
-- Cropped: True
-- Crop style: random
-- Crop aspect: random
-### pixel-art
-- Repeats: 0
-- Total number of images: 112
-- Total number of aspect buckets: 3
-- Resolution: 1.0 megapixels
-- Cropped: True
-- Crop style: random
-- Crop aspect: random
-### moviecollection
-- Repeats: 0
-- Total number of images: 208
-- Total number of aspect buckets: 3
-- Resolution: 1.0 megapixels
-- Cropped: True
-- Crop style: random
-- Crop aspect: random
-### ethnic
-- Repeats: 0
-- Total number of images: 368
-- Total number of aspect buckets: 2
-- Resolution: 1.0 megapixels
-- Cropped: True
-- Crop style: random
-- Crop aspect: random
-### shutterstock
-- Repeats: 0
-- Total number of images: 2608
-- Total number of aspect buckets: 3
-- Resolution: 1.0 megapixels
-- Cropped: True
-- Crop style: random
-- Crop aspect: random
-### cinemamix-1mp
-- Repeats: 0
-- Total number of images: 1104
-- Total number of aspect buckets: 1
-- Resolution: 1.0 megapixels
-- Cropped: True
-- Crop style: random
-- Crop aspect: random
-### nsfw-1024
-- Repeats: 0
-- Total number of images: 1328
-- Total number of aspect buckets: 3
-- Resolution: 1.0 megapixels
-- Cropped: True
-- Crop style: random
-- Crop aspect: random
-### anatomy
-- Repeats: 5
-- Total number of images: 2032
-- Total number of aspect buckets: 3
-- Resolution: 1.0 megapixels
-- Cropped: True
-- Crop style: random
-- Crop aspect: random
-### bg20k-1024
-- Repeats: 0
-- Total number of images: 11136
-- Total number of aspect buckets: 3
-- Resolution: 1.0 megapixels
-- Cropped: True
-- Crop style: random
-- Crop aspect: random
-### yoga
-- Repeats: 0
-- Total number of images: 448
-- Total number of aspect buckets: 2
-- Resolution: 1.0 megapixels
-- Cropped: True
-- Crop style: random
-- Crop aspect: random
-### photo-aesthetics
-- Repeats: 0
-- Total number of images: 4128
-- Total number of aspect buckets: 3
-- Resolution: 1.0 megapixels
-- Cropped: True
-- Crop style: random
-- Crop aspect: random
-### text-1mp
-- Repeats: 5
-- Total number of images: 1616
-- Total number of aspect buckets: 3
-- Resolution: 1.0 megapixels
-- Cropped: True
-- Crop style: random
-- Crop aspect: random
-### photo-concept-bucket
-- Repeats: 0
-- Total number of images: 70912
-- Total number of aspect buckets: 3
-- Resolution: 1.0 megapixels
-- Cropped: True
-- Crop style: random
-- Crop aspect: random

 ## Datasets
+### training-test
+- Repeats: 1
+- Total number of images: 192
+- Total number of aspect buckets: 17
 - Resolution: 1.0 megapixels
 - Cropped: True
+- Crop style: corner
+- Crop aspect: preserve

model_index.json CHANGED Viewed

@@ -1,19 +1,27 @@
 {
   "_class_name": "StableDiffusionXLPipeline",
-  "_diffusers_version": "0.28.0",
-  "_name_or_path": "ptx0/sdxl-base",
   "force_zeros_for_empty_prompt": true,
   "scheduler": [
     "diffusers",
-    "UniPCMultistepScheduler"
   ],
   "text_encoder": [
-    "transformers",
-    "CLIPTextModel"
   ],
   "text_encoder_2": [
-    "transformers",
-    "CLIPTextModelWithProjection"
   ],
   "tokenizer": [
     "transformers",

 {
   "_class_name": "StableDiffusionXLPipeline",
+  "_diffusers_version": "0.29.0.dev0",
+  "_name_or_path": "ptx0/terminus-xl-velocity-v2",
+  "feature_extractor": [
+    null,
+    null
+  ],
   "force_zeros_for_empty_prompt": true,
+  "image_encoder": [
+    null,
+    null
+  ],
   "scheduler": [
     "diffusers",
+    "EulerDiscreteScheduler"
   ],
   "text_encoder": [
+    null,
+    null
   ],
   "text_encoder_2": [
+    null,
+    null
   ],
   "tokenizer": [
     "transformers",

scheduler/scheduler_config.json CHANGED Viewed

@@ -1,17 +1,24 @@
 {
-  "_class_name": "UniPCMultistepScheduler",
-  "_diffusers_version": "0.28.0",
   "beta_end": 0.012,
   "beta_schedule": "scaled_linear",
   "beta_start": 0.00085,
   "clip_sample": false,
   "clip_sample_range": 1.0,
   "dynamic_thresholding_ratio": 0.995,
   "interpolation_type": "linear",
   "num_train_timesteps": 1000,
   "prediction_type": "v_prediction",
   "rescale_betas_zero_snr": true,
   "sample_max_value": 1.0,
   "skip_prk_steps": true,
-  "timestep_spacing": "trailing"
 }

 {
+  "_class_name": "EulerDiscreteScheduler",
+  "_diffusers_version": "0.29.0.dev0",
   "beta_end": 0.012,
   "beta_schedule": "scaled_linear",
   "beta_start": 0.00085,
   "clip_sample": false,
   "clip_sample_range": 1.0,
   "dynamic_thresholding_ratio": 0.995,
+  "final_sigmas_type": "zero",
   "interpolation_type": "linear",
   "num_train_timesteps": 1000,
   "prediction_type": "v_prediction",
   "rescale_betas_zero_snr": true,
   "sample_max_value": 1.0,
+  "sigma_max": null,
+  "sigma_min": null,
   "skip_prk_steps": true,
+  "steps_offset": 0,
+  "timestep_spacing": "trailing",
+  "timestep_type": "discrete",
+  "trained_betas": null,
+  "use_karras_sigmas": false
 }

tokenizer/tokenizer_config.json CHANGED Viewed

@@ -1,33 +1,30 @@
 {
   "add_prefix_space": false,
-  "bos_token": {
-    "__type": "AddedToken",
-    "content": "<|startoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
   },
   "clean_up_tokenization_spaces": true,
   "do_lower_case": true,
-  "eos_token": {
-    "__type": "AddedToken",
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
   "errors": "replace",
   "model_max_length": 77,
   "pad_token": "<|endoftext|>",
   "tokenizer_class": "CLIPTokenizer",
-  "unk_token": {
-    "__type": "AddedToken",
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
 }

 {
   "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "49406": {
+      "content": "<|startoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49407": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
   },
+  "bos_token": "<|startoftext|>",
   "clean_up_tokenization_spaces": true,
   "do_lower_case": true,
+  "eos_token": "<|endoftext|>",
   "errors": "replace",
   "model_max_length": 77,
   "pad_token": "<|endoftext|>",
   "tokenizer_class": "CLIPTokenizer",
+  "unk_token": "<|endoftext|>"
 }

tokenizer_2/tokenizer_config.json CHANGED Viewed

@@ -1,33 +1,38 @@
 {
   "add_prefix_space": false,
-  "bos_token": {
-    "__type": "AddedToken",
-    "content": "<|startoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
   },
   "clean_up_tokenization_spaces": true,
   "do_lower_case": true,
-  "eos_token": {
-    "__type": "AddedToken",
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
   "errors": "replace",
   "model_max_length": 77,
   "pad_token": "!",
   "tokenizer_class": "CLIPTokenizer",
-  "unk_token": {
-    "__type": "AddedToken",
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
 }

 {
   "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "!",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49406": {
+      "content": "<|startoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49407": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
   },
+  "bos_token": "<|startoftext|>",
   "clean_up_tokenization_spaces": true,
   "do_lower_case": true,
+  "eos_token": "<|endoftext|>",
   "errors": "replace",
   "model_max_length": 77,
   "pad_token": "!",
   "tokenizer_class": "CLIPTokenizer",
+  "unk_token": "<|endoftext|>"
 }

unet/config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "_class_name": "UNet2DConditionModel",
   "_diffusers_version": "0.29.0.dev0",
-  "_name_or_path": "ptx0/terminus-xl-velocity-v2",
   "act_fn": "silu",
   "addition_embed_type": "text_time",
   "addition_embed_type_num_heads": 64,

 {
   "_class_name": "UNet2DConditionModel",
   "_diffusers_version": "0.29.0.dev0",
+  "_name_or_path": "/home/user/training/lite-models/checkpoint-400",
   "act_fn": "silu",
   "addition_embed_type": "text_time",
   "addition_embed_type_num_heads": 64,

vae/config.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "_class_name": "AutoencoderKL",
-  "_diffusers_version": "0.22.0.dev0",
   "_name_or_path": "madebyollin/sdxl-vae-fp16-fix",
   "act_fn": "silu",
   "block_out_channels": [
@@ -18,6 +18,8 @@
   "force_upcast": false,
   "in_channels": 3,
   "latent_channels": 4,
   "layers_per_block": 2,
   "norm_num_groups": 32,
   "out_channels": 3,

 {
   "_class_name": "AutoencoderKL",
+  "_diffusers_version": "0.29.0.dev0",
   "_name_or_path": "madebyollin/sdxl-vae-fp16-fix",
   "act_fn": "silu",
   "block_out_channels": [
   "force_upcast": false,
   "in_channels": 3,
   "latent_channels": 4,
+  "latents_mean": null,
+  "latents_std": null,
   "layers_per_block": 2,
   "norm_num_groups": 32,
   "out_channels": 3,