Increase training set resolution, use xt-1-1, linear learning rate

Files changed (5)

image_encoder/config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "stabilityai/stable-video-diffusion-img2vid-xt",
   "architectures": [
     "CLIPVisionModelWithProjection"
   ],
@@ -19,5 +19,5 @@
   "patch_size": 14,
   "projection_dim": 1024,
   "torch_dtype": "float16",
-  "transformers_version": "4.39.1"
 }

 {
+  "_name_or_path": "stabilityai/stable-video-diffusion-img2vid-xt-1-1",
   "architectures": [
     "CLIPVisionModelWithProjection"
   ],
   "patch_size": 14,
   "projection_dim": 1024,
   "torch_dtype": "float16",
+  "transformers_version": "4.39.2"
 }

model_index.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "_class_name": "StableVideoDiffusionPipeline",
   "_diffusers_version": "0.27.2",
-  "_name_or_path": "stabilityai/stable-video-diffusion-img2vid-xt",
   "feature_extractor": [
     "transformers",
     "CLIPImageProcessor"

 {
   "_class_name": "StableVideoDiffusionPipeline",
   "_diffusers_version": "0.27.2",
+  "_name_or_path": "stabilityai/stable-video-diffusion-img2vid-xt-1-1",
   "feature_extractor": [
     "transformers",
     "CLIPImageProcessor"

unet/config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "_class_name": "UNetSpatioTemporalConditionModel",
   "_diffusers_version": "0.27.2",
-  "_name_or_path": "stabilityai/stable-video-diffusion-img2vid-xt",
   "addition_time_embed_dim": 256,
   "block_out_channels": [
     320,

 {
   "_class_name": "UNetSpatioTemporalConditionModel",
   "_diffusers_version": "0.27.2",
+  "_name_or_path": "stabilityai/stable-video-diffusion-img2vid-xt-1-1",
   "addition_time_embed_dim": 256,
   "block_out_channels": [
     320,

unet/diffusion_pytorch_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:38ec7e8457ee7769a80ed49123ec4655bc695007f5d53fb4b6caab9dc391581a
 size 6098682464

 version https://git-lfs.github.com/spec/v1
+oid sha256:4a87ea1431da38eaf77ade8b240b98850cd51dd904cac6efe43a81609bc409df
 size 6098682464

vae/config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "_class_name": "AutoencoderKLTemporalDecoder",
   "_diffusers_version": "0.27.2",
-  "_name_or_path": "stabilityai/stable-video-diffusion-img2vid-xt",
   "block_out_channels": [
     128,
     256,

 {
   "_class_name": "AutoencoderKLTemporalDecoder",
   "_diffusers_version": "0.27.2",
+  "_name_or_path": "stabilityai/stable-video-diffusion-img2vid-xt-1-1",
   "block_out_channels": [
     128,
     256,