Jingya HF staff committed on
Commit
bb6116d
1 Parent(s): 2bfa13f
README.md CHANGED
@@ -1,3 +1,7 @@
1
  ---
2
  license: mit
3
  ---
 
 
 
 
 
1
  ---
2
  license: mit
3
  ---
4
+
5
+ This is a random dummy model for Stable Video Diffusion, inspired by the [test script](https://github.com/huggingface/diffusers/blob/main/tests/pipelines/stable_video_diffusion/test_stable_video_diffusion.py) in the diffusers library (script for creating the repo [here](https://github.com/JingyaHuang/neuron-playground/blob/main/stable_diffusion/utils/dummy_sd_video.py)).
6
+
7
+ This model is intended for internal testing only; please do not use it in other scenarios.
feature_extractor/preprocessor_config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 32,
4
+ "width": 32
5
+ },
6
+ "do_center_crop": true,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "image_mean": [
12
+ 0.48145466,
13
+ 0.4578275,
14
+ 0.40821073
15
+ ],
16
+ "image_processor_type": "CLIPImageProcessor",
17
+ "image_std": [
18
+ 0.26862954,
19
+ 0.26130258,
20
+ 0.27577711
21
+ ],
22
+ "resample": 3,
23
+ "rescale_factor": 0.00392156862745098,
24
+ "size": {
25
+ "shortest_edge": 32
26
+ }
27
+ }
image_encoder/config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "CLIPVisionModelWithProjection"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "hidden_act": "quick_gelu",
7
+ "hidden_size": 32,
8
+ "image_size": 32,
9
+ "initializer_factor": 1.0,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 37,
12
+ "layer_norm_eps": 1e-05,
13
+ "model_type": "clip_vision_model",
14
+ "num_attention_heads": 4,
15
+ "num_channels": 3,
16
+ "num_hidden_layers": 5,
17
+ "patch_size": 1,
18
+ "projection_dim": 32,
19
+ "torch_dtype": "float32",
20
+ "transformers_version": "4.36.2"
21
+ }
image_encoder/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf3d19058d12326e930c1086726f37574aa0918081c2fabac94ec922c7dd7765
3
+ size 300409
model_index.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "StableVideoDiffusionPipeline",
3
+ "_diffusers_version": "0.25.0",
4
+ "feature_extractor": [
5
+ "transformers",
6
+ "CLIPImageProcessor"
7
+ ],
8
+ "image_encoder": [
9
+ "transformers",
10
+ "CLIPVisionModelWithProjection"
11
+ ],
12
+ "scheduler": [
13
+ "diffusers",
14
+ "EulerDiscreteScheduler"
15
+ ],
16
+ "unet": [
17
+ "diffusers",
18
+ "UNetSpatioTemporalConditionModel"
19
+ ],
20
+ "vae": [
21
+ "diffusers",
22
+ "AutoencoderKLTemporalDecoder"
23
+ ]
24
+ }
scheduler/scheduler_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "EulerDiscreteScheduler",
3
+ "_diffusers_version": "0.25.0",
4
+ "beta_end": 0.012,
5
+ "beta_schedule": "scaled_linear",
6
+ "beta_start": 0.00085,
7
+ "interpolation_type": "linear",
8
+ "num_train_timesteps": 1000,
9
+ "prediction_type": "v_prediction",
10
+ "rescale_betas_zero_snr": false,
11
+ "sigma_max": 700.0,
12
+ "sigma_min": 0.002,
13
+ "steps_offset": 1,
14
+ "timestep_spacing": "leading",
15
+ "timestep_type": "continuous",
16
+ "trained_betas": null,
17
+ "use_karras_sigmas": true
18
+ }
unet/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "UNetSpatioTemporalConditionModel",
3
+ "_diffusers_version": "0.25.0",
4
+ "addition_time_embed_dim": 32,
5
+ "block_out_channels": [
6
+ 32,
7
+ 64
8
+ ],
9
+ "cross_attention_dim": 32,
10
+ "down_block_types": [
11
+ "CrossAttnDownBlockSpatioTemporal",
12
+ "DownBlockSpatioTemporal"
13
+ ],
14
+ "in_channels": 8,
15
+ "layers_per_block": 2,
16
+ "num_attention_heads": 8,
17
+ "num_frames": 25,
18
+ "out_channels": 4,
19
+ "projection_class_embeddings_input_dim": 96,
20
+ "sample_size": 32,
21
+ "transformer_layers_per_block": 1,
22
+ "up_block_types": [
23
+ "UpBlockSpatioTemporal",
24
+ "CrossAttnUpBlockSpatioTemporal"
25
+ ]
26
+ }
unet/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b398b486e8695b43daed929a046ee76736b7070f728c7dffe10bd6a5f0fbef4
3
+ size 7439581
vae/config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "AutoencoderKLTemporalDecoder",
3
+ "_diffusers_version": "0.25.0",
4
+ "block_out_channels": [
5
+ 32,
6
+ 64
7
+ ],
8
+ "down_block_types": [
9
+ "DownEncoderBlock2D",
10
+ "DownEncoderBlock2D"
11
+ ],
12
+ "force_upcast": true,
13
+ "in_channels": 3,
14
+ "latent_channels": 4,
15
+ "layers_per_block": 1,
16
+ "out_channels": 3,
17
+ "sample_size": 32,
18
+ "scaling_factor": 0.18215
19
+ }
vae/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a798b775cbc6dcd80ebf1a4c320c81bd403523679dbde26a26c7adcd25a03d56
3
+ size 2752239