H1yori233 committed on
Commit
7b538fb
·
verified ·
1 Parent(s): 604b61c

Upload Matrix-Game 2.0 Base Distilled Model (Diffusers format)

Browse files
image_encoder/config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "CLIPVisionModel"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "dropout": 0.0,
7
+ "dtype": "float32",
8
+ "hidden_act": "gelu",
9
+ "hidden_size": 1280,
10
+ "image_size": 224,
11
+ "initializer_factor": 1.0,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 5120,
14
+ "layer_norm_eps": 1e-05,
15
+ "model_type": "clip_vision_model",
16
+ "num_attention_heads": 16,
17
+ "num_channels": 3,
18
+ "num_hidden_layers": 32,
19
+ "patch_size": 14,
20
+ "projection_dim": 1024,
21
+ "transformers_version": "4.57.3"
22
+ }
image_encoder/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8eb46f477ef5e1859b659014aed6ca56cdc207c12cb7a0f9d61b4d80a1a7bb84
3
+ size 2523128312
image_processor/preprocessor_config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
+ },
6
+ "do_center_crop": true,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "image_mean": [
12
+ 0.48145466,
13
+ 0.4578275,
14
+ 0.40821073
15
+ ],
16
+ "image_processor_type": "CLIPImageProcessor",
17
+ "image_std": [
18
+ 0.26862954,
19
+ 0.26130258,
20
+ 0.27577711
21
+ ],
22
+ "resample": 3,
23
+ "rescale_factor": 0.00392156862745098,
24
+ "size": {
25
+ "shortest_edge": 224
26
+ }
27
+ }
model_index.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "MatrixGameCausalDMDPipeline",
3
+ "_diffusers_version": "0.33.1",
4
+ "scheduler": [
5
+ "diffusers",
6
+ "SelfForcingFlowMatchScheduler"
7
+ ],
8
+ "transformer": [
9
+ "diffusers",
10
+ "MatrixGameWanModel"
11
+ ],
12
+ "vae": [
13
+ "diffusers",
14
+ "AutoencoderKLWan"
15
+ ],
16
+ "image_encoder": [
17
+ "transformers",
18
+ "CLIPVisionModel"
19
+ ],
20
+ "image_processor": [
21
+ "transformers",
22
+ "CLIPImageProcessor"
23
+ ]
24
+ }
scheduler/scheduler_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "SelfForcingFlowMatchScheduler",
3
+ "_diffusers_version": "0.33.1",
4
+ "num_train_timesteps": 1000,
5
+ "num_inference_steps": 1000,
6
+ "shift": 5.0,
7
+ "sigma_max": 1.0,
8
+ "sigma_min": 0.0,
9
+ "inverse_timesteps": false,
10
+ "extra_one_step": true,
11
+ "reverse_sigmas": false,
12
+ "training": true
13
+ }
transformer/config.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "CausalMatrixGameWanModel",
3
+ "_diffusers_version": "0.33.1",
4
+ "hidden_size": 1536,
5
+ "num_attention_heads": 12,
6
+ "attention_head_dim": 128,
7
+ "in_channels": 36,
8
+ "out_channels": 16,
9
+ "num_layers": 30,
10
+ "ffn_dim": 8960,
11
+ "freq_dim": 256,
12
+ "eps": 1e-06,
13
+ "qk_norm": "rms_norm_across_heads",
14
+ "patch_size": [
15
+ 1,
16
+ 2,
17
+ 2
18
+ ],
19
+ "action_config": {
20
+ "blocks": [
21
+ 0,
22
+ 1,
23
+ 2,
24
+ 3,
25
+ 4,
26
+ 5,
27
+ 6,
28
+ 7,
29
+ 8,
30
+ 9,
31
+ 10,
32
+ 11,
33
+ 12,
34
+ 13,
35
+ 14
36
+ ],
37
+ "enable_keyboard": true,
38
+ "enable_mouse": true,
39
+ "heads_num": 16,
40
+ "hidden_size": 128,
41
+ "img_hidden_size": 1536,
42
+ "keyboard_dim_in": 4,
43
+ "keyboard_hidden_dim": 1024,
44
+ "mouse_dim_in": 2,
45
+ "mouse_hidden_dim": 1024,
46
+ "mouse_qk_dim_list": [
47
+ 8,
48
+ 28,
49
+ 28
50
+ ],
51
+ "patch_size": [
52
+ 1,
53
+ 2,
54
+ 2
55
+ ],
56
+ "qk_norm": true,
57
+ "qkv_bias": false,
58
+ "rope_dim_list": [
59
+ 8,
60
+ 28,
61
+ 28
62
+ ],
63
+ "rope_theta": 256,
64
+ "vae_time_compression_ratio": 4,
65
+ "windows_size": 3
66
+ },
67
+ "image_dim": 1280,
68
+ "text_dim": 0,
69
+ "local_attn_size": 6,
70
+ "sink_size": 0,
71
+ "text_len": 512
72
+ }
transformer/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a8976aeac99a9ee62f3c51c6a359cb9cc2a52264eeff745deef1413a50dbd12
3
+ size 6477085432
vae/config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "AutoencoderKLWan",
3
+ "_diffusers_version": "0.33.1",
4
+ "attn_scales": [],
5
+ "base_dim": 96,
6
+ "dim_mult": [
7
+ 1,
8
+ 2,
9
+ 4,
10
+ 4
11
+ ],
12
+ "dropout": 0.0,
13
+ "latents_mean": [
14
+ -0.7571,
15
+ -0.7089,
16
+ -0.9113,
17
+ 0.1075,
18
+ -0.1745,
19
+ 0.9653,
20
+ -0.1517,
21
+ 1.5508,
22
+ 0.4134,
23
+ -0.0715,
24
+ 0.5517,
25
+ -0.3632,
26
+ -0.1922,
27
+ -0.9497,
28
+ 0.2503,
29
+ -0.2921
30
+ ],
31
+ "latents_std": [
32
+ 2.8184,
33
+ 1.4541,
34
+ 2.3275,
35
+ 2.6558,
36
+ 1.2196,
37
+ 1.7708,
38
+ 2.6052,
39
+ 2.0743,
40
+ 3.2687,
41
+ 2.1526,
42
+ 2.8652,
43
+ 1.5579,
44
+ 1.6382,
45
+ 1.1253,
46
+ 2.8251,
47
+ 1.916
48
+ ],
49
+ "num_res_blocks": 2,
50
+ "temperal_downsample": [
51
+ false,
52
+ true,
53
+ true
54
+ ],
55
+ "z_dim": 16
56
+ }
vae/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb8cbd00e0a2305d462ef144f2a2bdc625dca43ffe25fb50826994e672579805
3
+ size 507591860