Upload Matrix-Game 2.0 Base Distilled Model (Diffusers format)

Files changed (9) hide show

image_encoder/config.json ADDED Viewed

+{
+  "architectures": [
+    "CLIPVisionModel"
+  ],
+  "attention_dropout": 0.0,
+  "dropout": 0.0,
+  "dtype": "float32",
+  "hidden_act": "gelu",
+  "hidden_size": 1280,
+  "image_size": 224,
+  "initializer_factor": 1.0,
+  "initializer_range": 0.02,
+  "intermediate_size": 5120,
+  "layer_norm_eps": 1e-05,
+  "model_type": "clip_vision_model",
+  "num_attention_heads": 16,
+  "num_channels": 3,
+  "num_hidden_layers": 32,
+  "patch_size": 14,
+  "projection_dim": 1024,
+  "transformers_version": "4.57.3"
+}

image_encoder/model.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:8eb46f477ef5e1859b659014aed6ca56cdc207c12cb7a0f9d61b4d80a1a7bb84
+size 2523128312

image_processor/preprocessor_config.json ADDED Viewed

+{
+  "crop_size": {
+    "height": 224,
+    "width": 224
+  },
+  "do_center_crop": true,
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_processor_type": "CLIPImageProcessor",
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "shortest_edge": 224
+  }
+}

model_index.json ADDED Viewed

+{
+  "_class_name": "MatrixGameCausalDMDPipeline",
+  "_diffusers_version": "0.33.1",
+  "scheduler": [
+    "diffusers",
+    "SelfForcingFlowMatchScheduler"
+  ],
+  "transformer": [
+    "diffusers",
+    "MatrixGameWanModel"
+  ],
+  "vae": [
+    "diffusers",
+    "AutoencoderKLWan"
+  ],
+  "image_encoder": [
+    "transformers",
+    "CLIPVisionModel"
+  ],
+  "image_processor": [
+    "transformers",
+    "CLIPImageProcessor"
+  ]
+}

scheduler/scheduler_config.json ADDED Viewed

+{
+  "_class_name": "SelfForcingFlowMatchScheduler",
+  "_diffusers_version": "0.33.1",
+  "num_train_timesteps": 1000,
+  "num_inference_steps": 1000,
+  "shift": 5.0,
+  "sigma_max": 1.0,
+  "sigma_min": 0.0,
+  "inverse_timesteps": false,
+  "extra_one_step": true,
+  "reverse_sigmas": false,
+  "training": true
+}

transformer/config.json ADDED Viewed

+{
+  "_class_name": "CausalMatrixGameWanModel",
+  "_diffusers_version": "0.33.1",
+  "hidden_size": 1536,
+  "num_attention_heads": 12,
+  "attention_head_dim": 128,
+  "in_channels": 36,
+  "out_channels": 16,
+  "num_layers": 30,
+  "ffn_dim": 8960,
+  "freq_dim": 256,
+  "eps": 1e-06,
+  "qk_norm": "rms_norm_across_heads",
+  "patch_size": [
+    1,
+    2,
+    2
+  ],
+  "action_config": {
+    "blocks": [
+      0,
+      1,
+      2,
+      3,
+      4,
+      5,
+      6,
+      7,
+      8,
+      9,
+      10,
+      11,
+      12,
+      13,
+      14
+    ],
+    "enable_keyboard": true,
+    "enable_mouse": true,
+    "heads_num": 16,
+    "hidden_size": 128,
+    "img_hidden_size": 1536,
+    "keyboard_dim_in": 4,
+    "keyboard_hidden_dim": 1024,
+    "mouse_dim_in": 2,
+    "mouse_hidden_dim": 1024,
+    "mouse_qk_dim_list": [
+      8,
+      28,
+      28
+    ],
+    "patch_size": [
+      1,
+      2,
+      2
+    ],
+    "qk_norm": true,
+    "qkv_bias": false,
+    "rope_dim_list": [
+      8,
+      28,
+      28
+    ],
+    "rope_theta": 256,
+    "vae_time_compression_ratio": 4,
+    "windows_size": 3
+  },
+  "image_dim": 1280,
+  "text_dim": 0,
+  "local_attn_size": 6,
+  "sink_size": 0,
+  "text_len": 512
+}

transformer/diffusion_pytorch_model.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a8976aeac99a9ee62f3c51c6a359cb9cc2a52264eeff745deef1413a50dbd12
+size 6477085432

vae/config.json ADDED Viewed

+{
+  "_class_name": "AutoencoderKLWan",
+  "_diffusers_version": "0.33.1",
+  "attn_scales": [],
+  "base_dim": 96,
+  "dim_mult": [
+    1,
+    2,
+    4,
+    4
+  ],
+  "dropout": 0.0,
+  "latents_mean": [
+    -0.7571,
+    -0.7089,
+    -0.9113,
+    0.1075,
+    -0.1745,
+    0.9653,
+    -0.1517,
+    1.5508,
+    0.4134,
+    -0.0715,
+    0.5517,
+    -0.3632,
+    -0.1922,
+    -0.9497,
+    0.2503,
+    -0.2921
+  ],
+  "latents_std": [
+    2.8184,
+    1.4541,
+    2.3275,
+    2.6558,
+    1.2196,
+    1.7708,
+    2.6052,
+    2.0743,
+    3.2687,
+    2.1526,
+    2.8652,
+    1.5579,
+    1.6382,
+    1.1253,
+    2.8251,
+    1.916
+  ],
+  "num_res_blocks": 2,
+  "temperal_downsample": [
+    false,
+    true,
+    true
+  ],
+  "z_dim": 16
+}

vae/diffusion_pytorch_model.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:bb8cbd00e0a2305d462ef144f2a2bdc625dca43ffe25fb50826994e672579805
+size 507591860