Ata Celen committed
Commit dcaa3ad · 1 Parent(s): bb12ece

Model Weights added

qwen2-vl-3d/config.json ADDED
@@ -0,0 +1,48 @@
+ {
+   "_name_or_path": "Qwen/Qwen2-VL-7B-Instruct",
+   "architectures": [
+     "Qwen2VLSpatialForConditionalGeneration"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 151643,
+   "eos_token_id": 151645,
+   "hidden_act": "silu",
+   "hidden_size": 3584,
+   "image_token_id": 151655,
+   "initializer_range": 0.02,
+   "intermediate_size": 18944,
+   "max_position_embeddings": 32768,
+   "max_window_layers": 28,
+   "model_type": "qwen2_vl",
+   "num_attention_heads": 28,
+   "num_hidden_layers": 28,
+   "num_key_value_heads": 4,
+   "rms_norm_eps": 1e-06,
+   "rope_scaling": {
+     "mrope_section": [
+       16,
+       24,
+       24
+     ],
+     "rope_type": "default",
+     "type": "default"
+   },
+   "rope_theta": 1000000.0,
+   "sliding_window": 32768,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.49.0.dev0",
+   "use_cache": true,
+   "use_sliding_window": false,
+   "video_token_id": 151656,
+   "vision_config": {
+     "in_chans": 3,
+     "model_type": "qwen2_vl",
+     "spatial_patch_size": 14,
+     "torch_dtype": "bfloat16"
+   },
+   "vision_end_token_id": 151653,
+   "vision_start_token_id": 151652,
+   "vision_token_id": 151654,
+   "vocab_size": 151660
+ }
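
A minimal sketch of inspecting this config with transformers, assuming the repo has been cloned to ./qwen2-vl-3d. Note that Qwen2VLSpatialForConditionalGeneration is this project's custom class, not part of stock transformers; AutoConfig only resolves the standard "model_type": "qwen2_vl", so the config can be loaded without the project code.

```python
# Minimal sketch, assuming a local clone at ./qwen2-vl-3d.
# AutoConfig maps "model_type": "qwen2_vl" to the stock Qwen2VLConfig.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("./qwen2-vl-3d")
print(config.hidden_size)          # 3584
print(config.num_attention_heads)  # 28 query heads
print(config.num_key_value_heads)  # 4 KV heads (grouped-query attention)
print(config.rope_scaling)         # mrope_section [16, 24, 24]: temporal/height/width splits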
qwen2-vl-3d/generation_config.json ADDED
@@ -0,0 +1,14 @@
+ {
+   "attn_implementation": "flash_attention_2",
+   "bos_token_id": 151643,
+   "do_sample": true,
+   "eos_token_id": [
+     151645,
+     151643
+   ],
+   "pad_token_id": 151643,
+   "temperature": 0.01,
+   "top_k": 1,
+   "top_p": 0.001,
+   "transformers_version": "4.49.0.dev0"
+ }
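
Despite "do_sample": true, the "top_k": 1 setting restricts sampling to the single most probable token, so decoding is effectively greedy (the near-zero temperature and top_p reinforce this). A small sketch inspecting these defaults, assuming the same local path as above:

```python
# Sketch: load and inspect the decoding defaults. With top_k=1 only the
# argmax token survives filtering, so sampling degenerates to greedy search.
from transformers import GenerationConfig

gen_cfg = GenerationConfig.from_pretrained("./qwen2-vl-3d")
print(gen_cfg.do_sample, gen_cfg.temperature, gen_cfg.top_k, gen_cfg.top_p)
# True 0.01 1 0.001
print(gen_cfg.eos_token_id)  # [151645, 151643]: either token ends generation
```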
qwen2-vl-3d/model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2ebcaeaea7f52c327c50f91b36d0f3d63cd96b445572c129fdb2760b001bd323
+ size 4963764072
qwen2-vl-3d/model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:638919d8742180acc5d1ba5016cb77d9c1d51eb0aaf4966410e82d2dcac48580
+ size 4991495816
qwen2-vl-3d/model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ff7bf17a78fd87ad5adf702a460330b1ae89ae2f121175eb6045f3a6a3c905fc
+ size 4932751040
qwen2-vl-3d/model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:254fc35d50a309eefb1ef995ae67767137ad0a01b573b6ad30cf66cb6896e6a1
+ size 1720377840
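
The four .safetensors entries are Git LFS pointer files: the repository itself stores only the oid (a SHA-256 of the blob) and its size in bytes, while the actual weights (roughly 16.6 GB across the four shards) live in LFS storage. A sketch for verifying a downloaded shard against its pointer, with the expected values hard-coded from shard 1 above:

```python
# Sketch: verify a downloaded shard against the oid/size recorded in its
# LFS pointer (values copied from model-00001-of-00004 above).
import hashlib
import os

path = "qwen2-vl-3d/model-00001-of-00004.safetensors"
expected_oid = "2ebcaeaea7f52c327c50f91b36d0f3d63cd96b445572c129fdb2760b001bd323"
expected_size = 4963764072

assert os.path.getsize(path) == expected_size, "size mismatch"
sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # stream in 1 MiB chunks
        sha.update(chunk)
assert sha.hexdigest() == expected_oid, "hash mismatch"
print("shard OK")
```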
qwen2-vl-3d/model.safetensors.index.json ADDED
@@ -0,0 +1,899 @@
+ {
+   "metadata": {
+     "total_size": 16608291000
+   },
+   "weight_map": {
+     "adapter.bias": "model-00004-of-00004.safetensors",
+     "adapter.weight": "model-00004-of-00004.safetensors",
+     "diffuser.alphas_cumprod": "model-00004-of-00004.safetensors",
+     "diffuser.alphas_cumprod_prev": "model-00004-of-00004.safetensors",
+     "diffuser.betas": "model-00004-of-00004.safetensors",
+     "diffuser.log_one_minus_alphas_cumprod": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.0.0.blocks.0.block.0.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.0.0.blocks.0.block.0.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.0.0.blocks.0.block.2.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.0.0.blocks.0.block.2.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.0.0.blocks.1.block.0.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.0.0.blocks.1.block.0.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.0.0.blocks.1.block.2.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.0.0.blocks.1.block.2.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.0.0.residual_conv.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.0.0.residual_conv.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.0.0.time_mlp.1.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.0.0.time_mlp.1.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.0.1.blocks.0.block.0.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.0.1.blocks.0.block.0.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.0.1.blocks.0.block.2.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.0.1.blocks.0.block.2.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.0.1.blocks.1.block.0.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.0.1.blocks.1.block.0.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.0.1.blocks.1.block.2.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.0.1.blocks.1.block.2.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.0.1.time_mlp.1.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.0.1.time_mlp.1.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.0.2.conv.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.0.2.conv.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.1.0.blocks.0.block.0.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.1.0.blocks.0.block.0.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.1.0.blocks.0.block.2.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.1.0.blocks.0.block.2.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.1.0.blocks.1.block.0.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.1.0.blocks.1.block.0.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.1.0.blocks.1.block.2.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.1.0.blocks.1.block.2.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.1.0.residual_conv.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.1.0.residual_conv.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.1.0.time_mlp.1.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.1.0.time_mlp.1.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.1.1.blocks.0.block.0.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.1.1.blocks.0.block.0.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.1.1.blocks.0.block.2.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.1.1.blocks.0.block.2.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.1.1.blocks.1.block.0.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.1.1.blocks.1.block.0.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.1.1.blocks.1.block.2.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.1.1.blocks.1.block.2.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.1.1.time_mlp.1.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.1.1.time_mlp.1.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.1.2.conv.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.1.2.conv.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.2.0.blocks.0.block.0.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.2.0.blocks.0.block.0.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.2.0.blocks.0.block.2.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.2.0.blocks.0.block.2.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.2.0.blocks.1.block.0.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.2.0.blocks.1.block.0.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.2.0.blocks.1.block.2.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.2.0.blocks.1.block.2.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.2.0.residual_conv.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.2.0.residual_conv.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.2.0.time_mlp.1.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.2.0.time_mlp.1.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.2.1.blocks.0.block.0.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.2.1.blocks.0.block.0.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.2.1.blocks.0.block.2.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.2.1.blocks.0.block.2.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.2.1.blocks.1.block.0.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.2.1.blocks.1.block.0.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.2.1.blocks.1.block.2.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.2.1.blocks.1.block.2.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.2.1.time_mlp.1.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.downs.2.1.time_mlp.1.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.final_conv.0.block.0.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.final_conv.0.block.0.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.final_conv.0.block.2.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.final_conv.0.block.2.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.final_conv.1.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.final_conv.1.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.mid_block1.blocks.0.block.0.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.mid_block1.blocks.0.block.0.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.mid_block1.blocks.0.block.2.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.mid_block1.blocks.0.block.2.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.mid_block1.blocks.1.block.0.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.mid_block1.blocks.1.block.0.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.mid_block1.blocks.1.block.2.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.mid_block1.blocks.1.block.2.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.mid_block1.time_mlp.1.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.mid_block1.time_mlp.1.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.mid_block2.blocks.0.block.0.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.mid_block2.blocks.0.block.0.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.mid_block2.blocks.0.block.2.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.mid_block2.blocks.0.block.2.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.mid_block2.blocks.1.block.0.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.mid_block2.blocks.1.block.0.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.mid_block2.blocks.1.block.2.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.mid_block2.blocks.1.block.2.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.mid_block2.time_mlp.1.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.mid_block2.time_mlp.1.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.time_mlp.1.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.time_mlp.1.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.time_mlp.3.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.time_mlp.3.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.0.0.blocks.0.block.0.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.0.0.blocks.0.block.0.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.0.0.blocks.0.block.2.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.0.0.blocks.0.block.2.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.0.0.blocks.1.block.0.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.0.0.blocks.1.block.0.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.0.0.blocks.1.block.2.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.0.0.blocks.1.block.2.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.0.0.residual_conv.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.0.0.residual_conv.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.0.0.time_mlp.1.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.0.0.time_mlp.1.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.0.1.blocks.0.block.0.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.0.1.blocks.0.block.0.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.0.1.blocks.0.block.2.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.0.1.blocks.0.block.2.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.0.1.blocks.1.block.0.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.0.1.blocks.1.block.0.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.0.1.blocks.1.block.2.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.0.1.blocks.1.block.2.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.0.1.time_mlp.1.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.0.1.time_mlp.1.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.0.2.conv.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.0.2.conv.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.1.0.blocks.0.block.0.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.1.0.blocks.0.block.0.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.1.0.blocks.0.block.2.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.1.0.blocks.0.block.2.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.1.0.blocks.1.block.0.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.1.0.blocks.1.block.0.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.1.0.blocks.1.block.2.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.1.0.blocks.1.block.2.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.1.0.residual_conv.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.1.0.residual_conv.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.1.0.time_mlp.1.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.1.0.time_mlp.1.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.1.1.blocks.0.block.0.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.1.1.blocks.0.block.0.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.1.1.blocks.0.block.2.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.1.1.blocks.0.block.2.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.1.1.blocks.1.block.0.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.1.1.blocks.1.block.0.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.1.1.blocks.1.block.2.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.1.1.blocks.1.block.2.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.1.1.time_mlp.1.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.1.1.time_mlp.1.weight": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.1.2.conv.bias": "model-00004-of-00004.safetensors",
+     "diffuser.model.ups.1.2.conv.weight": "model-00004-of-00004.safetensors",
+     "diffuser.posterior_log_variance_clipped": "model-00004-of-00004.safetensors",
+     "diffuser.posterior_mean_coef1": "model-00004-of-00004.safetensors",
+     "diffuser.posterior_mean_coef2": "model-00004-of-00004.safetensors",
+     "diffuser.posterior_variance": "model-00004-of-00004.safetensors",
+     "diffuser.sqrt_alphas_cumprod": "model-00004-of-00004.safetensors",
+     "diffuser.sqrt_one_minus_alphas_cumprod": "model-00004-of-00004.safetensors",
+     "diffuser.sqrt_recip_alphas_cumprod": "model-00004-of-00004.safetensors",
+     "diffuser.sqrt_recipm1_alphas_cumprod": "model-00004-of-00004.safetensors",
+     "lm_head.weight": "model-00004-of-00004.safetensors",
+     "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
+     "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
+     "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+     "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+     "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+     "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+     "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
+     "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+     "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+     "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+     "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+     "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "model.layers.10.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.10.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.10.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "model.layers.11.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.11.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.11.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "model.layers.12.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.12.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.12.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "model.layers.13.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.13.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.13.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "model.layers.14.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.14.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.14.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "model.layers.15.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.15.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.15.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.16.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "model.layers.16.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.16.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.16.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.16.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "model.layers.16.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.16.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.16.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.17.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "model.layers.17.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.17.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.17.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.17.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "model.layers.17.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+     "model.layers.17.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.17.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.17.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+     "model.layers.17.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.17.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+     "model.layers.17.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.18.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "model.layers.18.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.18.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.18.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.18.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "model.layers.18.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+     "model.layers.18.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.18.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.18.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+     "model.layers.18.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.18.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+     "model.layers.18.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.19.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "model.layers.19.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.19.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.19.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.19.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "model.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+     "model.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.19.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+     "model.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+     "model.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
+     "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+     "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+     "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+     "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+     "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "model.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+     "model.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.20.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+     "model.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+     "model.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "model.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+     "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+     "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+     "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "model.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+     "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+     "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+     "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "model.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+     "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+     "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+     "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "model.layers.24.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+     "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.24.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+     "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.24.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+     "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
+     "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+     "model.layers.25.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+     "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.25.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+     "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.25.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+     "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.26.input_layernorm.weight": "model-00004-of-00004.safetensors",
+     "model.layers.26.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
+     "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.26.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
+     "model.layers.26.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+     "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.26.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+     "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.26.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+     "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+     "model.layers.27.input_layernorm.weight": "model-00004-of-00004.safetensors",
+     "model.layers.27.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
+     "model.layers.27.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
+     "model.layers.27.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
+     "model.layers.27.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
+     "model.layers.27.self_attn.k_proj.bias": "model-00004-of-00004.safetensors",
+     "model.layers.27.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
+     "model.layers.27.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
+     "model.layers.27.self_attn.q_proj.bias": "model-00004-of-00004.safetensors",
+     "model.layers.27.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
+     "model.layers.27.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
+     "model.layers.27.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
+     "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
+     "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+     "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+     "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+     "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+     "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
+     "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+     "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+     "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.4.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+     "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.4.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+     "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.5.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "model.layers.5.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.5.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "model.layers.5.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+     "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.5.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+     "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.5.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+     "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+     "model.layers.6.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "model.layers.6.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.6.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "model.layers.6.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.6.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.6.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.7.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "model.layers.7.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.7.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "model.layers.7.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.7.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.7.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.8.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "model.layers.8.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "model.layers.8.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.8.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.8.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
+     "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+     "model.layers.9.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.9.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+     "model.layers.9.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+     "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+     "model.norm.weight": "model-00004-of-00004.safetensors",
+     "visual.blocks.0.attn.proj.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.0.attn.proj.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.0.attn.qkv.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.0.attn.qkv.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.0.mlp.fc1.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.0.mlp.fc1.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.0.mlp.fc2.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.0.mlp.fc2.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.0.norm1.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.0.norm1.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.0.norm2.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.0.norm2.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.1.attn.proj.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.1.attn.proj.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.1.attn.qkv.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.1.attn.qkv.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.1.mlp.fc1.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.1.mlp.fc1.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.1.mlp.fc2.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.1.mlp.fc2.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.1.norm1.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.1.norm1.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.1.norm2.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.1.norm2.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.10.attn.proj.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.10.attn.proj.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.10.attn.qkv.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.10.attn.qkv.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.10.mlp.fc1.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.10.mlp.fc1.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.10.mlp.fc2.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.10.mlp.fc2.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.10.norm1.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.10.norm1.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.10.norm2.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.10.norm2.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.11.attn.proj.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.11.attn.proj.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.11.attn.qkv.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.11.attn.qkv.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.11.mlp.fc1.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.11.mlp.fc1.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.11.mlp.fc2.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.11.mlp.fc2.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.11.norm1.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.11.norm1.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.11.norm2.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.11.norm2.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.12.attn.proj.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.12.attn.proj.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.12.attn.qkv.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.12.attn.qkv.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.12.mlp.fc1.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.12.mlp.fc1.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.12.mlp.fc2.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.12.mlp.fc2.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.12.norm1.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.12.norm1.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.12.norm2.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.12.norm2.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.13.attn.proj.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.13.attn.proj.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.13.attn.qkv.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.13.attn.qkv.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.13.mlp.fc1.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.13.mlp.fc1.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.13.mlp.fc2.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.13.mlp.fc2.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.13.norm1.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.13.norm1.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.13.norm2.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.13.norm2.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.14.attn.proj.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.14.attn.proj.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.14.attn.qkv.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.14.attn.qkv.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.14.mlp.fc1.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.14.mlp.fc1.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.14.mlp.fc2.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.14.mlp.fc2.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.14.norm1.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.14.norm1.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.14.norm2.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.14.norm2.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.15.attn.proj.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.15.attn.proj.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.15.attn.qkv.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.15.attn.qkv.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.15.mlp.fc1.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.15.mlp.fc1.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.15.mlp.fc2.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.15.mlp.fc2.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.15.norm1.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.15.norm1.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.15.norm2.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.15.norm2.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.16.attn.proj.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.16.attn.proj.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.16.attn.qkv.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.16.attn.qkv.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.16.mlp.fc1.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.16.mlp.fc1.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.16.mlp.fc2.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.16.mlp.fc2.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.16.norm1.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.16.norm1.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.16.norm2.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.16.norm2.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.17.attn.proj.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.17.attn.proj.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.17.attn.qkv.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.17.attn.qkv.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.17.mlp.fc1.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.17.mlp.fc1.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.17.mlp.fc2.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.17.mlp.fc2.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.17.norm1.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.17.norm1.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.17.norm2.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.17.norm2.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.18.attn.proj.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.18.attn.proj.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.18.attn.qkv.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.18.attn.qkv.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.18.mlp.fc1.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.18.mlp.fc1.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.18.mlp.fc2.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.18.mlp.fc2.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.18.norm1.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.18.norm1.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.18.norm2.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.18.norm2.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.19.attn.proj.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.19.attn.proj.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.19.attn.qkv.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.19.attn.qkv.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.19.mlp.fc1.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.19.mlp.fc1.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.19.mlp.fc2.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.19.mlp.fc2.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.19.norm1.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.19.norm1.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.19.norm2.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.19.norm2.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.2.attn.proj.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.2.attn.proj.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.2.attn.qkv.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.2.attn.qkv.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.2.mlp.fc1.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.2.mlp.fc1.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.2.mlp.fc2.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.2.mlp.fc2.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.2.norm1.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.2.norm1.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.2.norm2.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.2.norm2.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.20.attn.proj.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.20.attn.proj.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.20.attn.qkv.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.20.attn.qkv.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.20.mlp.fc1.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.20.mlp.fc1.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.20.mlp.fc2.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.20.mlp.fc2.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.20.norm1.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.20.norm1.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.20.norm2.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.20.norm2.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.21.attn.proj.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.21.attn.proj.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.21.attn.qkv.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.21.attn.qkv.weight": "model-00001-of-00004.safetensors",
+     "visual.blocks.21.mlp.fc1.bias": "model-00001-of-00004.safetensors",
+     "visual.blocks.21.mlp.fc1.weight": "model-00001-of-00004.safetensors",
+ "visual.blocks.21.mlp.fc2.bias": "model-00001-of-00004.safetensors",
682
+ "visual.blocks.21.mlp.fc2.weight": "model-00001-of-00004.safetensors",
683
+ "visual.blocks.21.norm1.bias": "model-00001-of-00004.safetensors",
684
+ "visual.blocks.21.norm1.weight": "model-00001-of-00004.safetensors",
685
+ "visual.blocks.21.norm2.bias": "model-00001-of-00004.safetensors",
686
+ "visual.blocks.21.norm2.weight": "model-00001-of-00004.safetensors",
687
+ "visual.blocks.22.attn.proj.bias": "model-00001-of-00004.safetensors",
688
+ "visual.blocks.22.attn.proj.weight": "model-00001-of-00004.safetensors",
689
+ "visual.blocks.22.attn.qkv.bias": "model-00001-of-00004.safetensors",
690
+ "visual.blocks.22.attn.qkv.weight": "model-00001-of-00004.safetensors",
691
+ "visual.blocks.22.mlp.fc1.bias": "model-00001-of-00004.safetensors",
692
+ "visual.blocks.22.mlp.fc1.weight": "model-00001-of-00004.safetensors",
693
+ "visual.blocks.22.mlp.fc2.bias": "model-00001-of-00004.safetensors",
694
+ "visual.blocks.22.mlp.fc2.weight": "model-00001-of-00004.safetensors",
695
+ "visual.blocks.22.norm1.bias": "model-00001-of-00004.safetensors",
696
+ "visual.blocks.22.norm1.weight": "model-00001-of-00004.safetensors",
697
+ "visual.blocks.22.norm2.bias": "model-00001-of-00004.safetensors",
698
+ "visual.blocks.22.norm2.weight": "model-00001-of-00004.safetensors",
699
+ "visual.blocks.23.attn.proj.bias": "model-00001-of-00004.safetensors",
700
+ "visual.blocks.23.attn.proj.weight": "model-00001-of-00004.safetensors",
701
+ "visual.blocks.23.attn.qkv.bias": "model-00001-of-00004.safetensors",
702
+ "visual.blocks.23.attn.qkv.weight": "model-00001-of-00004.safetensors",
703
+ "visual.blocks.23.mlp.fc1.bias": "model-00001-of-00004.safetensors",
704
+ "visual.blocks.23.mlp.fc1.weight": "model-00001-of-00004.safetensors",
705
+ "visual.blocks.23.mlp.fc2.bias": "model-00001-of-00004.safetensors",
706
+ "visual.blocks.23.mlp.fc2.weight": "model-00001-of-00004.safetensors",
707
+ "visual.blocks.23.norm1.bias": "model-00001-of-00004.safetensors",
708
+ "visual.blocks.23.norm1.weight": "model-00001-of-00004.safetensors",
709
+ "visual.blocks.23.norm2.bias": "model-00001-of-00004.safetensors",
710
+ "visual.blocks.23.norm2.weight": "model-00001-of-00004.safetensors",
711
+ "visual.blocks.24.attn.proj.bias": "model-00001-of-00004.safetensors",
712
+ "visual.blocks.24.attn.proj.weight": "model-00001-of-00004.safetensors",
713
+ "visual.blocks.24.attn.qkv.bias": "model-00001-of-00004.safetensors",
714
+ "visual.blocks.24.attn.qkv.weight": "model-00001-of-00004.safetensors",
715
+ "visual.blocks.24.mlp.fc1.bias": "model-00001-of-00004.safetensors",
716
+ "visual.blocks.24.mlp.fc1.weight": "model-00001-of-00004.safetensors",
717
+ "visual.blocks.24.mlp.fc2.bias": "model-00001-of-00004.safetensors",
718
+ "visual.blocks.24.mlp.fc2.weight": "model-00001-of-00004.safetensors",
719
+ "visual.blocks.24.norm1.bias": "model-00001-of-00004.safetensors",
720
+ "visual.blocks.24.norm1.weight": "model-00001-of-00004.safetensors",
721
+ "visual.blocks.24.norm2.bias": "model-00001-of-00004.safetensors",
722
+ "visual.blocks.24.norm2.weight": "model-00001-of-00004.safetensors",
723
+ "visual.blocks.25.attn.proj.bias": "model-00001-of-00004.safetensors",
724
+ "visual.blocks.25.attn.proj.weight": "model-00001-of-00004.safetensors",
725
+ "visual.blocks.25.attn.qkv.bias": "model-00001-of-00004.safetensors",
726
+ "visual.blocks.25.attn.qkv.weight": "model-00001-of-00004.safetensors",
727
+ "visual.blocks.25.mlp.fc1.bias": "model-00001-of-00004.safetensors",
728
+ "visual.blocks.25.mlp.fc1.weight": "model-00001-of-00004.safetensors",
729
+ "visual.blocks.25.mlp.fc2.bias": "model-00001-of-00004.safetensors",
730
+ "visual.blocks.25.mlp.fc2.weight": "model-00001-of-00004.safetensors",
731
+ "visual.blocks.25.norm1.bias": "model-00001-of-00004.safetensors",
732
+ "visual.blocks.25.norm1.weight": "model-00001-of-00004.safetensors",
733
+ "visual.blocks.25.norm2.bias": "model-00001-of-00004.safetensors",
734
+ "visual.blocks.25.norm2.weight": "model-00001-of-00004.safetensors",
735
+ "visual.blocks.26.attn.proj.bias": "model-00001-of-00004.safetensors",
736
+ "visual.blocks.26.attn.proj.weight": "model-00001-of-00004.safetensors",
737
+ "visual.blocks.26.attn.qkv.bias": "model-00001-of-00004.safetensors",
738
+ "visual.blocks.26.attn.qkv.weight": "model-00001-of-00004.safetensors",
739
+ "visual.blocks.26.mlp.fc1.bias": "model-00001-of-00004.safetensors",
740
+ "visual.blocks.26.mlp.fc1.weight": "model-00001-of-00004.safetensors",
741
+ "visual.blocks.26.mlp.fc2.bias": "model-00001-of-00004.safetensors",
742
+ "visual.blocks.26.mlp.fc2.weight": "model-00001-of-00004.safetensors",
743
+ "visual.blocks.26.norm1.bias": "model-00001-of-00004.safetensors",
744
+ "visual.blocks.26.norm1.weight": "model-00001-of-00004.safetensors",
745
+ "visual.blocks.26.norm2.bias": "model-00001-of-00004.safetensors",
746
+ "visual.blocks.26.norm2.weight": "model-00001-of-00004.safetensors",
747
+ "visual.blocks.27.attn.proj.bias": "model-00001-of-00004.safetensors",
748
+ "visual.blocks.27.attn.proj.weight": "model-00001-of-00004.safetensors",
749
+ "visual.blocks.27.attn.qkv.bias": "model-00001-of-00004.safetensors",
750
+ "visual.blocks.27.attn.qkv.weight": "model-00001-of-00004.safetensors",
751
+ "visual.blocks.27.mlp.fc1.bias": "model-00001-of-00004.safetensors",
752
+ "visual.blocks.27.mlp.fc1.weight": "model-00001-of-00004.safetensors",
753
+ "visual.blocks.27.mlp.fc2.bias": "model-00001-of-00004.safetensors",
754
+ "visual.blocks.27.mlp.fc2.weight": "model-00001-of-00004.safetensors",
755
+ "visual.blocks.27.norm1.bias": "model-00001-of-00004.safetensors",
756
+ "visual.blocks.27.norm1.weight": "model-00001-of-00004.safetensors",
757
+ "visual.blocks.27.norm2.bias": "model-00001-of-00004.safetensors",
758
+ "visual.blocks.27.norm2.weight": "model-00001-of-00004.safetensors",
759
+ "visual.blocks.28.attn.proj.bias": "model-00001-of-00004.safetensors",
760
+ "visual.blocks.28.attn.proj.weight": "model-00001-of-00004.safetensors",
761
+ "visual.blocks.28.attn.qkv.bias": "model-00001-of-00004.safetensors",
762
+ "visual.blocks.28.attn.qkv.weight": "model-00001-of-00004.safetensors",
763
+ "visual.blocks.28.mlp.fc1.bias": "model-00001-of-00004.safetensors",
764
+ "visual.blocks.28.mlp.fc1.weight": "model-00001-of-00004.safetensors",
765
+ "visual.blocks.28.mlp.fc2.bias": "model-00001-of-00004.safetensors",
766
+ "visual.blocks.28.mlp.fc2.weight": "model-00001-of-00004.safetensors",
767
+ "visual.blocks.28.norm1.bias": "model-00001-of-00004.safetensors",
768
+ "visual.blocks.28.norm1.weight": "model-00001-of-00004.safetensors",
769
+ "visual.blocks.28.norm2.bias": "model-00001-of-00004.safetensors",
770
+ "visual.blocks.28.norm2.weight": "model-00001-of-00004.safetensors",
771
+ "visual.blocks.29.attn.proj.bias": "model-00001-of-00004.safetensors",
772
+ "visual.blocks.29.attn.proj.weight": "model-00001-of-00004.safetensors",
773
+ "visual.blocks.29.attn.qkv.bias": "model-00001-of-00004.safetensors",
774
+ "visual.blocks.29.attn.qkv.weight": "model-00001-of-00004.safetensors",
775
+ "visual.blocks.29.mlp.fc1.bias": "model-00001-of-00004.safetensors",
776
+ "visual.blocks.29.mlp.fc1.weight": "model-00001-of-00004.safetensors",
777
+ "visual.blocks.29.mlp.fc2.bias": "model-00001-of-00004.safetensors",
778
+ "visual.blocks.29.mlp.fc2.weight": "model-00001-of-00004.safetensors",
779
+ "visual.blocks.29.norm1.bias": "model-00001-of-00004.safetensors",
780
+ "visual.blocks.29.norm1.weight": "model-00001-of-00004.safetensors",
781
+ "visual.blocks.29.norm2.bias": "model-00001-of-00004.safetensors",
782
+ "visual.blocks.29.norm2.weight": "model-00001-of-00004.safetensors",
783
+ "visual.blocks.3.attn.proj.bias": "model-00001-of-00004.safetensors",
784
+ "visual.blocks.3.attn.proj.weight": "model-00001-of-00004.safetensors",
785
+ "visual.blocks.3.attn.qkv.bias": "model-00001-of-00004.safetensors",
786
+ "visual.blocks.3.attn.qkv.weight": "model-00001-of-00004.safetensors",
787
+ "visual.blocks.3.mlp.fc1.bias": "model-00001-of-00004.safetensors",
788
+ "visual.blocks.3.mlp.fc1.weight": "model-00001-of-00004.safetensors",
789
+ "visual.blocks.3.mlp.fc2.bias": "model-00001-of-00004.safetensors",
790
+ "visual.blocks.3.mlp.fc2.weight": "model-00001-of-00004.safetensors",
791
+ "visual.blocks.3.norm1.bias": "model-00001-of-00004.safetensors",
792
+ "visual.blocks.3.norm1.weight": "model-00001-of-00004.safetensors",
793
+ "visual.blocks.3.norm2.bias": "model-00001-of-00004.safetensors",
794
+ "visual.blocks.3.norm2.weight": "model-00001-of-00004.safetensors",
795
+ "visual.blocks.30.attn.proj.bias": "model-00001-of-00004.safetensors",
796
+ "visual.blocks.30.attn.proj.weight": "model-00001-of-00004.safetensors",
797
+ "visual.blocks.30.attn.qkv.bias": "model-00001-of-00004.safetensors",
798
+ "visual.blocks.30.attn.qkv.weight": "model-00001-of-00004.safetensors",
799
+ "visual.blocks.30.mlp.fc1.bias": "model-00001-of-00004.safetensors",
800
+ "visual.blocks.30.mlp.fc1.weight": "model-00001-of-00004.safetensors",
801
+ "visual.blocks.30.mlp.fc2.bias": "model-00001-of-00004.safetensors",
802
+ "visual.blocks.30.mlp.fc2.weight": "model-00001-of-00004.safetensors",
803
+ "visual.blocks.30.norm1.bias": "model-00001-of-00004.safetensors",
804
+ "visual.blocks.30.norm1.weight": "model-00001-of-00004.safetensors",
805
+ "visual.blocks.30.norm2.bias": "model-00001-of-00004.safetensors",
806
+ "visual.blocks.30.norm2.weight": "model-00001-of-00004.safetensors",
807
+ "visual.blocks.31.attn.proj.bias": "model-00001-of-00004.safetensors",
808
+ "visual.blocks.31.attn.proj.weight": "model-00001-of-00004.safetensors",
809
+ "visual.blocks.31.attn.qkv.bias": "model-00001-of-00004.safetensors",
810
+ "visual.blocks.31.attn.qkv.weight": "model-00001-of-00004.safetensors",
811
+ "visual.blocks.31.mlp.fc1.bias": "model-00001-of-00004.safetensors",
812
+ "visual.blocks.31.mlp.fc1.weight": "model-00001-of-00004.safetensors",
813
+ "visual.blocks.31.mlp.fc2.bias": "model-00001-of-00004.safetensors",
814
+ "visual.blocks.31.mlp.fc2.weight": "model-00001-of-00004.safetensors",
815
+ "visual.blocks.31.norm1.bias": "model-00001-of-00004.safetensors",
816
+ "visual.blocks.31.norm1.weight": "model-00001-of-00004.safetensors",
817
+ "visual.blocks.31.norm2.bias": "model-00001-of-00004.safetensors",
818
+ "visual.blocks.31.norm2.weight": "model-00001-of-00004.safetensors",
819
+ "visual.blocks.4.attn.proj.bias": "model-00001-of-00004.safetensors",
820
+ "visual.blocks.4.attn.proj.weight": "model-00001-of-00004.safetensors",
821
+ "visual.blocks.4.attn.qkv.bias": "model-00001-of-00004.safetensors",
822
+ "visual.blocks.4.attn.qkv.weight": "model-00001-of-00004.safetensors",
823
+ "visual.blocks.4.mlp.fc1.bias": "model-00001-of-00004.safetensors",
824
+ "visual.blocks.4.mlp.fc1.weight": "model-00001-of-00004.safetensors",
825
+ "visual.blocks.4.mlp.fc2.bias": "model-00001-of-00004.safetensors",
826
+ "visual.blocks.4.mlp.fc2.weight": "model-00001-of-00004.safetensors",
827
+ "visual.blocks.4.norm1.bias": "model-00001-of-00004.safetensors",
828
+ "visual.blocks.4.norm1.weight": "model-00001-of-00004.safetensors",
829
+ "visual.blocks.4.norm2.bias": "model-00001-of-00004.safetensors",
830
+ "visual.blocks.4.norm2.weight": "model-00001-of-00004.safetensors",
831
+ "visual.blocks.5.attn.proj.bias": "model-00001-of-00004.safetensors",
832
+ "visual.blocks.5.attn.proj.weight": "model-00001-of-00004.safetensors",
833
+ "visual.blocks.5.attn.qkv.bias": "model-00001-of-00004.safetensors",
834
+ "visual.blocks.5.attn.qkv.weight": "model-00001-of-00004.safetensors",
835
+ "visual.blocks.5.mlp.fc1.bias": "model-00001-of-00004.safetensors",
836
+ "visual.blocks.5.mlp.fc1.weight": "model-00001-of-00004.safetensors",
837
+ "visual.blocks.5.mlp.fc2.bias": "model-00001-of-00004.safetensors",
838
+ "visual.blocks.5.mlp.fc2.weight": "model-00001-of-00004.safetensors",
839
+ "visual.blocks.5.norm1.bias": "model-00001-of-00004.safetensors",
840
+ "visual.blocks.5.norm1.weight": "model-00001-of-00004.safetensors",
841
+ "visual.blocks.5.norm2.bias": "model-00001-of-00004.safetensors",
842
+ "visual.blocks.5.norm2.weight": "model-00001-of-00004.safetensors",
843
+ "visual.blocks.6.attn.proj.bias": "model-00001-of-00004.safetensors",
844
+ "visual.blocks.6.attn.proj.weight": "model-00001-of-00004.safetensors",
845
+ "visual.blocks.6.attn.qkv.bias": "model-00001-of-00004.safetensors",
846
+ "visual.blocks.6.attn.qkv.weight": "model-00001-of-00004.safetensors",
847
+ "visual.blocks.6.mlp.fc1.bias": "model-00001-of-00004.safetensors",
848
+ "visual.blocks.6.mlp.fc1.weight": "model-00001-of-00004.safetensors",
849
+ "visual.blocks.6.mlp.fc2.bias": "model-00001-of-00004.safetensors",
850
+ "visual.blocks.6.mlp.fc2.weight": "model-00001-of-00004.safetensors",
851
+ "visual.blocks.6.norm1.bias": "model-00001-of-00004.safetensors",
852
+ "visual.blocks.6.norm1.weight": "model-00001-of-00004.safetensors",
853
+ "visual.blocks.6.norm2.bias": "model-00001-of-00004.safetensors",
854
+ "visual.blocks.6.norm2.weight": "model-00001-of-00004.safetensors",
855
+ "visual.blocks.7.attn.proj.bias": "model-00001-of-00004.safetensors",
856
+ "visual.blocks.7.attn.proj.weight": "model-00001-of-00004.safetensors",
857
+ "visual.blocks.7.attn.qkv.bias": "model-00001-of-00004.safetensors",
858
+ "visual.blocks.7.attn.qkv.weight": "model-00001-of-00004.safetensors",
859
+ "visual.blocks.7.mlp.fc1.bias": "model-00001-of-00004.safetensors",
860
+ "visual.blocks.7.mlp.fc1.weight": "model-00001-of-00004.safetensors",
861
+ "visual.blocks.7.mlp.fc2.bias": "model-00001-of-00004.safetensors",
862
+ "visual.blocks.7.mlp.fc2.weight": "model-00001-of-00004.safetensors",
863
+ "visual.blocks.7.norm1.bias": "model-00001-of-00004.safetensors",
864
+ "visual.blocks.7.norm1.weight": "model-00001-of-00004.safetensors",
865
+ "visual.blocks.7.norm2.bias": "model-00001-of-00004.safetensors",
866
+ "visual.blocks.7.norm2.weight": "model-00001-of-00004.safetensors",
867
+ "visual.blocks.8.attn.proj.bias": "model-00001-of-00004.safetensors",
868
+ "visual.blocks.8.attn.proj.weight": "model-00001-of-00004.safetensors",
869
+ "visual.blocks.8.attn.qkv.bias": "model-00001-of-00004.safetensors",
870
+ "visual.blocks.8.attn.qkv.weight": "model-00001-of-00004.safetensors",
871
+ "visual.blocks.8.mlp.fc1.bias": "model-00001-of-00004.safetensors",
872
+ "visual.blocks.8.mlp.fc1.weight": "model-00001-of-00004.safetensors",
873
+ "visual.blocks.8.mlp.fc2.bias": "model-00001-of-00004.safetensors",
874
+ "visual.blocks.8.mlp.fc2.weight": "model-00001-of-00004.safetensors",
875
+ "visual.blocks.8.norm1.bias": "model-00001-of-00004.safetensors",
876
+ "visual.blocks.8.norm1.weight": "model-00001-of-00004.safetensors",
877
+ "visual.blocks.8.norm2.bias": "model-00001-of-00004.safetensors",
878
+ "visual.blocks.8.norm2.weight": "model-00001-of-00004.safetensors",
879
+ "visual.blocks.9.attn.proj.bias": "model-00001-of-00004.safetensors",
880
+ "visual.blocks.9.attn.proj.weight": "model-00001-of-00004.safetensors",
881
+ "visual.blocks.9.attn.qkv.bias": "model-00001-of-00004.safetensors",
882
+ "visual.blocks.9.attn.qkv.weight": "model-00001-of-00004.safetensors",
883
+ "visual.blocks.9.mlp.fc1.bias": "model-00001-of-00004.safetensors",
884
+ "visual.blocks.9.mlp.fc1.weight": "model-00001-of-00004.safetensors",
885
+ "visual.blocks.9.mlp.fc2.bias": "model-00001-of-00004.safetensors",
886
+ "visual.blocks.9.mlp.fc2.weight": "model-00001-of-00004.safetensors",
887
+ "visual.blocks.9.norm1.bias": "model-00001-of-00004.safetensors",
888
+ "visual.blocks.9.norm1.weight": "model-00001-of-00004.safetensors",
889
+ "visual.blocks.9.norm2.bias": "model-00001-of-00004.safetensors",
890
+ "visual.blocks.9.norm2.weight": "model-00001-of-00004.safetensors",
891
+ "visual.merger.ln_q.bias": "model-00001-of-00004.safetensors",
892
+ "visual.merger.ln_q.weight": "model-00001-of-00004.safetensors",
893
+ "visual.merger.mlp.0.bias": "model-00001-of-00004.safetensors",
894
+ "visual.merger.mlp.0.weight": "model-00001-of-00004.safetensors",
895
+ "visual.merger.mlp.2.bias": "model-00001-of-00004.safetensors",
896
+ "visual.merger.mlp.2.weight": "model-00001-of-00004.safetensors",
897
+ "visual.patch_embed.proj.weight": "model-00001-of-00004.safetensors"
898
+ }
899
+ }
qwen2-vl-3d/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fc71ef5f1c3be4d29c372baeea36b661da41256d91477608542b21e214e357dc
+ size 165361722
qwen2-vl-3d/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3a50d9ce198e2733053d8d4a9703a034370d3ce84ca4a64a639d3035f21bb559
+ size 14244
qwen2-vl-3d/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6b46bd00f15da7bf12c17740f5c0095e4932c178c4baf4ffcf2aaa705fd44155
+ size 1064
qwen2-vl-3d/trainer_state.json ADDED
@@ -0,0 +1,398 @@
+ {
+ "best_metric": 1.1060227155685425,
+ "best_model_checkpoint": "qwen2-7b-instruct-trl-sft-housetour-l64-adapter/checkpoint-500",
+ "epoch": 3.4246575342465753,
+ "eval_steps": 500,
+ "global_step": 500,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.00684931506849315,
+ "grad_norm": 18.83251953125,
+ "learning_rate": 1.1363636363636363e-08,
+ "loss": 2.0308,
+ "step": 1
+ },
+ {
+ "epoch": 0.0684931506849315,
+ "grad_norm": 16.91636848449707,
+ "learning_rate": 1.1363636363636363e-07,
+ "loss": 2.2756,
+ "step": 10
+ },
+ {
+ "epoch": 0.136986301369863,
+ "grad_norm": 16.575881958007812,
+ "learning_rate": 2.2727272727272726e-07,
+ "loss": 2.2648,
+ "step": 20
+ },
+ {
+ "epoch": 0.2054794520547945,
+ "grad_norm": 10.142265319824219,
+ "learning_rate": 3.4090909090909085e-07,
+ "loss": 2.103,
+ "step": 30
+ },
+ {
+ "epoch": 0.273972602739726,
+ "grad_norm": 10.252799034118652,
+ "learning_rate": 4.545454545454545e-07,
+ "loss": 1.9579,
+ "step": 40
+ },
+ {
+ "epoch": 0.3424657534246575,
+ "grad_norm": 10.1475830078125,
+ "learning_rate": 5.681818181818182e-07,
+ "loss": 1.7427,
+ "step": 50
+ },
+ {
+ "epoch": 0.410958904109589,
+ "grad_norm": 6.072550296783447,
+ "learning_rate": 6.818181818181817e-07,
+ "loss": 1.6655,
+ "step": 60
+ },
+ {
+ "epoch": 0.4794520547945205,
+ "grad_norm": 4.325073719024658,
+ "learning_rate": 7.954545454545454e-07,
+ "loss": 1.5283,
+ "step": 70
+ },
+ {
+ "epoch": 0.547945205479452,
+ "grad_norm": 1.543124794960022,
+ "learning_rate": 9.09090909090909e-07,
+ "loss": 1.3892,
+ "step": 80
+ },
+ {
+ "epoch": 0.6164383561643836,
+ "grad_norm": 1.4148296117782593,
+ "learning_rate": 9.999987694108851e-07,
+ "loss": 1.2604,
+ "step": 90
+ },
+ {
+ "epoch": 0.684931506849315,
+ "grad_norm": 1.1791110038757324,
+ "learning_rate": 9.999556994278908e-07,
+ "loss": 1.1968,
+ "step": 100
+ },
+ {
+ "epoch": 0.7534246575342466,
+ "grad_norm": 0.6725325584411621,
+ "learning_rate": 9.99851106046421e-07,
+ "loss": 1.2527,
+ "step": 110
+ },
+ {
+ "epoch": 0.821917808219178,
+ "grad_norm": 0.6848694682121277,
+ "learning_rate": 9.996850021374967e-07,
+ "loss": 1.1948,
+ "step": 120
+ },
+ {
+ "epoch": 0.8904109589041096,
+ "grad_norm": 0.7549965977668762,
+ "learning_rate": 9.994574081414829e-07,
+ "loss": 1.1981,
+ "step": 130
+ },
+ {
+ "epoch": 0.958904109589041,
+ "grad_norm": 0.7096678614616394,
+ "learning_rate": 9.991683520655733e-07,
+ "loss": 1.1431,
+ "step": 140
+ },
+ {
+ "epoch": 1.0273972602739727,
+ "grad_norm": 0.5900934934616089,
+ "learning_rate": 9.988178694803437e-07,
+ "loss": 1.1278,
+ "step": 150
+ },
+ {
+ "epoch": 1.095890410958904,
+ "grad_norm": 0.5475574135780334,
+ "learning_rate": 9.98406003515375e-07,
+ "loss": 1.0992,
+ "step": 160
+ },
+ {
+ "epoch": 1.1643835616438356,
+ "grad_norm": 0.7482232451438904,
+ "learning_rate": 9.979328048539456e-07,
+ "loss": 1.1284,
+ "step": 170
+ },
+ {
+ "epoch": 1.2328767123287672,
+ "grad_norm": 0.5973280072212219,
+ "learning_rate": 9.973983317267942e-07,
+ "loss": 1.0852,
+ "step": 180
+ },
+ {
+ "epoch": 1.3013698630136985,
+ "grad_norm": 0.6261045932769775,
+ "learning_rate": 9.968026499049549e-07,
+ "loss": 1.1337,
+ "step": 190
+ },
+ {
+ "epoch": 1.36986301369863,
+ "grad_norm": 0.5769844055175781,
+ "learning_rate": 9.961458326916622e-07,
+ "loss": 1.0631,
+ "step": 200
+ },
+ {
+ "epoch": 1.4383561643835616,
+ "grad_norm": 0.5357353687286377,
+ "learning_rate": 9.95427960913332e-07,
+ "loss": 1.0776,
+ "step": 210
+ },
+ {
+ "epoch": 1.5068493150684932,
+ "grad_norm": 0.4853924810886383,
+ "learning_rate": 9.946491229096141e-07,
+ "loss": 1.1915,
+ "step": 220
+ },
+ {
+ "epoch": 1.5753424657534247,
+ "grad_norm": 0.46966347098350525,
+ "learning_rate": 9.93809414522522e-07,
+ "loss": 1.1294,
+ "step": 230
+ },
+ {
+ "epoch": 1.643835616438356,
+ "grad_norm": 0.4522402286529541,
+ "learning_rate": 9.929089390846387e-07,
+ "loss": 1.1039,
+ "step": 240
+ },
+ {
+ "epoch": 1.7123287671232876,
+ "grad_norm": 0.5600599646568298,
+ "learning_rate": 9.919478074064001e-07,
+ "loss": 1.1376,
+ "step": 250
+ },
+ {
+ "epoch": 1.7808219178082192,
+ "grad_norm": 0.6039386987686157,
+ "learning_rate": 9.9092613776246e-07,
+ "loss": 0.9726,
+ "step": 260
+ },
+ {
+ "epoch": 1.8493150684931505,
+ "grad_norm": 0.49978339672088623,
+ "learning_rate": 9.89844055877135e-07,
+ "loss": 1.0931,
+ "step": 270
+ },
+ {
+ "epoch": 1.9178082191780823,
+ "grad_norm": 0.5846579074859619,
+ "learning_rate": 9.887016949089332e-07,
+ "loss": 0.9936,
+ "step": 280
+ },
+ {
+ "epoch": 1.9863013698630136,
+ "grad_norm": 0.7952700257301331,
+ "learning_rate": 9.874991954341681e-07,
+ "loss": 0.9754,
+ "step": 290
+ },
+ {
+ "epoch": 2.0547945205479454,
+ "grad_norm": 0.4901112914085388,
+ "learning_rate": 9.862367054296588e-07,
+ "loss": 1.0696,
+ "step": 300
+ },
+ {
+ "epoch": 2.1232876712328768,
+ "grad_norm": 0.5387308597564697,
+ "learning_rate": 9.84914380254522e-07,
+ "loss": 1.0019,
+ "step": 310
+ },
+ {
+ "epoch": 2.191780821917808,
+ "grad_norm": 0.5762762427330017,
+ "learning_rate": 9.83532382631052e-07,
+ "loss": 1.033,
+ "step": 320
+ },
+ {
+ "epoch": 2.26027397260274,
+ "grad_norm": 0.4994489848613739,
+ "learning_rate": 9.82090882624698e-07,
+ "loss": 0.9562,
+ "step": 330
+ },
+ {
+ "epoch": 2.328767123287671,
+ "grad_norm": 0.5436204671859741,
+ "learning_rate": 9.805900576231357e-07,
+ "loss": 0.9257,
+ "step": 340
+ },
+ {
+ "epoch": 2.3972602739726026,
+ "grad_norm": 0.6050807237625122,
+ "learning_rate": 9.790300923144372e-07,
+ "loss": 1.0112,
+ "step": 350
+ },
+ {
+ "epoch": 2.4657534246575343,
+ "grad_norm": 0.41216209530830383,
+ "learning_rate": 9.77411178664346e-07,
+ "loss": 0.9853,
+ "step": 360
+ },
+ {
+ "epoch": 2.5342465753424657,
+ "grad_norm": 0.4330998957157135,
+ "learning_rate": 9.75733515892652e-07,
+ "loss": 1.0301,
+ "step": 370
+ },
+ {
+ "epoch": 2.602739726027397,
+ "grad_norm": 0.4711519479751587,
+ "learning_rate": 9.739973104486777e-07,
+ "loss": 0.9389,
+ "step": 380
+ },
+ {
+ "epoch": 2.671232876712329,
+ "grad_norm": 0.5700051784515381,
+ "learning_rate": 9.722027759858714e-07,
+ "loss": 0.9781,
+ "step": 390
+ },
+ {
+ "epoch": 2.73972602739726,
+ "grad_norm": 0.36050090193748474,
+ "learning_rate": 9.703501333355166e-07,
+ "loss": 1.0553,
+ "step": 400
+ },
+ {
+ "epoch": 2.808219178082192,
+ "grad_norm": 0.41387563943862915,
+ "learning_rate": 9.68439610479557e-07,
+ "loss": 1.0056,
+ "step": 410
+ },
+ {
+ "epoch": 2.8767123287671232,
+ "grad_norm": 0.4400821030139923,
+ "learning_rate": 9.664714425225413e-07,
+ "loss": 0.9877,
+ "step": 420
+ },
+ {
+ "epoch": 2.9452054794520546,
+ "grad_norm": 0.4176802635192871,
+ "learning_rate": 9.644458716626911e-07,
+ "loss": 0.9547,
+ "step": 430
+ },
+ {
+ "epoch": 3.0136986301369864,
+ "grad_norm": 0.3389221727848053,
+ "learning_rate": 9.623631471620979e-07,
+ "loss": 1.026,
+ "step": 440
+ },
+ {
+ "epoch": 3.0821917808219177,
+ "grad_norm": 0.36593514680862427,
+ "learning_rate": 9.602235253160481e-07,
+ "loss": 1.0277,
+ "step": 450
+ },
+ {
+ "epoch": 3.1506849315068495,
+ "grad_norm": 0.3649665117263794,
+ "learning_rate": 9.580272694214854e-07,
+ "loss": 0.9359,
+ "step": 460
+ },
+ {
+ "epoch": 3.219178082191781,
+ "grad_norm": 0.34383371472358704,
+ "learning_rate": 9.557746497446085e-07,
+ "loss": 0.9152,
+ "step": 470
+ },
+ {
+ "epoch": 3.287671232876712,
+ "grad_norm": 0.34904035925865173,
+ "learning_rate": 9.53465943487614e-07,
+ "loss": 0.9315,
+ "step": 480
+ },
+ {
+ "epoch": 3.356164383561644,
+ "grad_norm": 0.34317487478256226,
+ "learning_rate": 9.511014347545837e-07,
+ "loss": 0.9726,
+ "step": 490
+ },
+ {
+ "epoch": 3.4246575342465753,
+ "grad_norm": 0.30464446544647217,
+ "learning_rate": 9.48681414516524e-07,
+ "loss": 0.9659,
+ "step": 500
+ },
+ {
+ "epoch": 3.4246575342465753,
+ "eval_loss": 1.1060227155685425,
+ "eval_runtime": 641.2039,
+ "eval_samples_per_second": 0.203,
+ "eval_steps_per_second": 0.203,
+ "step": 500
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 2920,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 20,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 3.5300059477526943e+18,
+ "train_batch_size": 1,
+ "trial_name": null,
+ "trial_params": null
+ }
residual-diffuser/args.json ADDED
@@ -0,0 +1,97 @@
+ {
+ "action_weight": 1,
+ "add_extras": {
+ "_string": "<bound method Parser.add_extras of Parser(prog='train_tour.py', usage=None, description=None, formatter_class=<class 'argparse.HelpFormatter'>, conflict_handler='error', add_help=True)>",
+ "_type": "python_object (type = method)",
+ "_value": "gASVlAQAAAAAAACMCGJ1aWx0aW5zlIwHZ2V0YXR0cpSTlIwIX19tYWluX1+UjAZQYXJzZXKUk5QpgZR9lCiMCHNldF9zZWVklGgCaAZoCIaUUpSMCHNhdmVwYXRolIwnbG9ncy9tYXplMmQtbGFyZ2UtdjEvZGlmZnVzaW9uL0gzODRfVDE2lIwKbm9ybWFsaXplcpSMEExpbWl0c05vcm1hbGl6ZXKUjAVta2RpcpRoAmgGaA+GlFKUjApnZXRfY29tbWl0lGgCaAZoEoaUUpSMCGV4cF9uYW1llIwSZGlmZnVzaW9uL0gzODRfVDE2lIwLc2FtcGxlX2ZyZXGUTegDjAdob3Jpem9ulE2AAYwGY29uZmlnlIwNY29uZmlnLm1hemUyZJSMDWV2YWxfZnN0cmluZ3OUaAJoBmgbhpRSlIwKYmF0Y2hfc2l6ZZRLAYwGZGV2aWNllIwEY3VkYZSMC25fcmVmZXJlbmNllEsyjA1zYXZlX3BhcmFsbGVslImMGWdyYWRpZW50X2FjY3VtdWxhdGVfZXZlcnmUSwiMDWFjdGlvbl93ZWlnaHSUSwGMCWRpZmZ1c2lvbpSMGG1vZGVscy5HYXVzc2lhbkRpZmZ1c2lvbpSMBmJ1Y2tldJROjAZwcmVmaXiUjApkaWZmdXNpb24vlIwNbl90cmFpbl9zdGVwc5RHQO1MAAAAAACMC3JlYWRfY29uZmlnlGgCaAZoK4aUUpSMDWxvc3NfZGlzY291bnSUSwGMEW5fc3RlcHNfcGVyX2Vwb2NolE1g6owMbG9zc193ZWlnaHRzlE6MD21heF9wYXRoX2xlbmd0aJRNQJyMDWNsaXBfZGVub2lzZWSUiIwFbW9kZWyUjBNtb2RlbHMuVGVtcG9yYWxVbmV0lIwJZW1hX2RlY2F5lEc/79cKPXCj14wPcHJlZGljdF9lcHNpbG9ulIiMB2xvZ2Jhc2WUjARsb2dzlIwRbl9kaWZmdXNpb25fc3RlcHOUSxCMB2RhdGFzZXSUjA9tYXplMmQtbGFyZ2UtdjGUjA1sZWFybmluZ19yYXRllEc+1Pi1iONo8YwLdXNlX3BhZGRpbmeUiYwKYWRkX2V4dHJhc5RoAmgGaD6GlFKUjAlkaW1fbXVsdHOUSwFLBEsIh5SMCWxvc3NfdHlwZZSMBnNwbGluZZSMCW5fc2FtcGxlc5RLCowIcmVuZGVyZXKUjBR1dGlscy5NYXplMmRSZW5kZXJlcpSMCXNhdmVfZnJlcZRN0AeMEWdlbmVyYXRlX2V4cF9uYW1llGgCaAZoSYaUUpSMBmxvYWRlcpSMFGRhdGFzZXRzLkdvYWxEYXRhc2V0lIwGY29tbWl0lIwvMTNiNGQ0MDRiZGJkOWQwZDc0YzA4ZDVhNTFlZTM5Zjg0ZTc2NTgzMCBtYXplMmSUjAduX3NhdmVzlEsyjBN0ZXJtaW5hdGlvbl9wZW5hbHR5lE6MDnByZXByb2Nlc3NfZm5zlF2UjBRtYXplMmRfc2V0X3Rlcm1pbmFsc5RhjAlzYXZlX2RpZmaUaAJoBmhVhpRSlHViaD6GlFKULg=="
+ },
+ "batch_size": 1,
+ "bucket": null,
+ "clip_denoised": true,
+ "commit": "13b4d404bdbd9d0d74c08d5a51ee39f84e765830 maze2d",
+ "config": "config.maze2d",
+ "dataset": "maze2d-large-v1",
+ "device": "cuda",
+ "diffusion": "models.GaussianDiffusion",
+ "dim_mults": {
+ "_type": "tuple",
+ "_value": [
+ 1,
+ 4,
+ 8
+ ]
+ },
+ "ema_decay": 0.995,
+ "eval_fstrings": {
+ "_string": "<bound method Parser.eval_fstrings of Parser(prog='train_tour.py', usage=None, description=None, formatter_class=<class 'argparse.HelpFormatter'>, conflict_handler='error', add_help=True)>",
+ "_type": "python_object (type = method)",
+ "_value": "gASVlAQAAAAAAACMCGJ1aWx0aW5zlIwHZ2V0YXR0cpSTlIwIX19tYWluX1+UjAZQYXJzZXKUk5QpgZR9lCiMCHNldF9zZWVklGgCaAZoCIaUUpSMCHNhdmVwYXRolIwnbG9ncy9tYXplMmQtbGFyZ2UtdjEvZGlmZnVzaW9uL0gzODRfVDE2lIwKbm9ybWFsaXplcpSMEExpbWl0c05vcm1hbGl6ZXKUjAVta2RpcpRoAmgGaA+GlFKUjApnZXRfY29tbWl0lGgCaAZoEoaUUpSMCGV4cF9uYW1llIwSZGlmZnVzaW9uL0gzODRfVDE2lIwLc2FtcGxlX2ZyZXGUTegDjAdob3Jpem9ulE2AAYwGY29uZmlnlIwNY29uZmlnLm1hemUyZJSMDWV2YWxfZnN0cmluZ3OUaAJoBmgbhpRSlIwKYmF0Y2hfc2l6ZZRLAYwGZGV2aWNllIwEY3VkYZSMC25fcmVmZXJlbmNllEsyjA1zYXZlX3BhcmFsbGVslImMGWdyYWRpZW50X2FjY3VtdWxhdGVfZXZlcnmUSwiMDWFjdGlvbl93ZWlnaHSUSwGMCWRpZmZ1c2lvbpSMGG1vZGVscy5HYXVzc2lhbkRpZmZ1c2lvbpSMBmJ1Y2tldJROjAZwcmVmaXiUjApkaWZmdXNpb24vlIwNbl90cmFpbl9zdGVwc5RHQO1MAAAAAACMC3JlYWRfY29uZmlnlGgCaAZoK4aUUpSMDWxvc3NfZGlzY291bnSUSwGMEW5fc3RlcHNfcGVyX2Vwb2NolE1g6owMbG9zc193ZWlnaHRzlE6MD21heF9wYXRoX2xlbmd0aJRNQJyMDWNsaXBfZGVub2lzZWSUiIwFbW9kZWyUjBNtb2RlbHMuVGVtcG9yYWxVbmV0lIwJZW1hX2RlY2F5lEc/79cKPXCj14wPcHJlZGljdF9lcHNpbG9ulIiMB2xvZ2Jhc2WUjARsb2dzlIwRbl9kaWZmdXNpb25fc3RlcHOUSxCMB2RhdGFzZXSUjA9tYXplMmQtbGFyZ2UtdjGUjA1sZWFybmluZ19yYXRllEc+1Pi1iONo8YwLdXNlX3BhZGRpbmeUiYwKYWRkX2V4dHJhc5RoAmgGaD6GlFKUjAlkaW1fbXVsdHOUSwFLBEsIh5SMCWxvc3NfdHlwZZSMBnNwbGluZZSMCW5fc2FtcGxlc5RLCowIcmVuZGVyZXKUjBR1dGlscy5NYXplMmRSZW5kZXJlcpSMCXNhdmVfZnJlcZRN0AeMEWdlbmVyYXRlX2V4cF9uYW1llGgCaAZoSYaUUpSMBmxvYWRlcpSMFGRhdGFzZXRzLkdvYWxEYXRhc2V0lIwGY29tbWl0lIwvMTNiNGQ0MDRiZGJkOWQwZDc0YzA4ZDVhNTFlZTM5Zjg0ZTc2NTgzMCBtYXplMmSUjAduX3NhdmVzlEsyjBN0ZXJtaW5hdGlvbl9wZW5hbHR5lE6MDnByZXByb2Nlc3NfZm5zlF2UjBRtYXplMmRfc2V0X3Rlcm1pbmFsc5RhjAlzYXZlX2RpZmaUaAJoBmhVhpRSlHViaBuGlFKULg=="
+ },
+ "exp_name": "diffusion/H384_T16",
+ "generate_exp_name": {
+ "_string": "<bound method Parser.generate_exp_name of Parser(prog='train_tour.py', usage=None, description=None, formatter_class=<class 'argparse.HelpFormatter'>, conflict_handler='error', add_help=True)>",
+ "_type": "python_object (type = method)",
+ "_value": "gASVlAQAAAAAAACMCGJ1aWx0aW5zlIwHZ2V0YXR0cpSTlIwIX19tYWluX1+UjAZQYXJzZXKUk5QpgZR9lCiMCHNldF9zZWVklGgCaAZoCIaUUpSMCHNhdmVwYXRolIwnbG9ncy9tYXplMmQtbGFyZ2UtdjEvZGlmZnVzaW9uL0gzODRfVDE2lIwKbm9ybWFsaXplcpSMEExpbWl0c05vcm1hbGl6ZXKUjAVta2RpcpRoAmgGaA+GlFKUjApnZXRfY29tbWl0lGgCaAZoEoaUUpSMCGV4cF9uYW1llIwSZGlmZnVzaW9uL0gzODRfVDE2lIwLc2FtcGxlX2ZyZXGUTegDjAdob3Jpem9ulE2AAYwGY29uZmlnlIwNY29uZmlnLm1hemUyZJSMDWV2YWxfZnN0cmluZ3OUaAJoBmgbhpRSlIwKYmF0Y2hfc2l6ZZRLAYwGZGV2aWNllIwEY3VkYZSMC25fcmVmZXJlbmNllEsyjA1zYXZlX3BhcmFsbGVslImMGWdyYWRpZW50X2FjY3VtdWxhdGVfZXZlcnmUSwiMDWFjdGlvbl93ZWlnaHSUSwGMCWRpZmZ1c2lvbpSMGG1vZGVscy5HYXVzc2lhbkRpZmZ1c2lvbpSMBmJ1Y2tldJROjAZwcmVmaXiUjApkaWZmdXNpb24vlIwNbl90cmFpbl9zdGVwc5RHQO1MAAAAAACMC3JlYWRfY29uZmlnlGgCaAZoK4aUUpSMDWxvc3NfZGlzY291bnSUSwGMEW5fc3RlcHNfcGVyX2Vwb2NolE1g6owMbG9zc193ZWlnaHRzlE6MD21heF9wYXRoX2xlbmd0aJRNQJyMDWNsaXBfZGVub2lzZWSUiIwFbW9kZWyUjBNtb2RlbHMuVGVtcG9yYWxVbmV0lIwJZW1hX2RlY2F5lEc/79cKPXCj14wPcHJlZGljdF9lcHNpbG9ulIiMB2xvZ2Jhc2WUjARsb2dzlIwRbl9kaWZmdXNpb25fc3RlcHOUSxCMB2RhdGFzZXSUjA9tYXplMmQtbGFyZ2UtdjGUjA1sZWFybmluZ19yYXRllEc+1Pi1iONo8YwLdXNlX3BhZGRpbmeUiYwKYWRkX2V4dHJhc5RoAmgGaD6GlFKUjAlkaW1fbXVsdHOUSwFLBEsIh5SMCWxvc3NfdHlwZZSMBnNwbGluZZSMCW5fc2FtcGxlc5RLCowIcmVuZGVyZXKUjBR1dGlscy5NYXplMmRSZW5kZXJlcpSMCXNhdmVfZnJlcZRN0AeMEWdlbmVyYXRlX2V4cF9uYW1llGgCaAZoSYaUUpSMBmxvYWRlcpSMFGRhdGFzZXRzLkdvYWxEYXRhc2V0lIwGY29tbWl0lIwvMTNiNGQ0MDRiZGJkOWQwZDc0YzA4ZDVhNTFlZTM5Zjg0ZTc2NTgzMCBtYXplMmSUjAduX3NhdmVzlEsyjBN0ZXJtaW5hdGlvbl9wZW5hbHR5lE6MDnByZXByb2Nlc3NfZm5zlF2UjBRtYXplMmRfc2V0X3Rlcm1pbmFsc5RhjAlzYXZlX2RpZmaUaAJoBmhVhpRSlHViaEmGlFKULg=="
+ },
+ "get_commit": {
+ "_string": "<bound method Parser.get_commit of Parser(prog='train_tour.py', usage=None, description=None, formatter_class=<class 'argparse.HelpFormatter'>, conflict_handler='error', add_help=True)>",
+ "_type": "python_object (type = method)",
+ "_value": "gASVlAQAAAAAAACMCGJ1aWx0aW5zlIwHZ2V0YXR0cpSTlIwIX19tYWluX1+UjAZQYXJzZXKUk5QpgZR9lCiMCHNldF9zZWVklGgCaAZoCIaUUpSMCHNhdmVwYXRolIwnbG9ncy9tYXplMmQtbGFyZ2UtdjEvZGlmZnVzaW9uL0gzODRfVDE2lIwKbm9ybWFsaXplcpSMEExpbWl0c05vcm1hbGl6ZXKUjAVta2RpcpRoAmgGaA+GlFKUjApnZXRfY29tbWl0lGgCaAZoEoaUUpSMCGV4cF9uYW1llIwSZGlmZnVzaW9uL0gzODRfVDE2lIwLc2FtcGxlX2ZyZXGUTegDjAdob3Jpem9ulE2AAYwGY29uZmlnlIwNY29uZmlnLm1hemUyZJSMDWV2YWxfZnN0cmluZ3OUaAJoBmgbhpRSlIwKYmF0Y2hfc2l6ZZRLAYwGZGV2aWNllIwEY3VkYZSMC25fcmVmZXJlbmNllEsyjA1zYXZlX3BhcmFsbGVslImMGWdyYWRpZW50X2FjY3VtdWxhdGVfZXZlcnmUSwiMDWFjdGlvbl93ZWlnaHSUSwGMCWRpZmZ1c2lvbpSMGG1vZGVscy5HYXVzc2lhbkRpZmZ1c2lvbpSMBmJ1Y2tldJROjAZwcmVmaXiUjApkaWZmdXNpb24vlIwNbl90cmFpbl9zdGVwc5RHQO1MAAAAAACMC3JlYWRfY29uZmlnlGgCaAZoK4aUUpSMDWxvc3NfZGlzY291bnSUSwGMEW5fc3RlcHNfcGVyX2Vwb2NolE1g6owMbG9zc193ZWlnaHRzlE6MD21heF9wYXRoX2xlbmd0aJRNQJyMDWNsaXBfZGVub2lzZWSUiIwFbW9kZWyUjBNtb2RlbHMuVGVtcG9yYWxVbmV0lIwJZW1hX2RlY2F5lEc/79cKPXCj14wPcHJlZGljdF9lcHNpbG9ulIiMB2xvZ2Jhc2WUjARsb2dzlIwRbl9kaWZmdXNpb25fc3RlcHOUSxCMB2RhdGFzZXSUjA9tYXplMmQtbGFyZ2UtdjGUjA1sZWFybmluZ19yYXRllEc+1Pi1iONo8YwLdXNlX3BhZGRpbmeUiYwKYWRkX2V4dHJhc5RoAmgGaD6GlFKUjAlkaW1fbXVsdHOUSwFLBEsIh5SMCWxvc3NfdHlwZZSMBnNwbGluZZSMCW5fc2FtcGxlc5RLCowIcmVuZGVyZXKUjBR1dGlscy5NYXplMmRSZW5kZXJlcpSMCXNhdmVfZnJlcZRN0AeMEWdlbmVyYXRlX2V4cF9uYW1llGgCaAZoSYaUUpSMBmxvYWRlcpSMFGRhdGFzZXRzLkdvYWxEYXRhc2V0lIwGY29tbWl0lIwvMTNiNGQ0MDRiZGJkOWQwZDc0YzA4ZDVhNTFlZTM5Zjg0ZTc2NTgzMCBtYXplMmSUjAduX3NhdmVzlEsyjBN0ZXJtaW5hdGlvbl9wZW5hbHR5lE6MDnByZXByb2Nlc3NfZm5zlF2UjBRtYXplMmRfc2V0X3Rlcm1pbmFsc5RhjAlzYXZlX2RpZmaUaAJoBmhVhpRSlHViaBKGlFKULg=="
+ },
+ "gradient_accumulate_every": 8,
+ "horizon": 384,
+ "learning_rate": 5e-06,
+ "loader": "datasets.GoalDataset",
+ "logbase": "logs",
+ "loss_discount": 1,
+ "loss_type": "spline",
+ "loss_weights": null,
+ "max_path_length": 40000,
+ "mkdir": {
+ "_string": "<bound method Parser.mkdir of Parser(prog='train_tour.py', usage=None, description=None, formatter_class=<class 'argparse.HelpFormatter'>, conflict_handler='error', add_help=True)>",
+ "_type": "python_object (type = method)",
+ "_value": "gASVlAQAAAAAAACMCGJ1aWx0aW5zlIwHZ2V0YXR0cpSTlIwIX19tYWluX1+UjAZQYXJzZXKUk5QpgZR9lCiMCHNldF9zZWVklGgCaAZoCIaUUpSMCHNhdmVwYXRolIwnbG9ncy9tYXplMmQtbGFyZ2UtdjEvZGlmZnVzaW9uL0gzODRfVDE2lIwKbm9ybWFsaXplcpSMEExpbWl0c05vcm1hbGl6ZXKUjAVta2RpcpRoAmgGaA+GlFKUjApnZXRfY29tbWl0lGgCaAZoEoaUUpSMCGV4cF9uYW1llIwSZGlmZnVzaW9uL0gzODRfVDE2lIwLc2FtcGxlX2ZyZXGUTegDjAdob3Jpem9ulE2AAYwGY29uZmlnlIwNY29uZmlnLm1hemUyZJSMDWV2YWxfZnN0cmluZ3OUaAJoBmgbhpRSlIwKYmF0Y2hfc2l6ZZRLAYwGZGV2aWNllIwEY3VkYZSMC25fcmVmZXJlbmNllEsyjA1zYXZlX3BhcmFsbGVslImMGWdyYWRpZW50X2FjY3VtdWxhdGVfZXZlcnmUSwiMDWFjdGlvbl93ZWlnaHSUSwGMCWRpZmZ1c2lvbpSMGG1vZGVscy5HYXVzc2lhbkRpZmZ1c2lvbpSMBmJ1Y2tldJROjAZwcmVmaXiUjApkaWZmdXNpb24vlIwNbl90cmFpbl9zdGVwc5RHQO1MAAAAAACMC3JlYWRfY29uZmlnlGgCaAZoK4aUUpSMDWxvc3NfZGlzY291bnSUSwGMEW5fc3RlcHNfcGVyX2Vwb2NolE1g6owMbG9zc193ZWlnaHRzlE6MD21heF9wYXRoX2xlbmd0aJRNQJyMDWNsaXBfZGVub2lzZWSUiIwFbW9kZWyUjBNtb2RlbHMuVGVtcG9yYWxVbmV0lIwJZW1hX2RlY2F5lEc/79cKPXCj14wPcHJlZGljdF9lcHNpbG9ulIiMB2xvZ2Jhc2WUjARsb2dzlIwRbl9kaWZmdXNpb25fc3RlcHOUSxCMB2RhdGFzZXSUjA9tYXplMmQtbGFyZ2UtdjGUjA1sZWFybmluZ19yYXRllEc+1Pi1iONo8YwLdXNlX3BhZGRpbmeUiYwKYWRkX2V4dHJhc5RoAmgGaD6GlFKUjAlkaW1fbXVsdHOUSwFLBEsIh5SMCWxvc3NfdHlwZZSMBnNwbGluZZSMCW5fc2FtcGxlc5RLCowIcmVuZGVyZXKUjBR1dGlscy5NYXplMmRSZW5kZXJlcpSMCXNhdmVfZnJlcZRN0AeMEWdlbmVyYXRlX2V4cF9uYW1llGgCaAZoSYaUUpSMBmxvYWRlcpSMFGRhdGFzZXRzLkdvYWxEYXRhc2V0lIwGY29tbWl0lIwvMTNiNGQ0MDRiZGJkOWQwZDc0YzA4ZDVhNTFlZTM5Zjg0ZTc2NTgzMCBtYXplMmSUjAduX3NhdmVzlEsyjBN0ZXJtaW5hdGlvbl9wZW5hbHR5lE6MDnByZXByb2Nlc3NfZm5zlF2UjBRtYXplMmRfc2V0X3Rlcm1pbmFsc5RhjAlzYXZlX2RpZmaUaAJoBmhVhpRSlHViaA+GlFKULg=="
+ },
+ "model": "models.TemporalUnet",
+ "n_diffusion_steps": 16,
+ "n_reference": 50,
+ "n_samples": 10,
+ "n_saves": 50,
+ "n_steps_per_epoch": 60000,
+ "n_train_steps": 60000.0,
+ "normalizer": "LimitsNormalizer",
+ "predict_epsilon": true,
+ "prefix": "diffusion/",
+ "preprocess_fns": [
+ "maze2d_set_terminals"
+ ],
+ "read_config": {
+ "_string": "<bound method Parser.read_config of Parser(prog='train_tour.py', usage=None, description=None, formatter_class=<class 'argparse.HelpFormatter'>, conflict_handler='error', add_help=True)>",
+ "_type": "python_object (type = method)",
+ "_value": "gASVlAQAAAAAAACMCGJ1aWx0aW5zlIwHZ2V0YXR0cpSTlIwIX19tYWluX1+UjAZQYXJzZXKUk5QpgZR9lCiMCHNldF9zZWVklGgCaAZoCIaUUpSMCHNhdmVwYXRolIwnbG9ncy9tYXplMmQtbGFyZ2UtdjEvZGlmZnVzaW9uL0gzODRfVDE2lIwKbm9ybWFsaXplcpSMEExpbWl0c05vcm1hbGl6ZXKUjAVta2RpcpRoAmgGaA+GlFKUjApnZXRfY29tbWl0lGgCaAZoEoaUUpSMCGV4cF9uYW1llIwSZGlmZnVzaW9uL0gzODRfVDE2lIwLc2FtcGxlX2ZyZXGUTegDjAdob3Jpem9ulE2AAYwGY29uZmlnlIwNY29uZmlnLm1hemUyZJSMDWV2YWxfZnN0cmluZ3OUaAJoBmgbhpRSlIwKYmF0Y2hfc2l6ZZRLAYwGZGV2aWNllIwEY3VkYZSMC25fcmVmZXJlbmNllEsyjA1zYXZlX3BhcmFsbGVslImMGWdyYWRpZW50X2FjY3VtdWxhdGVfZXZlcnmUSwiMDWFjdGlvbl93ZWlnaHSUSwGMCWRpZmZ1c2lvbpSMGG1vZGVscy5HYXVzc2lhbkRpZmZ1c2lvbpSMBmJ1Y2tldJROjAZwcmVmaXiUjApkaWZmdXNpb24vlIwNbl90cmFpbl9zdGVwc5RHQO1MAAAAAACMC3JlYWRfY29uZmlnlGgCaAZoK4aUUpSMDWxvc3NfZGlzY291bnSUSwGMEW5fc3RlcHNfcGVyX2Vwb2NolE1g6owMbG9zc193ZWlnaHRzlE6MD21heF9wYXRoX2xlbmd0aJRNQJyMDWNsaXBfZGVub2lzZWSUiIwFbW9kZWyUjBNtb2RlbHMuVGVtcG9yYWxVbmV0lIwJZW1hX2RlY2F5lEc/79cKPXCj14wPcHJlZGljdF9lcHNpbG9ulIiMB2xvZ2Jhc2WUjARsb2dzlIwRbl9kaWZmdXNpb25fc3RlcHOUSxCMB2RhdGFzZXSUjA9tYXplMmQtbGFyZ2UtdjGUjA1sZWFybmluZ19yYXRllEc+1Pi1iONo8YwLdXNlX3BhZGRpbmeUiYwKYWRkX2V4dHJhc5RoAmgGaD6GlFKUjAlkaW1fbXVsdHOUSwFLBEsIh5SMCWxvc3NfdHlwZZSMBnNwbGluZZSMCW5fc2FtcGxlc5RLCowIcmVuZGVyZXKUjBR1dGlscy5NYXplMmRSZW5kZXJlcpSMCXNhdmVfZnJlcZRN0AeMEWdlbmVyYXRlX2V4cF9uYW1llGgCaAZoSYaUUpSMBmxvYWRlcpSMFGRhdGFzZXRzLkdvYWxEYXRhc2V0lIwGY29tbWl0lIwvMTNiNGQ0MDRiZGJkOWQwZDc0YzA4ZDVhNTFlZTM5Zjg0ZTc2NTgzMCBtYXplMmSUjAduX3NhdmVzlEsyjBN0ZXJtaW5hdGlvbl9wZW5hbHR5lE6MDnByZXByb2Nlc3NfZm5zlF2UjBRtYXplMmRfc2V0X3Rlcm1pbmFsc5RhjAlzYXZlX2RpZmaUaAJoBmhVhpRSlHViaCuGlFKULg=="
+ },
+ "renderer": "utils.Maze2dRenderer",
+ "reproducibility": {
+ "command_line": "python scripts/train_tour.py",
+ "git_has_uncommitted_changes": true,
+ "git_root": "/local/home/atcelen/work/diffuser",
+ "git_url": "https://github.com/jannerm/diffuser/tree/13b4d404bdbd9d0d74c08d5a51ee39f84e765830",
+ "time": "Thu Jun 26 17:05:14 2025"
+ },
+ "sample_freq": 1000,
+ "save_diff": {
+ "_string": "<bound method Parser.save_diff of Parser(prog='train_tour.py', usage=None, description=None, formatter_class=<class 'argparse.HelpFormatter'>, conflict_handler='error', add_help=True)>",
+ "_type": "python_object (type = method)",
+ "_value": "gASVlAQAAAAAAACMCGJ1aWx0aW5zlIwHZ2V0YXR0cpSTlIwIX19tYWluX1+UjAZQYXJzZXKUk5QpgZR9lCiMCHNldF9zZWVklGgCaAZoCIaUUpSMCHNhdmVwYXRolIwnbG9ncy9tYXplMmQtbGFyZ2UtdjEvZGlmZnVzaW9uL0gzODRfVDE2lIwKbm9ybWFsaXplcpSMEExpbWl0c05vcm1hbGl6ZXKUjAVta2RpcpRoAmgGaA+GlFKUjApnZXRfY29tbWl0lGgCaAZoEoaUUpSMCGV4cF9uYW1llIwSZGlmZnVzaW9uL0gzODRfVDE2lIwLc2FtcGxlX2ZyZXGUTegDjAdob3Jpem9ulE2AAYwGY29uZmlnlIwNY29uZmlnLm1hemUyZJSMDWV2YWxfZnN0cmluZ3OUaAJoBmgbhpRSlIwKYmF0Y2hfc2l6ZZRLAYwGZGV2aWNllIwEY3VkYZSMC25fcmVmZXJlbmNllEsyjA1zYXZlX3BhcmFsbGVslImMGWdyYWRpZW50X2FjY3VtdWxhdGVfZXZlcnmUSwiMDWFjdGlvbl93ZWlnaHSUSwGMCWRpZmZ1c2lvbpSMGG1vZGVscy5HYXVzc2lhbkRpZmZ1c2lvbpSMBmJ1Y2tldJROjAZwcmVmaXiUjApkaWZmdXNpb24vlIwNbl90cmFpbl9zdGVwc5RHQO1MAAAAAACMC3JlYWRfY29uZmlnlGgCaAZoK4aUUpSMDWxvc3NfZGlzY291bnSUSwGMEW5fc3RlcHNfcGVyX2Vwb2NolE1g6owMbG9zc193ZWlnaHRzlE6MD21heF9wYXRoX2xlbmd0aJRNQJyMDWNsaXBfZGVub2lzZWSUiIwFbW9kZWyUjBNtb2RlbHMuVGVtcG9yYWxVbmV0lIwJZW1hX2RlY2F5lEc/79cKPXCj14wPcHJlZGljdF9lcHNpbG9ulIiMB2xvZ2Jhc2WUjARsb2dzlIwRbl9kaWZmdXNpb25fc3RlcHOUSxCMB2RhdGFzZXSUjA9tYXplMmQtbGFyZ2UtdjGUjA1sZWFybmluZ19yYXRllEc+1Pi1iONo8YwLdXNlX3BhZGRpbmeUiYwKYWRkX2V4dHJhc5RoAmgGaD6GlFKUjAlkaW1fbXVsdHOUSwFLBEsIh5SMCWxvc3NfdHlwZZSMBnNwbGluZZSMCW5fc2FtcGxlc5RLCowIcmVuZGVyZXKUjBR1dGlscy5NYXplMmRSZW5kZXJlcpSMCXNhdmVfZnJlcZRN0AeMEWdlbmVyYXRlX2V4cF9uYW1llGgCaAZoSYaUUpSMBmxvYWRlcpSMFGRhdGFzZXRzLkdvYWxEYXRhc2V0lIwGY29tbWl0lIwvMTNiNGQ0MDRiZGJkOWQwZDc0YzA4ZDVhNTFlZTM5Zjg0ZTc2NTgzMCBtYXplMmSUjAduX3NhdmVzlEsyjBN0ZXJtaW5hdGlvbl9wZW5hbHR5lE6MDnByZXByb2Nlc3NfZm5zlF2UjBRtYXplMmRfc2V0X3Rlcm1pbmFsc5RhjAlzYXZlX2RpZmaUaAJoBmhVhpRSlHViaFWGlFKULg=="
+ },
+ "save_freq": 2000,
+ "save_parallel": false,
+ "savepath": "logs/maze2d-large-v1/diffusion/H384_T16",
+ "set_seed": {
+ "_string": "<bound method Parser.set_seed of Parser(prog='train_tour.py', usage=None, description=None, formatter_class=<class 'argparse.HelpFormatter'>, conflict_handler='error', add_help=True)>",
+ "_type": "python_object (type = method)",
+ "_value": "gASVlAQAAAAAAACMCGJ1aWx0aW5zlIwHZ2V0YXR0cpSTlIwIX19tYWluX1+UjAZQYXJzZXKUk5QpgZR9lCiMCHNldF9zZWVklGgCaAZoCIaUUpSMCHNhdmVwYXRolIwnbG9ncy9tYXplMmQtbGFyZ2UtdjEvZGlmZnVzaW9uL0gzODRfVDE2lIwKbm9ybWFsaXplcpSMEExpbWl0c05vcm1hbGl6ZXKUjAVta2RpcpRoAmgGaA+GlFKUjApnZXRfY29tbWl0lGgCaAZoEoaUUpSMCGV4cF9uYW1llIwSZGlmZnVzaW9uL0gzODRfVDE2lIwLc2FtcGxlX2ZyZXGUTegDjAdob3Jpem9ulE2AAYwGY29uZmlnlIwNY29uZmlnLm1hemUyZJSMDWV2YWxfZnN0cmluZ3OUaAJoBmgbhpRSlIwKYmF0Y2hfc2l6ZZRLAYwGZGV2aWNllIwEY3VkYZSMC25fcmVmZXJlbmNllEsyjA1zYXZlX3BhcmFsbGVslImMGWdyYWRpZW50X2FjY3VtdWxhdGVfZXZlcnmUSwiMDWFjdGlvbl93ZWlnaHSUSwGMCWRpZmZ1c2lvbpSMGG1vZGVscy5HYXVzc2lhbkRpZmZ1c2lvbpSMBmJ1Y2tldJROjAZwcmVmaXiUjApkaWZmdXNpb24vlIwNbl90cmFpbl9zdGVwc5RHQO1MAAAAAACMC3JlYWRfY29uZmlnlGgCaAZoK4aUUpSMDWxvc3NfZGlzY291bnSUSwGMEW5fc3RlcHNfcGVyX2Vwb2NolE1g6owMbG9zc193ZWlnaHRzlE6MD21heF9wYXRoX2xlbmd0aJRNQJyMDWNsaXBfZGVub2lzZWSUiIwFbW9kZWyUjBNtb2RlbHMuVGVtcG9yYWxVbmV0lIwJZW1hX2RlY2F5lEc/79cKPXCj14wPcHJlZGljdF9lcHNpbG9ulIiMB2xvZ2Jhc2WUjARsb2dzlIwRbl9kaWZmdXNpb25fc3RlcHOUSxCMB2RhdGFzZXSUjA9tYXplMmQtbGFyZ2UtdjGUjA1sZWFybmluZ19yYXRllEc+1Pi1iONo8YwLdXNlX3BhZGRpbmeUiYwKYWRkX2V4dHJhc5RoAmgGaD6GlFKUjAlkaW1fbXVsdHOUSwFLBEsIh5SMCWxvc3NfdHlwZZSMBnNwbGluZZSMCW5fc2FtcGxlc5RLCowIcmVuZGVyZXKUjBR1dGlscy5NYXplMmRSZW5kZXJlcpSMCXNhdmVfZnJlcZRN0AeMEWdlbmVyYXRlX2V4cF9uYW1llGgCaAZoSYaUUpSMBmxvYWRlcpSMFGRhdGFzZXRzLkdvYWxEYXRhc2V0lIwGY29tbWl0lIwvMTNiNGQ0MDRiZGJkOWQwZDc0YzA4ZDVhNTFlZTM5Zjg0ZTc2NTgzMCBtYXplMmSUjAduX3NhdmVzlEsyjBN0ZXJtaW5hdGlvbl9wZW5hbHR5lE6MDnByZXByb2Nlc3NfZm5zlF2UjBRtYXplMmRfc2V0X3Rlcm1pbmFsc5RhjAlzYXZlX2RpZmaUaAJoBmhVhpRSlHViaAiGlFKULg=="
+ },
+ "termination_penalty": null,
+ "use_padding": false
+ }
residual-diffuser/dataset_config.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c441212970d6518583ad1a3d0dcb8a68690102f1dbca6bc3fbdbe3ec5b8430f8
+ size 280
residual-diffuser/diff.txt ADDED
@@ -0,0 +1,1895 @@
+ diff --git a/config/locomotion.py b/config/locomotion.py
+ deleted file mode 100644
+ index 4410bb1..0000000
+ --- a/config/locomotion.py
+ +++ /dev/null
+ @@ -1,70 +0,0 @@
+ -import socket
+ -
+ -from diffuser.utils import watch
+ -
+ -#------------------------ base ------------------------#
+ -
+ -## automatically make experiment names for planning
+ -## by labelling folders with these args
+ -
+ -diffusion_args_to_watch = [
+ - ('prefix', ''),
+ - ('horizon', 'H'),
+ - ('n_diffusion_steps', 'T'),
+ -]
+ -
+ -base = {
+ - 'diffusion': {
+ - ## model
+ - 'model': 'models.TemporalUnet',
+ - 'diffusion': 'models.GaussianDiffusion',
+ - 'horizon': 32,
+ - 'n_diffusion_steps': 100,
+ - 'action_weight': 10,
+ - 'loss_weights': None,
+ - 'loss_discount': 1,
+ - 'predict_epsilon': False,
+ - 'dim_mults': (1, 4, 8),
+ - 'renderer': 'utils.MuJoCoRenderer',
+ -
+ - ## dataset
+ - 'loader': 'datasets.SequenceDataset',
+ - 'normalizer': 'LimitsNormalizer',
+ - 'preprocess_fns': [],
+ - 'clip_denoised': True,
+ - 'use_padding': True,
+ - 'max_path_length': 1000,
+ -
+ - ## serialization
+ - 'logbase': 'logs',
+ - 'prefix': 'diffusion/',
+ - 'exp_name': watch(diffusion_args_to_watch),
+ -
+ - ## training
+ - 'n_steps_per_epoch': 10000,
+ - 'loss_type': 'l2',
+ - 'n_train_steps': 1e6,
+ - 'batch_size': 32,
+ - 'learning_rate': 2e-4,
+ - 'gradient_accumulate_every': 2,
+ - 'ema_decay': 0.995,
+ - 'save_freq': 1000,
+ - 'sample_freq': 1000,
+ - 'n_saves': 5,
+ - 'save_parallel': False,
+ - 'n_reference': 8,
+ - 'n_samples': 2,
+ - 'bucket': None,
+ - 'device': 'cuda',
+ - },
+ -}
+ -
+ -#------------------------ overrides ------------------------#
+ -
+ -## put environment-specific overrides here
+ -
+ -halfcheetah_medium_expert_v2 = {
+ - 'diffusion': {
+ - 'horizon': 16,
+ - },
+ -}
+ diff --git a/config/maze2d.py b/config/maze2d.py
+ index a06ac7f..0a8d22a 100644
+ --- a/config/maze2d.py
+ +++ b/config/maze2d.py
+ @@ -34,11 +34,11 @@ base = {
+ 'model': 'models.TemporalUnet',
+ 'diffusion': 'models.GaussianDiffusion',
+ 'horizon': 256,
+ - 'n_diffusion_steps': 256,
+ + 'n_diffusion_steps': 512,
+ 'action_weight': 1,
+ 'loss_weights': None,
+ 'loss_discount': 1,
+ - 'predict_epsilon': False,
+ + 'predict_epsilon': True,
+ 'dim_mults': (1, 4, 8),
+ 'renderer': 'utils.Maze2dRenderer',
+
+ @@ -57,14 +57,14 @@ base = {
+ 'exp_name': watch(diffusion_args_to_watch),
+
+ ## training
+ - 'n_steps_per_epoch': 10000,
+ - 'loss_type': 'l2',
+ - 'n_train_steps': 2e6,
+ - 'batch_size': 32,
+ - 'learning_rate': 2e-4,
+ - 'gradient_accumulate_every': 2,
+ + 'n_steps_per_epoch': 60000,
+ + 'loss_type': 'spline',
+ + 'n_train_steps': 6e4,
+ + 'batch_size': 1,
+ + 'learning_rate': 5e-6,
+ + 'gradient_accumulate_every': 8,
+ 'ema_decay': 0.995,
+ - 'save_freq': 1000,
+ + 'save_freq': 2000,
+ 'sample_freq': 1000,
+ 'n_saves': 50,
+ 'save_parallel': False,
+ @@ -89,7 +89,6 @@ base = {
+ 'prefix': 'plans/release',
+ 'exp_name': watch(plan_args_to_watch),
+ 'suffix': '0',
+ -
+ 'conditional': False,
+
+ ## loading
+ @@ -122,10 +121,10 @@ maze2d_umaze_v1 = {
+ maze2d_large_v1 = {
+ 'diffusion': {
+ 'horizon': 384,
+ - 'n_diffusion_steps': 256,
+ + 'n_diffusion_steps': 16,
+ },
+ 'plan': {
+ 'horizon': 384,
+ - 'n_diffusion_steps': 256,
+ + 'n_diffusion_steps': 16,
+ },
+ }
+ diff --git a/diffuser/datasets/buffer.py b/diffuser/datasets/buffer.py
+ index 1ad2106..5991f01 100644
+ --- a/diffuser/datasets/buffer.py
+ +++ b/diffuser/datasets/buffer.py
+ @@ -9,7 +9,7 @@ class ReplayBuffer:
+
+ def __init__(self, max_n_episodes, max_path_length, termination_penalty):
+ self._dict = {
+ - 'path_lengths': np.zeros(max_n_episodes, dtype=np.int),
+ + 'path_lengths': np.zeros(max_n_episodes, dtype=np.int_),
+ }
+ self._count = 0
+ self.max_n_episodes = max_n_episodes
+ diff --git a/diffuser/datasets/sequence.py b/diffuser/datasets/sequence.py
+ index 356c540..73c1b04 100644
+ --- a/diffuser/datasets/sequence.py
+ +++ b/diffuser/datasets/sequence.py
+ @@ -83,6 +83,7 @@ class SequenceDataset(torch.utils.data.Dataset):
+ actions = self.fields.normed_actions[path_ind, start:end]
+
+ conditions = self.get_conditions(observations)
+ +
+ trajectories = np.concatenate([actions, observations], axis=-1)
+ batch = Batch(trajectories, conditions)
+ return batch
+ diff --git a/diffuser/models/diffusion.py b/diffuser/models/diffusion.py
+ index fae4cfd..461680a 100644
+ --- a/diffuser/models/diffusion.py
+ +++ b/diffuser/models/diffusion.py
+ @@ -2,6 +2,7 @@ import numpy as np
+ import torch
+ from torch import nn
+ import pdb
+ +import matplotlib.pyplot as plt
+
+ import diffuser.utils as utils
+ from .helpers import (
+ @@ -9,6 +10,7 @@ from .helpers import (
+ extract,
+ apply_conditioning,
+ Losses,
+ + catmull_rom_spline_with_rotation,
+ )
+
+ class GaussianDiffusion(nn.Module):
+ @@ -26,6 +28,7 @@ class GaussianDiffusion(nn.Module):
+ betas = cosine_beta_schedule(n_timesteps)
+ alphas = 1. - betas
+ alphas_cumprod = torch.cumprod(alphas, axis=0)
+ + print(f"Alphas Cumprod: {alphas_cumprod}")
+ alphas_cumprod_prev = torch.cat([torch.ones(1), alphas_cumprod[:-1]])
+
+ self.n_timesteps = int(n_timesteps)
+ @@ -73,7 +76,7 @@ class GaussianDiffusion(nn.Module):
+ '''
+ self.action_weight = action_weight
+
+ - dim_weights = torch.ones(self.transition_dim, dtype=torch.float32)
+ + dim_weights = torch.ones(self.transition_dim, dtype=torch.float64)
+
+ ## set loss coefficients for dimensions of observation
+ if weights_dict is None: weights_dict = {}
+ @@ -97,18 +100,16 @@ class GaussianDiffusion(nn.Module):
+ otherwise, model predicts x0 directly
+ '''
+ if self.predict_epsilon:
+ - return (
+ - extract(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t -
+ - extract(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * noise
+ - )
+ + return noise
+ else:
+ return noise
+
+ def q_posterior(self, x_start, x_t, t):
+ posterior_mean = (
+ extract(self.posterior_mean_coef1, t, x_t.shape) * x_start +
+ - extract(self.posterior_mean_coef2, t, x_t.shape) * x_t
+ + extract(self.posterior_mean_coef2, t, x_t.shape) * x_t[:, :, self.action_dim:]
+ )
+ +
+ posterior_variance = extract(self.posterior_variance, t, x_t.shape)
+ posterior_log_variance_clipped = extract(self.posterior_log_variance_clipped, t, x_t.shape)
+ return posterior_mean, posterior_variance, posterior_log_variance_clipped
+ @@ -129,7 +130,7 @@ class GaussianDiffusion(nn.Module):
+ def p_sample(self, x, cond, t):
+ b, *_, device = *x.shape, x.device
+ model_mean, _, model_log_variance = self.p_mean_variance(x=x, cond=cond, t=t)
+ - noise = torch.randn_like(x)
+ + noise = torch.randn_like(x[:, :, self.action_dim:])
+ # no noise when t == 0
+ nonzero_mask = (1 - (t == 0).float()).reshape(b, *((1,) * (len(x.shape) - 1)))
+ return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise
+ @@ -139,22 +140,59 @@ class GaussianDiffusion(nn.Module):
+ device = self.betas.device
+
+ batch_size = shape[0]
+ - x = torch.randn(shape, device=device)
+ - x = apply_conditioning(x, cond, self.action_dim)
+ + # x = torch.randn(shape, device=device, dtype=torch.float64)
+ + # Extract known indices and values
+ + known_indices = np.array(list(cond.keys()), dtype=int)
+ +
+ + # candidate_no x batch_size x dim
+ + known_values = np.stack([c.cpu().numpy() for c in cond.values()], axis=0)
243
+ + known_values = np.moveaxis(known_values, 0, 1)
244
+ +
245
+ + # Sort the timepoints
246
+ + sorted_indices = np.argsort(known_indices)
247
+ + known_indices = known_indices[sorted_indices]
248
+ + known_values = known_values[:, sorted_indices]
249
+ +
250
+ + # Build the structured spline guess
251
+ + catmull_spline_trajectory = np.array([
252
+ + catmull_rom_spline_with_rotation(known_values[b, :, :-1], known_indices, shape[1])
253
+ + for b in range(batch_size)
254
+ + ])
255
+ + catmull_spline_trajectory = torch.tensor(
256
+ + catmull_spline_trajectory,
257
+ + dtype=torch.float64,
258
+ + device=device
259
+ + )
260
+ +
261
+ +
262
+ + if self.predict_epsilon:
263
+ + x = torch.randn((shape[0], shape[1], self.observation_dim), device=device, dtype=torch.float64)
264
+ + cond_residual = {k: torch.zeros_like(v)[:, :-1] for k, v in cond.items()}
265
+ + is_cond = torch.zeros((shape[0], shape[1], 1), device=device, dtype=torch.float64)
266
+ + is_cond[:, known_indices, :] = 1.0
267
+
268
+ if return_diffusion: diffusion = [x]
269
+
270
+ - progress = utils.Progress(self.n_timesteps) if verbose else utils.Silent()
271
+ + # progress = utils.Progress(self.n_timesteps) if verbose else utils.Silent()
272
+ for i in reversed(range(0, self.n_timesteps)):
273
+ + if self.predict_epsilon:
274
+ + x = torch.cat([catmull_spline_trajectory, is_cond, x], dim=-1)
275
+ +
276
+ timesteps = torch.full((batch_size,), i, device=device, dtype=torch.long)
277
+ - x = self.p_sample(x, cond, timesteps)
278
+ - x = apply_conditioning(x, cond, self.action_dim)
279
+ + x = self.p_sample(x, cond_residual, timesteps)
280
+ +
281
+ + x = apply_conditioning(x, cond_residual, 0)
282
+
283
+ - progress.update({'t': i})
284
+ + if return_diffusion: diffusion.append(x)
285
+
286
+ - if return_diffusion: diffusion.append(x)
287
+ + x = catmull_spline_trajectory + x
288
+
289
+ - progress.close()
290
+ +
291
+ +
292
+ + # Normalize the quaternions
293
+ + # x[:, :, 3:7] = x[:, :, 3:7] / torch.norm(x[:, :, 3:7], dim=-1, keepdim=True)
294
+ +
295
+ + # progress.close()
296
+
297
+ if return_diffusion:
298
+ return x, torch.stack(diffusion, dim=1)
299
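The rewritten `p_sample_loop` changes what the reverse process denoises. Instead of a full trajectory, the model (in the `predict_epsilon` branch, which this commit repurposes) works on the residual around a Catmull-Rom spline fitted through the conditioning keyframes: each step feeds the network the concatenation [spline, is_cond flag, residual], the residual is pinned to zero at keyframes via `cond_residual`, and the final trajectory is spline + residual. Stripped of bookkeeping, the loop reduces to roughly this sketch (helper callables stand in for the methods above; not the exact code):

    import torch

    def sample_residual(p_sample, apply_conditioning, spline, is_cond,
                        cond_residual, n_timesteps):
        # spline:  [B, H, obs_dim] Catmull-Rom guess through the keyframes
        # is_cond: [B, H, 1] binary mask marking conditioned timesteps
        B, H, obs_dim = spline.shape
        x = torch.randn(B, H, obs_dim, dtype=spline.dtype, device=spline.device)
        for i in reversed(range(n_timesteps)):
            net_in = torch.cat([spline, is_cond, x], dim=-1)
            t = torch.full((B,), i, device=spline.device, dtype=torch.long)
            x = p_sample(net_in, cond_residual, t)        # denoise the residual
            x = apply_conditioning(x, cond_residual, 0)   # zero residual at keyframes
        return spline + x                                 # back to trajectory space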
+ @@ -167,7 +205,7 @@ class GaussianDiffusion(nn.Module):
300
+ conditions : [ (time, state), ... ]
301
+ '''
302
+ device = self.betas.device
303
+ - batch_size = len(cond[0])
304
+ + batch_size = len(next(iter(cond.values())))
305
+ horizon = horizon or self.horizon
306
+ shape = (batch_size, horizon, self.transition_dim)
307
+
308
+ @@ -175,38 +213,106 @@ class GaussianDiffusion(nn.Module):
309
+
310
+ #------------------------------------------ training ------------------------------------------#
311
+
312
+ - def q_sample(self, x_start, t, noise=None):
313
+ + def q_sample(self, x_start, t, spline=None, noise=None):
314
+ + x_start_noise = x_start[:, :, :-1]
315
+ + x_start_is_cond = x_start[:, :, [-1]]
316
+ +
317
+ + if spline is None:
318
+ + spline = torch.randn_like(x_start_noise)
319
+ if noise is None:
320
+ - noise = torch.randn_like(x_start)
321
+ + noise = torch.randn_like(x_start_noise)
322
+
323
+ - sample = (
324
+ - extract(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start +
325
+ - extract(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise
326
+ - )
327
+ + alpha = extract(self.sqrt_alphas_cumprod, t, x_start.shape)
328
+ + oneminusalpha = extract(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape)
329
+ +
330
+ + # Weighted combination of x_0 and the spline
331
+ + out = alpha * x_start_noise + oneminusalpha * noise
332
+ +
333
+ + # Concatenate the binary feature and the spline as the conditioning
334
+ + out = torch.cat([spline, x_start_is_cond, out], dim=-1)
335
+
336
+ - return sample
337
+ + return out
338
+
339
+ def p_losses(self, x_start, cond, t):
340
+ - noise = torch.randn_like(x_start)
341
+ + batch_size, horizon, _ = x_start.shape
342
+ + # Extract known indices and values
343
+ + known_indices = np.array(list(cond.keys()), dtype=int)
344
+ +
345
+ + # candidate_no x batch_size x dim
346
+ + known_values = np.stack([c.cpu().numpy() for c in cond.values()], axis=0)
347
+ + known_values = np.moveaxis(known_values, 0, 1)
348
+ +
349
+ + # Sort the timepoints
350
+ + sorted_indices = np.argsort(known_indices)
351
+ + known_indices = known_indices[sorted_indices]
352
+ + known_values = known_values[:, sorted_indices]
353
+ +
354
+ + # Build the structured spline guess
355
+ + catmull_spline_trajectory = np.array([
356
+ + catmull_rom_spline_with_rotation(known_values[b, :, :-1], known_indices, horizon)
357
+ + for b in range(batch_size)
358
+ + ])
359
+ + catmull_spline_trajectory = torch.tensor(
360
+ + catmull_spline_trajectory,
361
+ + dtype=torch.float64,
362
+ + device=x_start.device
363
+ + )
364
+
365
+ - x_noisy = self.q_sample(x_start=x_start, t=t, noise=noise)
366
+ - x_noisy = apply_conditioning(x_noisy, cond, self.action_dim)
367
+ + # Plot the quaternions
368
+ + # plt.plot(x_start[0, :, 3].cpu().numpy())
369
+ + # plt.plot(catmull_spline_trajectory[0, :, 3].cpu().numpy())
370
+ + # plt.legend(["x_start", "catmull_spline"])
371
+ + # plt.show()
372
+ + # raise Exception
373
+
374
+ - x_recon = self.model(x_noisy, cond, t)
375
+ - x_recon = apply_conditioning(x_recon, cond, self.action_dim)
376
+
377
+ - assert noise.shape == x_recon.shape
378
+ + if not self.predict_epsilon:
379
+ + # Forward diffuse with the structured trajectory
380
+ + x_noisy = self.q_sample(
381
+ + x_start,
382
+ + t,
383
+ + spline=catmull_spline_trajectory,
384
+ + )
385
+ + x_noisy = apply_conditioning(x_noisy, cond, self.action_dim)
386
+
387
+ - if self.predict_epsilon:
388
+ - loss, info = self.loss_fn(x_recon, noise)
389
+ + # Reverse pass guess
390
+ + x_recon = self.model(x_noisy, cond, t)
391
+ + x_recon = apply_conditioning(x_recon, cond, self.action_dim)
392
+ +
393
+ + # Then x_recon is the predicted x_0, compare to the true x_0
394
+ + loss, info = self.loss_fn(x_recon, x_start, cond)
395
+ else:
396
+ - loss, info = self.loss_fn(x_recon, x_start)
397
+ + residual = x_start.clone()
398
+ +
399
+ + residual[:, :, :-1] -= catmull_spline_trajectory
400
+ +
401
+ +
402
+ + cond_residual = {k: torch.zeros_like(v)[:, :-1] for k, v in cond.items()}
403
+ +
404
+ + x_noisy = self.q_sample(
405
+ + residual,
406
+ + t,
407
+ + spline=catmull_spline_trajectory,
408
+ + )
409
+ + x_noisy = apply_conditioning(x_noisy, cond_residual, self.action_dim)
410
+ +
411
+ + # Reverse pass guess
412
+ + x_recon = self.model(x_noisy, cond, t)
413
+ + x_recon = apply_conditioning(x_recon, cond_residual, 0)
414
+ +
415
+ + x_recon = x_recon + catmull_spline_trajectory
416
+ +
417
+ + loss, info = self.loss_fn(x_recon, x_start[:, :, :-1], cond)
418
+
419
+ return loss, info
420
+
421
+ def loss(self, x, cond):
422
+ batch_size = len(x)
423
+ t = torch.randint(0, self.n_timesteps, (batch_size,), device=x.device).long()
424
+ + # t = torch.randint(1, 2, (batch_size,), device=x.device).long()
425
+ + # x = x.double()
426
+ + # cond = {k: v.double() for k, v in cond.items()}
427
+ + # print(f"Time: {t.item()}")
428
+ return self.p_losses(x, cond, t)
429
+
430
+ def forward(self, cond, *args, **kwargs):
431
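Training mirrors the sampling change. In the repurposed `predict_epsilon` branch of `p_losses`, the regression target is the residual between the trajectory and the spline prior, the residual is forward-diffused by `q_sample`, and the spline is added back before scoring against the ground truth. Schematically (a restatement of the code above, not new math):

    r_0   = x_0 - spline                                    # residual target
    r_t   = sqrt(alpha_bar_t) * r_0 + sqrt(1 - alpha_bar_t) * eps
    x_hat = spline + model([spline, is_cond, r_t], t)       # residual pinned to 0 at keyframes
    loss  = loss_fn(x_hat, x_0, cond)                       # e.g. SplineLoss below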
+ diff --git a/diffuser/models/helpers.py b/diffuser/models/helpers.py
432
+ index d39f35d..9f43ef8 100644
433
+ --- a/diffuser/models/helpers.py
434
+ +++ b/diffuser/models/helpers.py
435
+ @@ -1,11 +1,11 @@
436
+ import math
437
+ +import json
438
+ import numpy as np
439
+ import torch
440
+ import torch.nn as nn
441
+ import torch.nn.functional as F
442
+ -import einops
443
+ from einops.layers.torch import Rearrange
444
+ -import pdb
445
+ +from pytorch3d.transforms import quaternion_to_matrix, quaternion_to_axis_angle
446
+
447
+ import diffuser.utils as utils
448
+
449
+ @@ -30,7 +30,7 @@ class SinusoidalPosEmb(nn.Module):
450
+ class Downsample1d(nn.Module):
451
+ def __init__(self, dim):
452
+ super().__init__()
453
+ - self.conv = nn.Conv1d(dim, dim, 3, 2, 1)
454
+ + self.conv = nn.Conv1d(dim, dim, 3, 2, 1).to(torch.float64)
455
+
456
+ def forward(self, x):
457
+ return self.conv(x)
458
+ @@ -38,7 +38,7 @@ class Downsample1d(nn.Module):
459
+ class Upsample1d(nn.Module):
460
+ def __init__(self, dim):
461
+ super().__init__()
462
+ - self.conv = nn.ConvTranspose1d(dim, dim, 4, 2, 1)
463
+ + self.conv = nn.ConvTranspose1d(dim, dim, 4, 2, 1).to(torch.float64)
464
+
465
+ def forward(self, x):
466
+ return self.conv(x)
467
+ @@ -52,9 +52,9 @@ class Conv1dBlock(nn.Module):
468
+ super().__init__()
469
+
470
+ self.block = nn.Sequential(
471
+ - nn.Conv1d(inp_channels, out_channels, kernel_size, padding=kernel_size // 2),
472
+ + nn.Conv1d(inp_channels, out_channels, kernel_size, padding=kernel_size // 2).to(torch.float64),
473
+ Rearrange('batch channels horizon -> batch channels 1 horizon'),
474
+ - nn.GroupNorm(n_groups, out_channels),
475
+ + nn.GroupNorm(n_groups, out_channels).to(torch.float64),
476
+ Rearrange('batch channels 1 horizon -> batch channels horizon'),
477
+ nn.Mish(),
478
+ )
479
+ @@ -72,7 +72,7 @@ def extract(a, t, x_shape):
480
+ out = a.gather(-1, t)
481
+ return out.reshape(b, *((1,) * (len(x_shape) - 1)))
482
+
483
+ -def cosine_beta_schedule(timesteps, s=0.008, dtype=torch.float32):
484
+ +def cosine_beta_schedule(timesteps, s=0.008, dtype=torch.float64):
485
+ """
486
+ cosine schedule
487
+ as proposed in https://openreview.net/forum?id=-NEXDKk8gZ
488
+ @@ -157,9 +157,979 @@ class ValueL2(ValueLoss):
489
+ def _loss(self, pred, targ):
490
+ return F.mse_loss(pred, targ, reduction='none')
491
+
492
+ +class GeodesicL2Loss(nn.Module):
493
+ + def __init__(self, *args):
494
+ + super().__init__()
495
+ + pass
496
+ +
497
+ + def _loss(self, pred, targ):
498
+ + # Compute L2 loss for the first three dimensions
499
+ + l2_loss = F.mse_loss(pred[..., :3], targ[..., :3], reduction='mean')
500
+ +
501
+ + # Normalize to unit quaternions for the last four dimensions
502
+ + pred_quat = pred[..., 3:] / pred[..., 3:].norm(dim=-1, keepdim=True)
503
+ + targ_quat = targ[..., 3:] / targ[..., 3:].norm(dim=-1, keepdim=True)
504
+ +
505
+ + assert not torch.isnan(pred_quat).any(), "Pred Quat has NaNs"
506
+ + assert not torch.isnan(targ_quat).any(), "Targ Quat has NaNs"
507
+ +
508
+ + # Compute dot product for the quaternions
509
+ + dot_product = torch.sum(pred_quat * targ_quat, dim=-1)
510
+ + dot_product = torch.clamp(torch.abs(dot_product), -1.0, 1.0)
511
+ +
512
+ + # Compute geodesic loss for the quaternions
513
+ + geodesic_loss = 2 * torch.acos(dot_product).mean()
514
+ +
515
+ + assert not torch.isnan(geodesic_loss).any(), "Geodesic Loss has NaNs"
516
+ + assert not torch.isnan(l2_loss).any(), "L2 Loss has NaNs"
517
+ +
518
+ + return l2_loss + geodesic_loss, l2_loss, geodesic_loss
519
+ +
520
+ + def forward(self, pred, targ):
521
+ + loss, l2, geodesic = self._loss(pred, targ)
522
+ +
523
+ + info = {
524
+ + 'l2': l2.item(),
525
+ + 'geodesic': geodesic.item(),
526
+ + }
527
+ +
528
+ + return loss, info
529
+ +
530
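`GeodesicL2Loss` scores rotations by the geodesic angle theta = 2 * arccos(|<q_pred, q_targ>|); the absolute value folds the quaternion double cover (q and -q encode the same rotation). One caveat: arccos has an unbounded derivative as the dot product approaches 1, so perfectly matching quaternions sit at a non-differentiable point; the classes further down clamp strictly inside (-1, 1) for this reason. A small numeric check with made-up values:

    import torch

    q_id  = torch.tensor([1.0, 0.0, 0.0, 0.0])                 # identity
    q_z90 = torch.tensor([0.70710678, 0.0, 0.0, 0.70710678])   # 90 deg about z
    dot = torch.abs((q_id * q_z90).sum()).clamp(max=1.0 - 1e-7)
    theta = 2 * torch.acos(dot)   # ~ pi/2, i.e. the 90 deg geodesic angle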
+ +class RotationTranslationLoss(nn.Module):
531
+ + def __init__(self, *args):
532
+ + super().__init__()
533
+ + pass
534
+ +
535
+ + def _loss(self, pred, targ, cond=None):
536
+ +
537
+ + # Make sure the dtype is float64
538
+ + pred = pred.to(torch.float64)
539
+ + targ = targ.to(torch.float64)
540
+ +
541
+ + eps = 1e-8
542
+ +
543
+ + pred_trans = pred[..., :3]
544
+ + pred_quat = pred[..., 3:7]
545
+ + targ_trans = targ[..., :3]
546
+ + targ_quat = targ[..., 3:7]
547
+ +
548
+ + l2_loss = F.mse_loss(pred_trans, targ_trans, reduction='mean')
549
+ +
550
+ + # Calculate the geodesic loss
551
+ + pred_n = pred_quat.norm(dim=-1, keepdim=True).clamp(min=eps)
552
+ + targ_n = targ_quat.norm(dim=-1, keepdim=True).clamp(min=eps)
553
+ +
554
+ + pred_quat_norm = pred_quat / pred_n
555
+ + targ_quat_norm = targ_quat / targ_n
556
+ +
557
+ +
558
+ + dot_product = torch.sum(pred_quat_norm * targ_quat_norm, dim=-1).clamp(min=-1.0 + eps, max=1.0 - eps)
559
+ + quaternion_dist = 1 - (dot_product ** 2).mean()
560
+ +
561
+ + # Calculate the rotation error
562
+ + pred_rot = quaternion_to_matrix(pred_quat_norm).reshape(-1, 3, 3)
563
+ + targ_rot = quaternion_to_matrix(targ_quat_norm).reshape(-1, 3, 3)
564
+ +
565
+ + r2r1 = pred_rot @ targ_rot.permute(0, 2, 1)
566
+ + trace = torch.diagonal(r2r1, dim1=-2, dim2=-1).sum(-1)
567
+ + trace = torch.clamp((trace - 1) / 2, -1.0 + eps, 1.0 - eps)
568
+ + geodesic_loss = torch.acos(trace).mean()
569
+ +
570
+ + # Add a smoothness and acceleration term to the positions and quaternions
571
+ + alpha = 1.0
572
+ + smoothness_loss = F.mse_loss(pred[:, 1:, :7].reshape(-1, 7), pred[:, :-1, :7].reshape(-1, 7), reduction='mean')
573
+ + acceleration_loss = F.mse_loss(pred[:, 2:, :7].reshape(-1, 7), 2 * pred[:, 1:-1, :7].reshape(-1, 7) - pred[:, :-2, :7].reshape(-1, 7), reduction='mean')
574
+ +
575
+ + l2_multiplier = 10.0
576
+ +
577
+ + loss = l2_multiplier * l2_loss + quaternion_dist + geodesic_loss + alpha * (smoothness_loss + acceleration_loss)
578
+ +
579
+ + dtw = DynamicTimeWarpingLoss()
580
+ + dtw_loss, _ = dtw.forward(pred_trans.reshape(-1, 3), targ_trans.reshape(-1, 3))
581
+ +
582
+ + hausdorff = HausdorffDistanceLoss()
583
+ + hausdorff_loss, _ = hausdorff.forward(pred_trans.reshape(-1, 3), targ_trans.reshape(-1, 3))
584
+ +
585
+ + frec = FrechetDistanceLoss()
586
+ + frechet_loss, _ = frec.forward(pred_trans.reshape(-1, 3), targ_trans.reshape(-1, 3))
587
+ +
588
+ + chamfer = ChamferDistanceLoss()
589
+ + chamfer_loss, _ = chamfer.forward(pred_trans.reshape(-1, 3), targ_trans.reshape(-1, 3))
590
+ +
591
+ + return loss, l2_loss, geodesic_loss, quaternion_dist, dtw_loss, hausdorff_loss, frechet_loss, chamfer_loss
592
+ +
593
+ +
594
+ + def forward(self, pred, targ, cond=None):
595
+ + loss, err_t, err_geo, err_r, err_dtw, err_hausdorff, err_frechet, err_chamfer = self._loss(pred, targ, cond)
596
+ +
597
+ + info = {
598
+ + 'rot. error': err_r.item(),
599
+ + 'geodesic error': err_geo.item(),
600
+ + 'trans. error': err_t.item(),
601
+ + 'dtw': err_dtw.item(),
602
+ + 'hausdorff': err_hausdorff.item(),
603
+ + 'frechet': err_frechet.item(),
604
+ + 'chamfer': err_chamfer.item(),
605
+ + }
606
+ +
607
+ + return loss, info
608
+ +
609
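`RotationTranslationLoss` computes the rotation geodesic through rotation matrices instead: for R = R_pred @ R_targ^T the angle is arccos((trace(R) - 1) / 2), clamped away from +/-1 to keep the gradient finite. The quaternion term 1 - <q1, q2>^2 is a cheaper proxy for the same quantity, since <q1, q2> = cos(theta/2) implies 1 - <q1, q2>^2 = (1 - cos(theta)) / 2, which is monotone in theta on [0, pi] and avoids arccos entirely. A sanity check of the trace formula (assuming pytorch3d, which this diff already imports):

    import torch
    from pytorch3d.transforms import quaternion_to_matrix

    q1 = torch.tensor([[1.0, 0.0, 0.0, 0.0]])                # identity (w-first)
    q2 = torch.tensor([[0.70710678, 0.0, 0.0, 0.70710678]])  # 90 deg about z
    R = quaternion_to_matrix(q1) @ quaternion_to_matrix(q2).transpose(-1, -2)
    trace = torch.diagonal(R, dim1=-2, dim2=-1).sum(-1)
    theta = torch.acos(((trace - 1) / 2).clamp(-1 + 1e-12, 1 - 1e-12))  # ~ pi/2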
+ +class SplineLoss(nn.Module):
610
+ + def __init__(self, *args):
611
+ + super().__init__()
612
+ + self.scales = json.load(open('scene_scale.json'))
613
+ +
614
+ + def compute_spline_coeffs(self, trans):
615
+ + p0 = trans[:, :-3, :]
616
+ + p1 = trans[:, 1:-2, :]
617
+ + p2 = trans[:, 2:-1, :]
618
+ + p3 = trans[:, 3:, :]
619
+ +
620
+ + # Tangent approximations
621
+ + m1 = 0.5 * (-p0 + p2)
622
+ + m2 = 0.5 * (-p1 + p3)
623
+ +
624
+ + # Cubic spline coefficients for each dimension
625
+ + a = (2 * p1 - 2 * p2 + m1 + m2)
626
+ + b = (-3 * p1 + 3 * p2 - 2 * m1 - m2)
627
+ + c = (m1)
628
+ + d = (p1)
629
+ +
630
+ + return torch.stack([a, b, c, d], dim=-1)
631
+ +
632
+ + def q_normalize(self, q):
633
+ + return q / q.norm(p=2, dim=-1, keepdim=True).clamp(min=1e-12)
634
+ +
635
+ + def q_conjugate(self, q):
636
+ + w, x, y, z = q[..., 0], q[..., 1], q[..., 2], q[..., 3]
637
+ + return torch.stack([w, -x, -y, -z], dim=-1)
638
+ +
639
+ + def q_multiply(self, q1, q2):
640
+ + """
641
+ + Hamilton product q1*q2 (w-first convention).
642
+ + """
643
+ + w1, x1, y1, z1 = q1.unbind(-1)
644
+ + w2, x2, y2, z2 = q2.unbind(-1)
645
+ + w = w1*w2 - x1*x2 - y1*y2 - z1*z2
646
+ + x = w1*x2 + x1*w2 + y1*z2 - z1*y2
647
+ + y = w1*y2 - x1*z2 + y1*w2 + z1*x2
648
+ + z = w1*z2 + x1*y2 - y1*x2 + z1*w2
649
+ + return torch.stack([w, x, y, z], dim=-1)
650
+ +
651
+ + def q_inverse(self, q):
652
+ + return self.q_conjugate(self.q_normalize(q))
653
+ +
654
+ + def q_log(self, q):
655
+ + """
656
+ + Quaternion logarithm for a unit quaternion
657
+ + Only returns the imaginary part
658
+ + """
659
+ + q = self.q_normalize(q)
660
+ + w = q[..., 0]
661
+ + xyz = q[..., 1:] # shape [..., 3]
662
+ + mag_v = xyz.norm(p=2, dim=-1)
663
+ + eps = 1e-12
664
+ + angle = torch.acos(w.clamp(-1.0 + eps, 1.0 - eps))
665
+ +
666
+ + # We do a safe-guard against zero for sin(angle)
667
+ + small_mask = (mag_v < 1e-12) | (angle < 1e-12)
668
+ + # Where small_mask is True => near identity => log(q) ~ 0
669
+ + log_val = torch.zeros_like(xyz)
670
+ +
671
+ + # Normal case
672
+ + scale = angle / mag_v.clamp(min=1e-12)
673
+ + normal_case = scale.unsqueeze(-1) * xyz
674
+ +
675
+ + log_val = torch.where(
676
+ + small_mask.unsqueeze(-1),
677
+ + torch.zeros_like(xyz),
678
+ + normal_case
679
+ + )
680
+ + return log_val
681
+ +
682
+ + def q_exp(self, v):
683
+ + """
684
+ + Quaternion exponential
685
+ + """
686
+ + norm_v = v.norm(p=2, dim=-1)
687
+ + small_mask = norm_v < 1e-12
688
+ +
689
+ + w = torch.cos(norm_v)
690
+ + sin_v = torch.sin(norm_v)
691
+ + scale = torch.where(
692
+ + small_mask,
693
+ + torch.zeros_like(norm_v), # if zero, sin(0)/0 => 0
694
+ + sin_v / norm_v.clamp(min=1e-12)
695
+ + )
696
+ + xyz = scale.unsqueeze(-1) * v
697
+ +
698
+ + # For small angles, we approximate cos(norm_v) ~ 1, sin(norm_v)/norm_v ~ 1
699
+ + w = torch.where(
700
+ + small_mask,
701
+ + torch.ones_like(w),
702
+ + w
703
+ + )
704
+ + return torch.cat([w.unsqueeze(-1), xyz], dim=-1)
705
+ +
706
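`q_log` and `q_exp` are inverses of each other: log maps a unit quaternion to a rotation vector of half the rotation angle (arccos(w) times the normalized imaginary part), and exp rebuilds [cos(theta), sin(theta) * axis]. Only their mutual consistency matters for the tangent construction below, where every tangent is q_i * exp(-0.25 * (log(...) + log(...))). A quick round-trip check; `__new__` is used here only to borrow the methods without `__init__`'s JSON dependency:

    import torch

    sl = SplineLoss.__new__(SplineLoss)   # skip __init__ (it reads scene_scale.json)
    q = torch.tensor([0.70710678, 0.70710678, 0.0, 0.0])  # 90 deg about x
    v = sl.q_log(q)        # ~ [pi/4, 0, 0]: half-angle rotation vector
    q_back = sl.q_exp(v)   # ~ q again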
+ + def q_slerp(self, q1, q2, t):
707
+ + """
708
+ + Spherical linear interpolation from q1 to q2 at t in [0,1].
709
+ + Both q1, q2 assumed normalized.
710
+ + q1, q2, t can be 1D or broadcastable shapes, but typically 1D.
711
+ + """
712
+ + q1 = self.q_normalize(q1)
713
+ + q2 = self.q_normalize(q2)
714
+ + dot = (q1 * q2).sum(dim=-1, keepdim=True) # the dot product
715
+ +
716
+ + eps = 1e-12
717
+ + dot = dot.clamp(-1.0 + eps, 1.0 - eps)
718
+ +
719
+ + flip_mask = dot < 0.0
720
+ + if flip_mask.any():
721
+ + q2 = torch.where(flip_mask, -q2, q2)
722
+ + dot = torch.where(flip_mask, -dot, dot)
723
+ +
724
+ + # If they're very close, do a simple linear interpolation
725
+ + close_mask = dot.squeeze(-1) > 0.9995
726
+ + # Using an epsilon to avoid potential issues close to 1.0
727
+ +
728
+ + # Branch 1: Very close
729
+ + # linear LERP
730
+ + lerp_val = (1.0 - t) * q1 + t * q2
731
+ + lerp_val = self.q_normalize(lerp_val)
732
+ +
733
+ + # Branch 2: Standard SLERP
734
+ + theta_0 = torch.acos(dot)
735
+ + sin_theta_0 = torch.sin(theta_0)
736
+ + theta = theta_0 * t
737
+ + s1 = torch.sin(theta_0 - theta) / sin_theta_0.clamp(min=1e-12)
738
+ + s2 = torch.sin(theta) / sin_theta_0.clamp(min=1e-12)
739
+ + slerp_val = s1 * q1 + s2 * q2
740
+ + slerp_val = self.q_normalize(slerp_val)
741
+ +
742
+ + # Combine
743
+ + return torch.where(
744
+ + close_mask.unsqueeze(-1),
745
+ + lerp_val,
746
+ + slerp_val
747
+ + )
748
+ +
749
+ + def compute_uniform_tangent(self, q_im1, q_i, q_ip1):
750
+ + """
751
+ + Computes a 'Catmull–Rom-like' tangent T_i for quaternion q_i,
752
+ + given neighbors q_im1, q_i, q_ip1.
753
+ +
754
+ + T_i = q_i * exp( -0.25 * [ log(q_i^-1 q_ip1) + log(q_i^-1 q_im1) ] )
755
+ + """
756
+ + q_im1 = self.q_normalize(q_im1)
757
+ + q_i = self.q_normalize(q_i)
758
+ + q_ip1 = self.q_normalize(q_ip1)
759
+ +
760
+ + inv_qi = self.q_inverse(q_i)
761
+ + r1 = self.q_multiply(inv_qi, q_ip1)
762
+ + r2 = self.q_multiply(inv_qi, q_im1)
763
+ +
764
+ + lr1 = self.q_log(r1)
765
+ + lr2 = self.q_log(r2)
766
+ +
767
+ + m = -0.25 * (lr1 + lr2)
768
+ + exp_m = self.q_exp(m)
769
+ + return self.q_multiply(q_i, exp_m)
770
+ +
771
+ + def compute_all_uniform_tangents(self, quats):
772
+ + """
773
+ + Vectorized version that computes tangents T_i for all keyframe quaternions at once.
774
+ + quats shape: [N,4], N >= 2
775
+ + Returns shape [N,4].
776
+ + """
777
+ + q_im1 = torch.cat([quats[[0]], quats[:-1]], dim=0) # q_im1[0] = q0
778
+ + q_ip1 = torch.cat([quats[1:], quats[[-1]]], dim=0) # q_ip1[N-1]= q_{N-1}
779
+ +
780
+ + return self.compute_uniform_tangent(q_im1, quats, q_ip1)
781
+ +
782
+ + def squad(self, q0, a, b, q1, t):
783
+ + """
784
+ + Shoemake's "squad" interpolation for quaternion splines:
785
+ + squad(q0, a, b, q1; t) = slerp( slerp(q0, q1; t),
786
+ + slerp(a, b; t),
787
+ + 2t(1-t) )
788
+ + where a, b are tangential control quaternions for q0, q1.
789
+ + """
790
+ + s1 = self.q_slerp(q0, q1, t)
791
+ + s2 = self.q_slerp(a, b, t)
792
+ + alpha = 2.0*t*(1.0 - t)
793
+ + return self.q_slerp(s1, s2, alpha)
794
+ +
795
+ + def uniform_cr_spline(self, quats, num_samples_per_segment=10):
796
+ + """
797
+ + Given a list of keyframe quaternions quats (each a torch 1D tensor [4]),
798
+ + compute a "Uniform Catmull–Rom–like" quaternion spline through them.
799
+ +
800
+ + Returns:
801
+ + A list (Python list) of interpolated quaternions (torch tensors),
802
+ + including all segment endpoints.
803
+ +
804
+ + Each interior qi gets a tangent T_i using neighbors q_{i-1}, q_i, q_{i+1}.
805
+ + For boundary tangents, we replicate the end quaternions.
806
+ + """
807
+ + n = quats.shape[0]
808
+ + if n < 2:
809
+ + return quats.unsqueeze(0) # not enough quats to interpolate
810
+ +
811
+ + # Precompute tangents
812
+ + tangents = self.compute_all_uniform_tangents(quats)
813
+ +
814
+ + # Interpolate each segment [qi, q_{i+1}]
815
+ + q0 = quats[:-1].unsqueeze(1)
816
+ + q1 = quats[1:].unsqueeze(1)
817
+ + a = tangents[:-1].unsqueeze(1)
818
+ + b = tangents[1:].unsqueeze(1)
819
+ +
820
+ + t_vals = torch.linspace(0.0, 1.0, num_samples_per_segment, device=quats.device, dtype=quats.dtype)
821
+ + t_vals = t_vals.view(1, -1, 1)
822
+ +
823
+ + out = self.squad(q0, a, b, q1, t_vals)
824
+ + return out
825
+ +
826
+ +
827
+ + def forward(self, pred, targ, cond=None, scene_id=None, norm_params=None):
828
+ + loss, err_t, err_smooth, err_geo, err_r, err_dtw, err_hausdorff, err_frechet, err_chamfer = self._loss(pred, targ, cond, scene_id, norm_params)
829
+ +
830
+ + info = {
831
+ + 'trans. error': err_t.item(),
832
+ + 'smoothness error': err_smooth.item(),
833
+ + # 'dtw': err_dtw.item(),
834
+ + # 'hausdorff': err_hausdorff.item(),
835
+ + # 'frechet': err_frechet.item(),
836
+ + # 'chamfer': err_chamfer.item(),
837
+ + 'quat. dist.': err_r.item(),
838
+ + 'geodesic dist.': err_geo.item(),
839
+ + }
840
+ +
841
+ + return loss, info
842
+ +
843
+ + def _loss(self, pred, targ, cond=None, scene_id=None, norm_params=None):
844
+ + def poly_eval(coeffs, x):
845
+ + """
846
+ + Evaluates a polynomial (with highest-degree term first) at points x.
847
+ + coeffs: 2D tensor of shape [num_polynomials, degree + 1], highest-degree term first.
848
+ + x: 1D tensor of points at which to evaluate the polynomial.
849
+ + Returns:
850
+ + 2D tensor of shape [num_polynomials, len(x)], containing p(x).
851
+ + """
852
+ + x_powers = torch.stack([x**i for i in range(coeffs.shape[-1] - 1, -1, -1)], dim=-1)
853
+ + x_powers = x_powers.to(torch.float64).to(coeffs.device)
854
+ + y = torch.matmul(coeffs, x_powers.T)
855
+ + return y
856
+ +
857
+ + # Make sure the dtype is float64
858
+ + pred = pred.to(torch.float64)
859
+ + targ = targ.to(torch.float64)
860
+ +
861
+ + # Rescale the translations
862
+ + if scene_id is not None and norm_params is not None:
863
+ + scene_id = scene_id.item()
864
+ + scene_scale = self.scales[str(scene_id)]
865
+ + scene_scale = norm_params['scale'][0] * scene_scale
866
+ + pred[..., :3] = pred[..., :3] * scene_scale
867
+ + targ[..., :3] = targ[..., :3] * scene_scale
868
+ + # print(pred[..., :3].max(), targ[..., :3].max())
869
+ +
870
+ + # We only consider interpolated points for loss calculation
871
+ + candidate_idxs = sorted(cond.keys())
872
+ + pred = pred[:, candidate_idxs[0] : candidate_idxs[-1] + 1, :]
873
+ + targ = targ[:, candidate_idxs[0] : candidate_idxs[-1] + 1, :]
874
+ +
875
+ + pred_trans = pred[..., :3]
876
+ + pred_quat = pred[..., 3:7]
877
+ + targ_trans = targ[..., :3]
878
+ + targ_quat = targ[..., 3:7]
879
+ +
880
+ + pred_coeffs = self.compute_spline_coeffs(pred_trans)
881
+ + targ_coeffs = self.compute_spline_coeffs(targ_trans)
882
+ +
883
+ + n_points = 2000
884
+ +
885
+ + # Distribute sample points among intervals
886
+ + dists = torch.norm(targ_trans[:, 1:, :] - targ_trans[:, :-1, :], dim=-1).reshape(-1)
887
+ + dists_c = torch.zeros(len(candidate_idxs) - 1, device=pred.device)
888
+ + for i in range(len(candidate_idxs) - 1):
889
+ + dists_c[i] = dists[candidate_idxs[i]:candidate_idxs[i+1]].sum()
890
+ +
891
+ + weights_c = dists_c / dists_c.sum()
892
+ + scaled_c = weights_c * n_points
893
+ + points_c = torch.floor(scaled_c).int()
894
+ +
895
+ + while points_c.sum() < n_points:
896
+ + idx = torch.argmax(scaled_c - points_c)
897
+ + points_c[idx] += 1
898
+ +
899
+ + # Calculate the spline loss
900
+ + sample_points = 50
901
+ + x = torch.linspace(0, 1, sample_points, device=pred.device)
902
+ + pred_spline = poly_eval(pred_coeffs, x).permute(0, 1, 3, 2).reshape(-1, sample_points, 3)
903
+ + targ_spline = poly_eval(targ_coeffs, x).permute(0, 1, 3, 2).reshape(-1, sample_points, 3)
904
+ +
905
+ + indexes = []
906
+ + start_idx = candidate_idxs[0]
907
+ + for c, (idx_i0, idx_i1) in enumerate(zip(candidate_idxs[:-1], candidate_idxs[1:])):
908
+ + p = points_c[c]
909
+ + total_dist = dists_c[c]
910
+ + dist_arr = dists[idx_i0 - start_idx : idx_i1 - start_idx]
911
+ +
912
+ + step_distances = (dist_arr / sample_points).repeat_interleave(sample_points)
913
+ + cumul_distances = step_distances.cumsum(dim=0)
914
+ +
915
+ + dist_per_pick = total_dist / p
916
+ + pick_targets = torch.arange(1, p + 1, device=dists.device) * dist_per_pick
917
+ +
918
+ + pick_idxs = torch.searchsorted(cumul_distances, pick_targets, right=True)
919
+ + pick_idxs = torch.clamp(pick_idxs, max=len(cumul_distances) - 1)
920
+ +
921
+ +
922
+ + indexes_1d = torch.zeros_like(step_distances)
923
+ + indexes_1d[pick_idxs] = 1
924
+ +
925
+ + indexes_2d = indexes_1d.view(len(dist_arr), sample_points)
926
+ +
927
+ + indexes.append(indexes_2d)
928
+ +
929
+ + indexes = torch.cat(indexes)[1: -1] # The first and last candidates don't have spline representations
930
+ +
931
+ + indexes_trans = torch.stack([indexes for _ in range(3)], dim=-1)
932
+ + indexes_quat = torch.stack([indexes for _ in range(4)], dim=-1)
933
+ +
934
+ + indexes_trans = indexes_trans.to(torch.bool)
935
+ + indexes_quat = indexes_quat.to(torch.bool)
936
+ +
937
+ + pred_trans_selected_values = pred_spline[indexes_trans]
938
+ + targ_trans_selected_values = targ_spline[indexes_trans]
939
+ +
940
+ + pred_trans_selected_values = pred_trans_selected_values.reshape(-1, 3)
941
+ + targ_trans_selected_values = targ_trans_selected_values.reshape(-1, 3)
942
+ +
943
+ + # Calculate the loss for quaternions
944
+ + pred_quat = pred_quat / pred_quat.norm(dim=-1, keepdim=True).clamp(min=1e-8)
945
+ + targ_quat = targ_quat / targ_quat.norm(dim=-1, keepdim=True).clamp(min=1e-8)
946
+ +
947
+ + targ_quat_spline = self.uniform_cr_spline(targ_quat.reshape(-1, 4), num_samples_per_segment=sample_points)
948
+ + pred_quat_spline = self.uniform_cr_spline(pred_quat.reshape(-1, 4), num_samples_per_segment=sample_points)
949
+ +
950
+ +
951
+ + targ_quat_spline = targ_quat_spline[1:-1]
952
+ + pred_quat_spline = pred_quat_spline[1:-1]
953
+ +
954
+ +
955
+ + pred_quat_selected_values = pred_quat_spline[indexes_quat]
956
+ + targ_quat_selected_values = targ_quat_spline[indexes_quat]
957
+ +
958
+ + pred_quat_selected_values = pred_quat_selected_values.reshape(-1, 4)
959
+ + targ_quat_selected_values = targ_quat_selected_values.reshape(-1, 4)
960
+ +
961
+ + # Calculate the geodesic loss
962
+ + pred_rot = quaternion_to_matrix(pred_quat_selected_values).reshape(-1, 3, 3)
963
+ + targ_rot = quaternion_to_matrix(targ_quat_selected_values).reshape(-1, 3, 3)
964
+ +
965
+ + eps = 1e-12
966
+ + r2r1 = pred_rot @ targ_rot.permute(0, 2, 1)
967
+ + trace = torch.diagonal(r2r1, dim1=-2, dim2=-1).sum(-1)
968
+ + trace = torch.clamp((trace - 1) / 2, -1.0 + eps, 1.0 - eps)
969
+ + geodesic_loss = torch.acos(trace).mean()
970
+ +
971
+ + # Calculate the rotation error
972
+ + dot_product = torch.sum(pred_quat_selected_values * targ_quat_selected_values, dim=-1).clamp(min=-1.0 + eps, max=1.0 - eps)
973
+ + quaternion_dist = 1 - (dot_product ** 2).mean()
974
+ +
975
+ + # Calculate the L2 loss
976
+ + l2_loss = F.mse_loss(pred_trans_selected_values, targ_trans_selected_values, reduction='mean')
977
+ +
978
+ + # Calculate the smoothness loss for translation and quaternion
979
+ + smoothness_multiplier = 10 ** 2 # Empirically determined multiplier for smoothness loss
980
+ + weight_acceleration = 0.1
981
+ + weight_jerk = 0.05
982
+ +
983
+ + pos_acc = pred_trans_selected_values[2:, :] - 2 * pred_trans_selected_values[1:-1, :] + pred_trans_selected_values[:-2, :]
984
+ + pos_jerk = pred_trans_selected_values[3:, :] - 3 * pred_trans_selected_values[2:-1, :] + 3 * pred_trans_selected_values[1:-2, :] - pred_trans_selected_values[:-3, :]
985
+ +
986
+ + pos_acceleration_loss = torch.mean(pos_acc ** 2)
987
+ + pos_jerk_loss = torch.mean(pos_jerk ** 2)
988
+ +
989
+ + q0 = pred_quat_selected_values[:-1, :]
990
+ + q1 = pred_quat_selected_values[1:, :]
991
+ + sign = torch.where((q0 * q1).sum(dim=-1) < 0, -1.0, 1.0)
992
+ + q1 = sign.unsqueeze(-1) * q1
993
+ +
994
+ + dq = self.q_multiply(q1, self.q_inverse(q0))
995
+ + theta = 2 * torch.acos(torch.clamp(dq[..., 0], -1.0 + 1e-8, 1.0 - 1e-8))
996
+ +
997
+ + rot_acc = theta[2:] - 2*theta[1:-1] + theta[:-2]
998
+ + rot_jerk = theta[3:] - 3*theta[2:-1] + 3*theta[1:-2] - theta[:-3]
999
+ +
1000
+ + rot_acceleration_loss = torch.mean(rot_acc ** 2)
1001
+ + rot_jerk_loss = torch.mean(rot_jerk ** 2)
1002
+ +
1003
+ + alpha_rot = 0.1 # <-- tune this (e.g. 0.1 … 10)
1004
+ +
1005
+ +
1006
+ + acceleration_loss = pos_acceleration_loss + alpha_rot * rot_acceleration_loss
1007
+ + jerk_loss = pos_jerk_loss + alpha_rot * rot_jerk_loss
1008
+ +
1009
+ + smoothness_loss = (
1010
+ + weight_acceleration * acceleration_loss
1011
+ + + weight_jerk * jerk_loss
1012
+ + ) * smoothness_multiplier
1013
+ +
1014
+ +
1015
+ + # Calculate the spline loss
1016
+ + l2_multiplier = 10.0
1017
+ + spline_loss = l2_multiplier * (l2_loss + smoothness_loss) + geodesic_loss + quaternion_dist
1018
+ +
1019
+ + dtw_loss, hausdorff_loss, frechet_loss, chamfer_loss = None, None, None, None
1020
+ +
1021
+ + # Uncomment these lines if you want to use the other losses
1022
+ + '''
1023
+ + dtw = DynamicTimeWarpingLoss()
1024
+ + dtw_loss, _ = dtw.forward(pred_trans_selected_values.reshape(-1, 3), targ_trans_selected_values.reshape(-1, 3))
1025
+ +
1026
+ + hausdorff = HausdorffDistanceLoss()
1027
+ + hausdorff_loss, _ = hausdorff.forward(pred_trans_selected_values.reshape(-1, 3), targ_trans_selected_values.reshape(-1, 3))
1028
+ +
1029
+ + frec = FrechetDistanceLoss()
1030
+ + frechet_loss, _ = frec.forward(pred_trans_selected_values.reshape(-1, 3), targ_trans_selected_values.reshape(-1, 3))
1031
+ +
1032
+ + chamfer = ChamferDistanceLoss()
1033
+ + chamfer_loss, _ = chamfer.forward(pred_trans_selected_values.reshape(-1, 3), targ_trans_selected_values.reshape(-1, 3))
1034
+ + '''
1035
+ +
1036
+ + return spline_loss, l2_multiplier * l2_loss, l2_multiplier * smoothness_loss, geodesic_loss, quaternion_dist, dtw_loss, hausdorff_loss, frechet_loss, chamfer_loss
1037
+ +
1038
+ +
1039
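One detail of `_loss` worth calling out: the 2000 comparison points are allotted to keyframe intervals in proportion to arc length with a largest-remainder rounding scheme; each interval gets the floor of its ideal share, and the leftover points go to the intervals with the biggest fractional remainders. A tiny worked example with made-up distances:

    import torch

    dists_c = torch.tensor([1.0, 1.0, 1.0])       # equal per-interval arc lengths
    n_points = 10
    scaled = dists_c / dists_c.sum() * n_points   # ideal shares: [3.33, 3.33, 3.33]
    points = torch.floor(scaled).int()            # [3, 3, 3] -> one point short
    while points.sum() < n_points:
        points[torch.argmax(scaled - points)] += 1   # -> [4, 3, 3]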
+ +class DynamicTimeWarpingLoss(nn.Module):
1040
+ + def __init__(self):
1041
+ + super().__init__()
1042
+ +
1043
+ + def _dtw_distance(self, seq1: torch.Tensor, seq2: torch.Tensor) -> torch.Tensor:
1044
+ + """
1045
+ + Computes the DTW distance between two 2D tensors (T x D),
1046
+ + where T is sequence length and D is feature dimension.
1047
+ + """
1048
+ + # seq1, seq2 shapes: (time_steps, feature_dim)
1049
+ + n, m = seq1.size(0), seq2.size(0)
1050
+ +
1051
+ + # Cost matrix (pairwise distances between all elements)
1052
+ + cost = torch.zeros(n, m, device=seq1.device, dtype=seq1.dtype)
1053
+ + for i in range(n):
1054
+ + for j in range(m):
1055
+ + cost[i, j] = torch.norm(seq1[i] - seq2[j], p=2)
1056
+ +
1057
+ + # Accumulated cost matrix
1058
+ + dist = torch.full((n + 1, m + 1), float('inf'),
1059
+ + device=seq1.device, dtype=seq1.dtype)
1060
+ + dist[0, 0] = 0.0
1061
+ +
1062
+ + # Populate the DP table
1063
+ + for i in range(1, n + 1):
1064
+ + for j in range(1, m + 1):
1065
+ + dist[i, j] = cost[i - 1, j - 1] + torch.min(
1066
+ + torch.min(
1067
+ + dist[i - 1, j], # Insertion
1068
+ + dist[i, j - 1], # Deletion
1069
+ + ),
1070
+ + dist[i - 1, j - 1]# Match
1071
+ + )
1072
+ +
1073
+ + return dist[n, m]
1074
+ +
1075
+ + def _loss(self, pred: torch.Tensor, targ: torch.Tensor) -> torch.Tensor:
1076
+ + """
1077
+ + Compute the average DTW loss over a batch of sequences.
1078
+ +
1079
+ + pred, targ shapes: (batch_size, T, D)
1080
+ + """
1081
+ + # Ensure shapes match in batch dimension
1082
+ + assert pred.size(0) == targ.size(0), "Batch sizes must match."
1083
+ +
1084
+ + # Compute DTW distance per sample in the batch
1085
+ + distances = []
1086
+ + for b in range(pred.size(0)):
1087
+ + seq1 = pred[b]
1088
+ + seq2 = targ[b]
1089
+ + dtw_val = self._dtw_distance(seq1, seq2)
1090
+ + distances.append(dtw_val)
1091
+ +
1092
+ + # Stack and take mean to get scalar loss
1093
+ + dtw_loss = torch.stack(distances).mean()
1094
+ + return dtw_loss
1095
+ +
1096
+ + def forward(self, pred: torch.Tensor, targ: torch.Tensor):
1097
+ + """
1098
+ + Returns a tuple: (loss, info_dict),
1099
+ + where loss is a scalar tensor and info_dict is a dictionary
1100
+ + of extra information (e.g., loss components).
1101
+ + """
1102
+ + loss = self._loss(pred, targ)
1103
+ +
1104
+ + info = {
1105
+ + 'dtw': loss.item()
1106
+ + }
1107
+ +
1108
+ + return loss, info
1109
+ +
1110
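The nested Python loops that fill the cost matrix here, and in the Hausdorff, Fréchet, and Chamfer classes below, are O(n*m) interpreter-level iterations. All four metrics start from the same pairwise Euclidean matrix, which `torch.cdist` produces in one vectorized call; only the DTW and Fréchet DP recursions are inherently sequential. A drop-in sketch:

    import torch

    def pairwise_cost(seq1, seq2):
        # Same matrix as the nested loops: cost[i, j] = ||seq1[i] - seq2[j]||_2
        return torch.cdist(seq1, seq2, p=2)   # (n, d) x (m, d) -> (n, m)

    # The symmetric Chamfer distance, for instance, then reduces to:
    # cost = pairwise_cost(set1, set2)
    # chamfer = cost.min(dim=1).values.mean() + cost.min(dim=0).values.mean()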
+ +class HausdorffDistanceLoss(nn.Module):
1111
+ + def __init__(self):
1112
+ + super().__init__()
1113
+ +
1114
+ + def _hausdorff_distance(self, set1: torch.Tensor, set2: torch.Tensor) -> torch.Tensor:
1115
+ + """
1116
+ + Computes the Hausdorff distance between two 2D tensors (N x D),
1117
+ + where N is the number of points and D is the feature dimension.
1118
+ +
1119
+ + The Hausdorff distance H(A,B) between two sets A and B is defined as:
1120
+ + H(A, B) = max( h(A, B), h(B, A) ),
1121
+ + where
1122
+ + h(A, B) = max_{a in A} min_{b in B} d(a, b).
1123
+ +
1124
+ + Here, d(a, b) is the Euclidean distance between points a and b.
1125
+ + """
1126
+ + # set1, set2 shapes: (num_points, feature_dim)
1127
+ + n, m = set1.size(0), set2.size(0)
1128
+ +
1129
+ + # Compute pairwise distances
1130
+ + cost = torch.zeros(n, m, device=set1.device, dtype=set1.dtype)
1131
+ + for i in range(n):
1132
+ + for j in range(m):
1133
+ + cost[i, j] = torch.norm(set1[i] - set2[j], p=2)
1134
+ +
1135
+ + # Forward direction: for each point in set1, find distance to closest point in set2
1136
+ + forward_min = cost.min(dim=1)[0] # Shape (n,)
1137
+ + forward_hausdorff = forward_min.max() # max over n
1138
+ +
1139
+ + # Backward direction: for each point in set2, find distance to closest point in set1
1140
+ + backward_min = cost.min(dim=0)[0] # Shape (m,)
1141
+ + backward_hausdorff = backward_min.max() # max over m
1142
+ +
1143
+ + # Hausdorff distance is the max of the two
1144
+ + hausdorff_dist = torch.max(forward_hausdorff, backward_hausdorff)
1145
+ + return hausdorff_dist
1146
+ +
1147
+ + def _loss(self, pred: torch.Tensor, targ: torch.Tensor) -> torch.Tensor:
1148
+ + """
1149
+ + Compute the average Hausdorff distance over a batch of point sets.
1150
+ +
1151
+ + pred, targ shapes: (batch_size, N, D)
1152
+ + """
1153
+ + # Ensure shapes match in batch dimension
1154
+ + assert pred.size(0) == targ.size(0), "Batch sizes must match."
1155
+ +
1156
+ + distances = []
1157
+ + for b in range(pred.size(0)):
1158
+ + set1 = pred[b]
1159
+ + set2 = targ[b]
1160
+ + h_dist = self._hausdorff_distance(set1, set2)
1161
+ + distances.append(h_dist)
1162
+ +
1163
+ + # Stack and take mean to get scalar loss
1164
+ + hausdorff_loss = torch.stack(distances).mean()
1165
+ + return hausdorff_loss
1166
+ +
1167
+ + def forward(self, pred: torch.Tensor, targ: torch.Tensor):
1168
+ + """
1169
+ + Returns a tuple: (loss, info_dict),
1170
+ + where loss is a scalar tensor and info_dict is a dictionary
1171
+ + of extra information (e.g., distance components).
1172
+ + """
1173
+ + loss = self._loss(pred, targ)
1174
+ +
1175
+ + info = {
1176
+ + 'hausdorff': loss.item()
1177
+ + }
1178
+ +
1179
+ + return loss, info
1180
+ +
1181
+ +class FrechetDistanceLoss(nn.Module):
1182
+ + def __init__(self):
1183
+ + super().__init__()
1184
+ +
1185
+ + def _frechet_distance(self, seq1: torch.Tensor, seq2: torch.Tensor) -> torch.Tensor:
1186
+ + """
1187
+ + Computes the (discrete) Fréchet distance between two 2D tensors (T x D),
1188
+ + where T is the sequence length and D is the feature dimension.
1189
+ +
1190
+ + The Fréchet distance between two curves in discrete form can be computed
1191
+ + by filling in a DP table "ca" where:
1192
+ +
1193
+ + ca[i, j] = max( d(seq1[i], seq2[j]),
1194
+ + min(ca[i-1, j], ca[i, j-1], ca[i-1, j-1]) )
1195
+ +
1196
+ + with boundary conditions handled appropriately.
1197
+ + Here, d(seq1[i], seq2[j]) is the Euclidean distance.
1198
+ + """
1199
+ + n, m = seq1.size(0), seq2.size(0)
1200
+ +
1201
+ + # Cost matrix (pairwise distances between all elements)
1202
+ + cost = torch.zeros(n, m, device=seq1.device, dtype=seq1.dtype)
1203
+ + for i in range(n):
1204
+ + for j in range(m):
1205
+ + cost[i, j] = torch.norm(seq1[i] - seq2[j], p=2)
1206
+ +
1207
+ + # DP matrix for the Fréchet distance
1208
+ + ca = torch.full((n, m), float('inf'), device=seq1.device, dtype=seq1.dtype)
1209
+ + ca[0, 0] = cost[0, 0]
1210
+ +
1211
+ + # Initialize first row
1212
+ + for i in range(1, n):
1213
+ + ca[i, 0] = torch.max(ca[i - 1, 0], cost[i, 0])
1214
+ +
1215
+ + # Initialize first column
1216
+ + for j in range(1, m):
1217
+ + ca[0, j] = torch.max(ca[0, j - 1], cost[0, j])
1218
+ +
1219
+ + # Populate the DP table
1220
+ + for i in range(1, n):
1221
+ + for j in range(1, m):
1222
+ + ca[i, j] = torch.max(
1223
+ + cost[i, j],
1224
+ + torch.min(
1225
+ + torch.min(
1226
+ + ca[i - 1, j],
1227
+ + ca[i, j - 1],
1228
+ + ),
1229
+ + ca[i - 1, j - 1]
1230
+ + )
1231
+ + )
1232
+ +
1233
+ + return ca[n - 1, m - 1]
1234
+ +
1235
+ + def _loss(self, pred: torch.Tensor, targ: torch.Tensor) -> torch.Tensor:
1236
+ + """
1237
+ + Compute the average Fréchet distance over a batch of sequences.
1238
+ +
1239
+ + pred, targ shapes: (batch_size, T, D)
1240
+ + """
1241
+ + # Ensure shapes match in batch dimension
1242
+ + assert pred.size(0) == targ.size(0), "Batch sizes must match."
1243
+ +
1244
+ + distances = []
1245
+ + for b in range(pred.size(0)):
1246
+ + seq1 = pred[b]
1247
+ + seq2 = targ[b]
1248
+ + fd_val = self._frechet_distance(seq1, seq2)
1249
+ + distances.append(fd_val)
1250
+ +
1251
+ + # Stack and take mean to get scalar loss
1252
+ + frechet_loss = torch.stack(distances).mean()
1253
+ + return frechet_loss
1254
+ +
1255
+ + def forward(self, pred: torch.Tensor, targ: torch.Tensor):
1256
+ + """
1257
+ + Returns a tuple: (loss, info_dict),
1258
+ + where loss is a scalar tensor and info_dict is a dictionary
1259
+ + of extra information (e.g., distance components).
1260
+ + """
1261
+ + loss = self._loss(pred, targ)
1262
+ + info = {
1263
+ + 'frechet': loss.item()
1264
+ + }
1265
+ + return loss, info
1266
+ +
1267
+ +class ChamferDistanceLoss(nn.Module):
1268
+ + def __init__(self):
1269
+ + super().__init__()
1270
+ +
1271
+ + def _chamfer_distance(self, set1: torch.Tensor, set2: torch.Tensor) -> torch.Tensor:
1272
+ + """
1273
+ + Computes the symmetrical Chamfer distance between
1274
+ + two 2D tensors (N x D), where N is the number of points
1275
+ + and D is the feature dimension.
1276
+ +
1277
+ + The Chamfer distance between two point sets A and B is often defined as:
1278
+ +
1279
+ + d_chamfer(A, B) = 1/|A| ∑_{a ∈ A} min_{b ∈ B} ‖a - b‖₂
1280
+ + + 1/|B| ∑_{b ∈ B} min_{a ∈ A} ‖b - a‖₂,
1281
+ +
1282
+ + where ‖·‖₂ is the Euclidean distance.
1283
+ + """
1284
+ + # set1, set2 shapes: (num_points, feature_dim)
1285
+ + n, m = set1.size(0), set2.size(0)
1286
+ +
1287
+ + cost = torch.zeros(n, m, device=set1.device, dtype=set1.dtype)
1288
+ + for i in range(n):
1289
+ + for j in range(m):
1290
+ + cost[i, j] = torch.norm(set1[i] - set2[j], p=2)
1291
+ +
1292
+ + # For each point in set1, find distance to the closest point in set2
1293
+ + forward_min = cost.min(dim=1)[0] # shape: (n,)
1294
+ + forward_mean = forward_min.mean()
1295
+ +
1296
+ + # For each point in set2, find distance to the closest point in set1
1297
+ + backward_min = cost.min(dim=0)[0] # shape: (m,)
1298
+ + backward_mean = backward_min.mean()
1299
+ +
1300
+ + chamfer_dist = forward_mean + backward_mean
1301
+ + return chamfer_dist
1302
+ +
1303
+ + def _loss(self, pred: torch.Tensor, targ: torch.Tensor) -> torch.Tensor:
1304
+ + """
1305
+ + Compute the average Chamfer distance over a batch of point sets.
1306
+ +
1307
+ + pred, targ shapes: (batch_size, N, D)
1308
+ + """
1309
+ + # Ensure shapes match in batch dimension
1310
+ + assert pred.size(0) == targ.size(0), "Batch sizes must match."
1311
+ +
1312
+ + distances = []
1313
+ + for b in range(pred.size(0)):
1314
+ + set1 = pred[b]
1315
+ + set2 = targ[b]
1316
+ + distance_val = self._chamfer_distance(set1, set2)
1317
+ + distances.append(distance_val)
1318
+ +
1319
+ + # Combine into a single scalar
1320
+ + chamfer_loss = torch.stack(distances).mean()
1321
+ + return chamfer_loss
1322
+ +
1323
+ + def forward(self, pred: torch.Tensor, targ: torch.Tensor):
1324
+ + """
1325
+ + Returns a tuple: (loss, info_dict),
1326
+ + where 'loss' is a scalar tensor and 'info_dict' is a dictionary
1327
+ + of extra information (e.g., distance components).
1328
+ + """
1329
+ + loss = self._loss(pred, targ)
1330
+ + info = {
1331
+ + 'chamfer': loss.item()
1332
+ + }
1333
+ + return loss, info
1334
+ +
1335
+ +
1336
+ +def slerp(q1, q2, t):
1337
+ + """Spherical linear interpolation between two quaternions."""
1338
+ + q1 = q1 / np.linalg.norm(q1)
1339
+ + q2 = q2 / np.linalg.norm(q2)
1340
+ + dot = np.dot(q1, q2)
1341
+ +
1342
+ + if dot < 0.0:
1343
+ + q2 = -q2
1344
+ + dot = -dot
1345
+ + # If dot is very close to 1, use linear interpolation
1346
+ +
1347
+ + if dot > 0.9995:
1348
+ + result = q1 + t * (q2 - q1)
1349
+ + result = result / np.linalg.norm(result)
1350
+ + return result
1351
+ +
1352
+ + theta_0 = np.arccos(dot)
1353
+ + theta = theta_0 * t
1354
+ +
1355
+ + q3 = q2 - q1 * dot
1356
+ + q3 = q3 / np.linalg.norm(q3)
1357
+ + return q1 * np.cos(theta) + q3 * np.sin(theta)
1358
+ +
1359
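This NumPy `slerp` uses the orthonormal-basis form: after projecting the q1 component out of q2, the result is q1 * cos(theta) + q3 * sin(theta), with a normalized linear blend as the fallback when the endpoints nearly coincide. A quick sanity check with made-up endpoints (assumes the `slerp` above is in scope):

    import numpy as np

    q_id  = np.array([1.0, 0.0, 0.0, 0.0])                              # identity
    q_z90 = np.array([np.cos(np.pi / 4), 0.0, 0.0, np.sin(np.pi / 4)])  # 90 deg about z
    q_mid = slerp(q_id, q_z90, 0.5)
    # q_mid ~ [0.9239, 0, 0, 0.3827], i.e. a 45 deg rotation about z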
+ +def catmull_rom_spline_with_rotation(control_points, timepoints, horizon):
1360
+ + """Compute Catmull-Rom spline for both position and quaternion rotation."""
1361
+ + spline_points = []
1362
+ + # Extrapolate the initial points
1363
+ + if timepoints[0] != 0:
1364
+ + for t in range(timepoints[0]):
1365
+ + x = control_points[0][0]
1366
+ + y = control_points[0][1]
1367
+ + z = control_points[0][2]
1368
+ + q = control_points[0][3:7]
1369
+ + spline_points.append(np.concatenate([np.array([x, y, z]), q]))
1370
+ +
1371
+ + # Linearly interpolate between the 0th and 1st control points
1372
+ + for t in np.linspace(0, 1, timepoints[1] - timepoints[0] + 1):
1373
+ + x = control_points[0][0] + t * (control_points[1][0] - control_points[0][0])
1374
+ + y = control_points[0][1] + t * (control_points[1][1] - control_points[0][1])
1375
+ + z = control_points[0][2] + t * (control_points[1][2] - control_points[0][2])
1376
+ + q = slerp(control_points[0][3:7], control_points[1][3:7], t)
1377
+ + spline_points.append(np.concatenate([np.array([x, y, z]), q]))
1378
+ +
1379
+ +
1380
+ + # Iterate over the control points
1381
+ + for i in range(1, len(control_points) - 2):
1382
+ + P0 = control_points[i-1][:3]
1383
+ + P1 = control_points[i][:3]
1384
+ + P2 = control_points[i+1][:3]
1385
+ + P3 = control_points[i+2][:3]
1386
+ + Q0 = control_points[i-1][3:7]
1387
+ + Q1 = control_points[i][3:7]
1388
+ + Q2 = control_points[i+1][3:7]
1389
+ + Q3 = control_points[i+2][3:7]
1390
+ +
1391
+ + # Interpolate position (using Catmull-Rom spline)
1392
+ + for idx, t in enumerate(np.linspace(0, 1, timepoints[i+1] - timepoints[i] + 1)):
1393
+ + if idx == 0:
1394
+ + continue
1395
+ +
1396
+ + x = 0.5 * ((2 * P1[0]) + (-P0[0] + P2[0]) * t +
1397
+ + (2 * P0[0] - 5 * P1[0] + 4 * P2[0] - P3[0]) * t**2 +
1398
+ + (-P0[0] + 3 * P1[0] - 3 * P2[0] + P3[0]) * t**3)
1399
+ + y = 0.5 * ((2 * P1[1]) + (-P0[1] + P2[1]) * t +
1400
+ + (2 * P0[1] - 5 * P1[1] + 4 * P2[1] - P3[1]) * t**2 +
1401
+ + (-P0[1] + 3 * P1[1] - 3 * P2[1] + P3[1]) * t**3)
1402
+ + z = 0.5 * ((2 * P1[2]) + (-P0[2] + P2[2]) * t +
1403
+ + (2 * P0[2] - 5 * P1[2] + 4 * P2[2] - P3[2]) * t**2 +
1404
+ + (-P0[2] + 3 * P1[2] - 3 * P2[2] + P3[2]) * t**3)
1405
+ + q = slerp(Q1, Q2, t)
1406
+ + spline_points.append(np.concatenate([np.array([x, y, z]), q]))
1407
+ +
1408
+ + # Linearly interpolate between the second-to-last and last control points
1409
+ + for idx, t in enumerate(np.linspace(0, 1, timepoints[-1] - timepoints[-2] + 1)):
1410
+ + if idx == 0:
1411
+ + continue
1412
+ + x = control_points[-2][0] + t * (control_points[-1][0] - control_points[-2][0])
1413
+ + y = control_points[-2][1] + t * (control_points[-1][1] - control_points[-2][1])
1414
+ + z = control_points[-2][2] + t * (control_points[-1][2] - control_points[-2][2])
1415
+ + q = slerp(control_points[-2][3:7], control_points[-1][3:7], t)
1416
+ + spline_points.append(np.concatenate([np.array([x, y, z]), q]))
1417
+ +
1418
+ + # Extrapolate the rest of the points
1419
+ + if timepoints[-1] != horizon:
1420
+ + for t in range(timepoints[-1] + 1, horizon):
1421
+ + x = control_points[-1][0]
1422
+ + y = control_points[-1][1]
1423
+ + z = control_points[-1][2]
1424
+ + q = control_points[-1][3:7]
1425
+ + spline_points.append(np.concatenate([np.array([x, y, z]), q]))
1426
+ +
1427
+ + stacked_spline_points = np.stack(spline_points, axis=0)
1428
+ +
1429
+ + if control_points.shape[1] != 7:
1430
+ + stacked_spline_points = np.concatenate([stacked_spline_points, np.zeros((stacked_spline_points.shape[0], 1))], axis=1)
1431
+ +
1432
+ +
1433
+ + return stacked_spline_points
1434
+ +
1435
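`catmull_rom_spline_with_rotation` is the trajectory prior used throughout this commit: Catmull-Rom in position and slerp in orientation on interior segments, plain linear interpolation plus slerp on the two boundary segments, and constant extrapolation outside the first and last keyframes. A minimal usage sketch with made-up keyframes, each row being [x, y, z, qw, qx, qy, qz]:

    import numpy as np

    keyframes = np.array([
        [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0],
        [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0],
        [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0],
        [0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0],
    ])
    timepoints = np.array([0, 10, 20, 30])
    traj = catmull_rom_spline_with_rotation(keyframes, timepoints, horizon=40)
    # traj.shape == (40, 7): dense poses, held constant after the last keyframe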
+ +def catmull_rom_loss(trajectories, conditions, loss_fc):
1436
+ + '''
1437
+ + loss for catmull-rom interpolation
1438
+ + '''
1439
+ + batch_size, horizon, transition = trajectories.shape
1440
+ +
1441
+ + # Extract known indices and values
1442
+ + known_indices = np.array(list(conditions.keys()), dtype=int)
1443
+ +
1444
+ + # candidate_no x batch_size x dim
1445
+ + known_values = np.stack([c.cpu().numpy() for c in conditions.values()], axis=0)
1446
+ + known_values = np.moveaxis(known_values, 0, 1)
1447
+ +
1448
+ + # Sort the timepoints
1449
+ + sorted_indices = np.argsort(known_indices)
1450
+ + known_indices = known_indices[sorted_indices]
1451
+ + known_values = known_values[:, sorted_indices]
1452
+ + spline_points = np.array([catmull_rom_spline_with_rotation(known_values[b], known_indices, horizon) for b in range(batch_size)])
1453
+ +
1454
+ + # Convert to tensor and move to the same device as trajectories
1455
+ + spline_points = torch.tensor(spline_points, dtype=torch.float64, device=trajectories.device)
1456
+ + assert spline_points.shape == trajectories.shape, f"Shape mismatch: {spline_points.shape} != {trajectories.shape}"
1457
+ + return loss_fc(spline_points, trajectories)
1458
+ +
1459
+ Losses = {
1460
+ 'l1': WeightedL1,
1461
+ 'l2': WeightedL2,
1462
+ 'value_l1': ValueL1,
1463
+ 'value_l2': ValueL2,
1464
+ + 'geodesic_l2': GeodesicL2Loss,
1465
+ + 'rotation_translation': RotationTranslationLoss,
1466
+ + 'spline': SplineLoss,
1467
+ }
1468
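The three new registry entries are what connect the configs to the model: maze2d's `'loss_type': 'spline'` resolves to `SplineLoss` when `GaussianDiffusion` builds its loss (upstream this is roughly `self.loss_fn = Losses[loss_type](loss_weights, self.action_dim)`; the new classes take `*args` so they can be constructed the same way and ignore those arguments). A hypothetical lookup:

    loss_fn = Losses['spline']()            # note: __init__ reads scene_scale.json from the CWD
    loss, info = loss_fn(pred, targ, cond)  # info carries the per-term diagnostics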
+ diff --git a/diffuser/models/temporal.py b/diffuser/models/temporal.py
1469
+ index e0b9e5c..0f7854a 100644
1470
+ --- a/diffuser/models/temporal.py
1471
+ +++ b/diffuser/models/temporal.py
1472
+ @@ -17,18 +17,18 @@ class ResidualTemporalBlock(nn.Module):
1473
+ super().__init__()
1474
+
1475
+ self.blocks = nn.ModuleList([
1476
+ - Conv1dBlock(inp_channels, out_channels, kernel_size),
1477
+ - Conv1dBlock(out_channels, out_channels, kernel_size),
1478
+ + Conv1dBlock(inp_channels, out_channels, kernel_size).to(dtype=torch.float64),
1479
+ + Conv1dBlock(out_channels, out_channels, kernel_size).to(dtype=torch.float64),
1480
+ ])
1481
+
1482
+ self.time_mlp = nn.Sequential(
1483
+ nn.Mish(),
1484
+ - nn.Linear(embed_dim, out_channels),
1485
+ + nn.Linear(embed_dim, out_channels).to(dtype=torch.float64),
1486
+ Rearrange('batch t -> batch t 1'),
1487
+ - )
1488
+ + ).to(dtype=torch.float64)
1489
+
1490
+ - self.residual_conv = nn.Conv1d(inp_channels, out_channels, 1) \
1491
+ - if inp_channels != out_channels else nn.Identity()
1492
+ + self.residual_conv = nn.Conv1d(inp_channels, out_channels, 1).to(dtype=torch.float64) \
1493
+ + if inp_channels != out_channels else nn.Identity().to(dtype=torch.float64)
1494
+
1495
+ def forward(self, x, t):
1496
+ '''
1497
+ @@ -37,7 +37,8 @@ class ResidualTemporalBlock(nn.Module):
1498
+ returns:
1499
+ out : [ batch_size x out_channels x horizon ]
1500
+ '''
1501
+ - out = self.blocks[0](x) + self.time_mlp(t)
1502
+ +
1503
+ + out = self.blocks[0](x) + self.time_mlp(t.double())
1504
+ out = self.blocks[1](out)
1505
+ return out + self.residual_conv(x)
1506
+
1507
+ @@ -49,11 +50,11 @@ class TemporalUnet(nn.Module):
1508
+ transition_dim,
1509
+ cond_dim,
1510
+ dim=32,
1511
+ - dim_mults=(1, 2, 4, 8),
1512
+ + dim_mults=(1, 2, 4),
1513
+ ):
1514
+ super().__init__()
1515
+
1516
+ - dims = [transition_dim, *map(lambda m: dim * m, dim_mults)]
1517
+ + dims = [(transition_dim + cond_dim), *map(lambda m: dim * m, dim_mults)]
1518
+ in_out = list(zip(dims[:-1], dims[1:]))
1519
+ print(f'[ models/temporal ] Channel dimensions: {in_out}')
1520
+
1521
+ @@ -100,7 +101,7 @@ class TemporalUnet(nn.Module):
1522
+
1523
+ self.final_conv = nn.Sequential(
1524
+ Conv1dBlock(dim, dim, kernel_size=5),
1525
+ - nn.Conv1d(dim, transition_dim, 1),
1526
+ + nn.Conv1d(dim, transition_dim, 1).to(dtype=torch.float64),
1527
+ )
1528
+
1529
+ def forward(self, x, cond, time):
1530
+ @@ -129,7 +130,6 @@ class TemporalUnet(nn.Module):
1531
+ x = upsample(x)
1532
+
1533
+ x = self.final_conv(x)
1534
+ -
1535
+ x = einops.rearrange(x, 'b t h -> b h t')
1536
+ return x
1537
+
1538
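The scattered `.to(torch.float64)` casts across `Conv1dBlock`, `ResidualTemporalBlock`, and `final_conv` all serve one goal: running the U-Net in double precision to match the float64 data path. A less invasive alternative is a single module-level cast after construction, since `nn.Module.double()` recursively converts every parameter and buffer (constructor arguments below are placeholders):

    from diffuser.models.temporal import TemporalUnet

    model = TemporalUnet(horizon=384, transition_dim=8, cond_dim=8)  # placeholder args
    model = model.double()   # one call casts all parameters/buffers to torch.float64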
+ diff --git a/diffuser/utils/arrays.py b/diffuser/utils/arrays.py
1539
+ index c3a9d24..96a7093 100644
1540
+ --- a/diffuser/utils/arrays.py
1541
+ +++ b/diffuser/utils/arrays.py
1542
+ @@ -54,7 +54,7 @@ def batchify(batch):
1543
+ 1) converting np arrays to torch tensors and
1544
+ 2) and ensuring that everything has a batch dimension
1545
+ '''
1546
+ - fn = lambda x: to_torch(x[None])
1547
+ + fn = lambda x: to_torch(x[None], dtype=torch.float64)
1548
+
1549
+ batched_vals = []
1550
+ for field in batch._fields:
1551
+ diff --git a/diffuser/utils/serialization.py b/diffuser/utils/serialization.py
1552
+ index 6cc9db9..039eb64 100644
1553
+ --- a/diffuser/utils/serialization.py
1554
+ +++ b/diffuser/utils/serialization.py
1555
+ @@ -19,7 +19,7 @@ def mkdir(savepath):
1556
+ return False
1557
+
1558
+ def get_latest_epoch(loadpath):
1559
+ - states = glob.glob1(os.path.join(*loadpath), 'state_*')
1560
+ + states = glob.glob1(os.path.join(loadpath), 'state_*')
1561
+ latest_epoch = -1
1562
+ for state in states:
1563
+ epoch = int(state.replace('state_', '').replace('.pt', ''))
1564
+ diff --git a/diffuser/utils/training.py b/diffuser/utils/training.py
1565
+ index be3556e..c21e0f0 100644
1566
+ --- a/diffuser/utils/training.py
1567
+ +++ b/diffuser/utils/training.py
1568
+ @@ -4,16 +4,24 @@ import numpy as np
1569
+ import torch
1570
+ import einops
1571
+ import pdb
1572
+ +from tqdm import tqdm
1573
+ +import wandb
1574
+ +from pytorch3d.transforms import axis_angle_to_quaternion
1575
+
1576
+ from .arrays import batch_to_device, to_np, to_device, apply_dict
1577
+ from .timer import Timer
1578
+ from .cloud import sync_logs
1579
+ +from ..models.helpers import catmull_rom_spline_with_rotation
1580
+
1581
+ def cycle(dl):
1582
+ while True:
1583
+ for data in dl:
1584
+ yield data
1585
+
1586
+ +def assert_no_nan_weights(model):
1587
+ + for name, param in model.named_parameters():
1588
+ + assert not torch.isnan(param).any(), f"NaN detected in parameter: {name}"
1589
+ +
1590
+ class EMA():
1591
+ '''
1592
+ exponential moving average
1593
+ @@ -71,13 +79,35 @@ class Trainer(object):
+ self.gradient_accumulate_every = gradient_accumulate_every
+
+ self.dataset = dataset
+ - self.dataloader = cycle(torch.utils.data.DataLoader(
+ - self.dataset, batch_size=train_batch_size, num_workers=1, shuffle=True, pin_memory=True
+ + dataset_size = len(self.dataset)
+ +
+ + # Read the indices from the .txt file
+ + with open(os.path.join(results_folder, 'train_indices.txt'), 'r') as f:
+ + self.train_indices = f.read()
+ + self.train_indices = [int(i) for i in self.train_indices.split('\n') if i]
+ +
+ + with open(os.path.join(results_folder, 'val_indices.txt'), 'r') as f:
+ + self.val_indices = f.read()
+ + self.val_indices = [int(i) for i in self.val_indices.split('\n') if i]
+ +
+ +
+ + self.train_dataset = torch.utils.data.Subset(self.dataset, self.train_indices)
+ + self.val_dataset = torch.utils.data.Subset(self.dataset, self.val_indices)
+ + self.train_dataloader = cycle(torch.utils.data.DataLoader(
+ + self.train_dataset, batch_size=train_batch_size, num_workers=1, pin_memory=True, shuffle=False
+ + ))
+ +
+ + self.val_dataloader = cycle(torch.utils.data.DataLoader(
+ + self.val_dataset, batch_size=train_batch_size, num_workers=1, pin_memory=True, shuffle=False
+ ))
+ +
+ self.dataloader_vis = cycle(torch.utils.data.DataLoader(
+ self.dataset, batch_size=1, num_workers=0, shuffle=True, pin_memory=True
+ ))
+ self.renderer = renderer
+ +
+ +
+ +
+ self.optimizer = torch.optim.Adam(diffusion_model.parameters(), lr=train_lr)
+
+ self.logdir = results_folder
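This hunk replaces the single shuffled DataLoader with a deterministic train/val split read from index files in the results folder (the residual-diffuser/*_indices.txt files added further down in this commit). A hedged, self-contained sketch of the same mechanism; the TensorDataset, sizes, and index ranges are placeholders:

import os
import torch
from torch.utils.data import DataLoader, Subset, TensorDataset

results_folder = '.'  # assumption: index files live next to the checkpoints
with open(os.path.join(results_folder, 'train_indices.txt'), 'w') as f:
    f.write('\n'.join(str(i) for i in range(700)) + '\n')
with open(os.path.join(results_folder, 'val_indices.txt'), 'w') as f:
    f.write('\n'.join(str(i) for i in range(700, 790)) + '\n')

def read_indices(path):
    with open(path) as f:
        return [int(line) for line in f.read().split('\n') if line]

dataset = TensorDataset(torch.randn(878, 16, 7))  # stand-in trajectory dataset

train_loader = DataLoader(Subset(dataset, read_indices('train_indices.txt')),
                          batch_size=32, shuffle=False, pin_memory=True)
val_loader = DataLoader(Subset(dataset, read_indices('val_indices.txt')),
                        batch_size=32, shuffle=False, pin_memory=True)
print(len(train_loader.dataset), len(val_loader.dataset))  # 700 90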
+ @@ -88,6 +118,8 @@
+ self.reset_parameters()
+ self.step = 0
+
+ + self.log_to_wandb = False
+ +
+ def reset_parameters(self):
+ self.ema_model.load_state_dict(self.model.state_dict())
+
+ @@ -102,36 +134,129 @@
+ #-----------------------------------------------------------------------------#
+
+ def train(self, n_train_steps):
+ -
+ + # Save the indices as .txt files
+ + with open(os.path.join(self.logdir, 'train_indices.txt'), 'w') as f:
+ + for idx in self.train_indices:
+ + f.write(f"{idx}\n")
+ + with open(os.path.join(self.logdir, 'val_indices.txt'), 'w') as f:
+ + for idx in self.val_indices:
+ + f.write(f"{idx}\n")
+ +
+ timer = Timer()
+ - for step in range(n_train_steps):
+ + torch.autograd.set_detect_anomaly(True)
+ +
+ + # Setup wandb
+ + if self.log_to_wandb:
+ + wandb.init(
+ + project='trajectory-generation',
+ + config={'lr': self.optimizer.param_groups[0]['lr'], 'batch_size': self.batch_size, 'gradient_accumulate_every': self.gradient_accumulate_every},
+ + )
+ +
+ + for step in tqdm(range(n_train_steps)):
+ +
+ + mean_train_loss = 0.0
+ for i in range(self.gradient_accumulate_every):
+ - batch = next(self.dataloader)
+ + batch = next(self.train_dataloader)
+ batch = batch_to_device(batch)
+ -
+ - loss, infos = self.model.loss(*batch)
+ +
+ + loss, infos = self.model.loss(x=batch.trajectories, cond=batch.conditions)
+ loss = loss / self.gradient_accumulate_every
+ + mean_train_loss += loss.item()
+ loss.backward()
+
+ + if self.log_to_wandb:
+ + wandb.log({
+ + 'step': self.step,
+ + 'train/loss': mean_train_loss
+ + })
+ +
+ + # torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
+ +
+ self.optimizer.step()
+ self.optimizer.zero_grad()
+
+ + assert_no_nan_weights(self.model)
+ +
+ if self.step % self.update_ema_every == 0:
+ self.step_ema()
+
+ if self.step % self.save_freq == 0:
+ - label = self.step // self.label_freq * self.label_freq
+ + label = self.step
+ + print(f'Saving model at step {self.step}...')
+ self.save(label)
+
+ if self.step % self.log_freq == 0:
+ - infos_str = ' | '.join([f'{key}: {val:8.4f}' for key, val in infos.items()])
+ - print(f'{self.step}: {loss:8.4f} | {infos_str} | t: {timer():8.4f}')
+ + val_losses = []
+ + lin_int_losses = []
+ +
+ + val_infos_list = []
+ + lin_int_infos_list = []
+ +
+ + catmull_losses = []
+ + catmull_infos_list = []
+ +
+ + for _ in range(len(self.val_indices)):
+ + val_batch = next(self.val_dataloader)
+ + val_batch = batch_to_device(val_batch)
+ +
+ + traj = self.model.forward(val_batch.conditions, horizon=val_batch.trajectories.shape[1])
+ + val_loss, val_infos = self.model.loss_fn(traj, val_batch.trajectories, cond=val_batch.conditions)
+ +
+ + val_losses.append(val_loss.item())
+ + val_infos_list.append({key: val for key, val in val_infos.items()})
+ +
+ +
+ + (lin_int_loss, lin_int_infos), lin_int_traj = self.linear_interpolation_loss(
+ + val_batch.trajectories, val_batch.conditions, self.model.loss_fn
+ + )
+ + lin_int_losses.append(lin_int_loss.item())
+ + lin_int_infos_list.append({key: val for key, val in lin_int_infos.items()})
+ +
+ + (catmull_loss, catmull_infos), catmull_traj = self.catmull_rom_loss(
+ + val_batch.trajectories, val_batch.conditions, self.model.loss_fn
+ + )
+ +
+ + catmull_losses.append(catmull_loss.item())
+ + catmull_infos_list.append(catmull_infos)
+ +
+ + avg_val_loss = np.mean(val_losses)
+ + avg_lin_int_loss = np.mean(lin_int_losses)
+ +
+ + val_infos = {key: np.mean([info[key] for info in val_infos_list]) for key in val_infos_list[0].keys()}
+ + lin_int_infos = {key: np.mean([info[key] for info in lin_int_infos_list]) for key in lin_int_infos_list[0].keys()}
+
+ - if self.step == 0 and self.sample_freq:
+ - self.render_reference(self.n_reference)
+ + avg_catmull_loss = np.mean(catmull_losses)
+ + catmull_infos = {key: np.mean([info[key] for info in catmull_infos_list]) for key in catmull_infos_list[0].keys()}
+
+ - if self.sample_freq and self.step % self.sample_freq == 0:
+ - self.render_samples(n_samples=self.n_samples)
+ + val_infos_str = ' | '.join([f'{key}: {val:8.4f}' for key, val in val_infos.items()])
+ + lin_int_infos_str = ' | '.join([f'{key}: {val:8.4f}' for key, val in lin_int_infos.items()])
+ + catmull_infos_str = ' | '.join([f'{key}: {val:8.4f}' for key, val in catmull_infos.items()])
+ +
+ +
+ + infos_str = ' | '.join([f'{key}: {val:8.4f}' for key, val in infos.items()])
+ + print("Learning Rate: ", self.optimizer.param_groups[0]['lr'])
+ + print(f'Step {self.step}: {loss * self.gradient_accumulate_every:8.4f} | {infos_str} | t: {timer():8.4f}')
+ + print(f'Validation - {self.step}: {avg_val_loss:8.4f} | {val_infos_str} | t: {timer():8.4f}')
+ + print(f'Linear Interpolation Loss - {self.step}: {avg_lin_int_loss:8.4f} | {lin_int_infos_str} | t: {timer():8.4f}')
+ + print(f'Catmull Rom Loss - {self.step}: {avg_catmull_loss:8.4f} | {catmull_infos_str} | t: {timer():8.4f}')
+ + print()
+ +
+ + if self.log_to_wandb:
+ + wandb.log({
+ + 'step': self.step,
+ + 'val/loss': avg_val_loss,
+ + 'val/linear_interp/loss': avg_lin_int_loss,
+ + 'val/linear_interp/quaternion dist.': lin_int_infos['quat. dist.'],
+ + 'val/linear_interp/euclidean dist.': lin_int_infos['trans. error'],
+ + 'val/linear_interp/geodesic loss': lin_int_infos['geodesic dist.'],
+ + 'val/catmull_rom/loss': avg_catmull_loss,
+ + 'val/catmull_rom/quaternion dist.': catmull_infos['quat. dist.'],
+ + 'val/catmull_rom/euclidean dist.': catmull_infos['trans. error'],
+ + 'val/catmull_rom/geodesic loss': catmull_infos['geodesic dist.'],
+ + 'val/quaternion dist.': val_infos['quat. dist.'],
+ + 'val/euclidean dist.': val_infos['trans. error'],
+ + 'val/geodesic loss': val_infos['geodesic dist.'],
+ + })
+
+ self.step += 1
+
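Two details in this hunk are worth isolating: the per-micro-batch loss is divided by gradient_accumulate_every so that repeated backward() calls sum to an averaged gradient, and only then does a single optimizer step run. A minimal sketch of that accumulation pattern with a stand-in model and batch source:

import torch
import torch.nn as nn

model = nn.Linear(7, 7)
optimizer = torch.optim.Adam(model.parameters(), lr=2e-4)
gradient_accumulate_every = 2

def next_batch():
    return torch.randn(32, 7)

mean_train_loss = 0.0
for _ in range(gradient_accumulate_every):
    x = next_batch()
    loss = (model(x) - x).pow(2).mean() / gradient_accumulate_every
    mean_train_loss += loss.item()
    loss.backward()          # gradients accumulate across micro-batches

optimizer.step()             # one parameter update for the whole group
optimizer.zero_grad()
print(f'train loss: {mean_train_loss:8.4f}')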
+ @@ -186,15 +311,6 @@ class Trainer(object):
+ normed_observations = trajectories[:, :, self.dataset.action_dim:]
+ observations = self.dataset.normalizer.unnormalize(normed_observations, 'observations')
+
+ - # from diffusion.datasets.preprocessing import blocks_cumsum_quat
+ - # # observations = conditions + blocks_cumsum_quat(deltas)
+ - # observations = conditions + deltas.cumsum(axis=1)
+ -
+ - #### @TODO: remove block-stacking specific stuff
+ - # from diffusion.datasets.preprocessing import blocks_euler_to_quat, blocks_add_kuka
+ - # observations = blocks_add_kuka(observations)
+ - ####
+ -
+ savepath = os.path.join(self.logdir, f'_sample-reference.png')
+ self.renderer.composite(savepath, observations)
+
+ @@ -225,9 +341,6 @@
+ # [ 1 x 1 x observation_dim ]
+ normed_conditions = to_np(batch.conditions[0])[:,None]
+
+ - # from diffusion.datasets.preprocessing import blocks_cumsum_quat
+ - # observations = conditions + blocks_cumsum_quat(deltas)
+ - # observations = conditions + deltas.cumsum(axis=1)
+
+ ## [ n_samples x (horizon + 1) x observation_dim ]
+ normed_observations = np.concatenate([
+ @@ -238,10 +351,70 @@
+ ## [ n_samples x (horizon + 1) x observation_dim ]
+ observations = self.dataset.normalizer.unnormalize(normed_observations, 'observations')
+
+ - #### @TODO: remove block-stacking specific stuff
+ - # from diffusion.datasets.preprocessing import blocks_euler_to_quat, blocks_add_kuka
+ - # observations = blocks_add_kuka(observations)
+ - ####
+ -
+ savepath = os.path.join(self.logdir, f'sample-{self.step}-{i}.png')
+ self.renderer.composite(savepath, observations)
+ +
+ + def linear_interpolation_loss(self, trajectories, conditions, loss_fc, scene_id=None, norm_params=None):
+ + batch_size, horizon, transition = trajectories.shape
+ +
+ + # Extract known indices and values
+ + known_indices = np.array(list(conditions.keys()), dtype=int)
+ + # candidate_no x batch_size x dim
+ + known_values = np.stack([c.cpu().numpy() for c in conditions.values()], axis=0)
+ + known_values = np.moveaxis(known_values, 0, 1)
+ +
+ + # Create time steps for interpolation
+ + time_steps = np.linspace(0, horizon, num=horizon)
+ +
+ + # Perform interpolation across all dimensions at once
+ + linear_int_arr = np.array([[
+ + np.interp(time_steps, known_indices, known_values[b, :, dim])
+ + for dim in range(transition)]
+ + for b in range(batch_size)]
+ + ).T # Transpose to match shape (horizon, transition)
+ +
+ + # Convert to tensor and move to the same device as trajectories
+ + linear_int_arr = np.transpose(linear_int_arr, axes=[2, 0, 1])
+ + linear_int_tensor = torch.tensor(linear_int_arr, dtype=torch.float64, device=trajectories.device)
+ +
+ + return loss_fc(linear_int_tensor, trajectories, cond=conditions, scene_id=scene_id, norm_params=norm_params), linear_int_tensor
+ +
+ +
+ + def catmull_rom_loss(self, trajectories, conditions, loss_fc, scene_id=None, norm_params=None):
+ + '''
+ + loss for catmull-rom interpolation
+ + '''
+ +
+ + batch_size, horizon, transition = trajectories.shape
+ +
+ + # Extract known indices and values
+ + known_indices = np.array(list(conditions.keys()), dtype=int)
+ + # candidate_no x batch_size x dim
+ + known_values = np.stack([c.cpu().numpy() for c in conditions.values()], axis=0)
+ + known_values = np.moveaxis(known_values, 0, 1)
+ +
+ + # Sort the timepoints
+ + sorted_indices = np.argsort(known_indices)
+ + known_indices = known_indices[sorted_indices]
+ + known_values = known_values[:, sorted_indices]
+ +
+ + spline_points = np.array([catmull_rom_spline_with_rotation(known_values[b], known_indices, horizon) for b in range(batch_size)])
+ +
+ + # Convert to tensor and move to the same device as trajectories
+ + spline_points = torch.tensor(spline_points, dtype=torch.float64, device=trajectories.device)
+ +
+ + assert spline_points.shape == trajectories.shape, f"Shape mismatch: {spline_points.shape} != {trajectories.shape}"
+ +
+ + return loss_fc(spline_points, trajectories, cond=conditions, scene_id=scene_id, norm_params=norm_params), spline_points
+ +
+ +
+ +
+ +
+ +
+ +
+ +
+ +
+ +
+ +
+ +
+ +
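The two methods appended above score how well pure interpolation between the conditioning keyframes reproduces a trajectory, giving the diffusion model a baseline to beat; the Catmull-Rom variant additionally runs positions and rotations through catmull_rom_spline_with_rotation. A hedged re-implementation of the linear baseline, with conditions simplified to a plain dict mapping timestep to a (batch, dim) array:

import numpy as np
import torch

def linear_interpolation(conditions, horizon):
    keys = sorted(conditions.keys())
    known_indices = np.array(keys, dtype=int)
    known_values = np.stack([conditions[k] for k in keys], axis=1)  # batch x keyframes x dim
    batch_size, _, transition = known_values.shape
    time_steps = np.arange(horizon)
    out = np.empty((batch_size, horizon, transition))
    for b in range(batch_size):
        for dim in range(transition):
            out[b, :, dim] = np.interp(time_steps, known_indices, known_values[b, :, dim])
    return torch.tensor(out, dtype=torch.float64)

# keyframes at t=0 and t=15 for a batch of 2 trajectories of 7-dim states
cond = {0: np.zeros((2, 7)), 15: np.ones((2, 7))}
print(linear_interpolation(cond, horizon=16).shape)  # torch.Size([2, 16, 7])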
+ diff --git a/scripts/train.py b/scripts/train.py
+ index 2c5f299..6728d6f 100644
+ --- a/scripts/train.py
+ +++ b/scripts/train.py
+ @@ -108,6 +108,7 @@ utils.report_parameters(model)
+
+ print('Testing forward...', end=' ', flush=True)
+ batch = utils.batchify(dataset[0])
+ +
+ loss, _ = diffusion.loss(*batch)
+ loss.backward()
+ print('✓')
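scripts/train.py gains only a blank line here, but the surrounding lines show a useful idiom: run one forward/backward pass on a single batchified sample before the real loop so shape and dtype mismatches fail immediately. A sketch of the idiom with a stand-in model and loss:

import torch
import torch.nn as nn

model = nn.Linear(7, 7)                  # placeholder for the diffusion model
print('Testing forward...', end=' ', flush=True)
x = torch.randn(1, 7)
loss = (model(x) - x).pow(2).mean()
loss.backward()                          # exercises the full autograd graph
print('✓')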
residual-diffuser/diffusion_config.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:02206556f60d5d7911ade8ae3a68cc6c59c8ce65aa6f16481125122ea83a827b
+ size 316
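This entry and the remaining ADDED files below are not the artifacts themselves but Git LFS pointer files: three lines giving the spec version, the sha256 of the real blob, and its size in bytes. A minimal parser for the format, as a hypothetical helper rather than anything shipped with the repo:

def parse_lfs_pointer(text):
    fields = dict(line.split(' ', 1) for line in text.strip().splitlines())
    algo, digest = fields['oid'].split(':', 1)
    return {'version': fields['version'], 'algo': algo,
            'oid': digest, 'size': int(fields['size'])}

pointer = '''version https://git-lfs.github.com/spec/v1
oid sha256:02206556f60d5d7911ade8ae3a68cc6c59c8ce65aa6f16481125122ea83a827b
size 316
'''
print(parse_lfs_pointer(pointer)['size'])  # 316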
residual-diffuser/model_config.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4c7d03e458df1b0f5eec375eaedbc0daaab6a95a996e158fbbfcf4128a25fc1e
+ size 202
residual-diffuser/render_config.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ddf10f1f4223e218c38e6190698e58a382fa33344e2d587bcde53f4700444683
+ size 156
residual-diffuser/state_58000.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cf0e30c9d06d00f9e8651f621640ab063f766fef6f85d0dd4912fe150ec6f083
+ size 59009153
residual-diffuser/test_indices.txt ADDED
@@ -0,0 +1,100 @@
+ 369
+ 764
+ 474
+ 312
+ 857
+ 384
+ 323
+ 548
+ 796
+ 212
+ 595
+ 388
+ 444
+ 120
+ 598
+ 302
+ 633
+ 688
+ 653
+ 20
+ 665
+ 67
+ 130
+ 56
+ 822
+ 160
+ 169
+ 30
+ 623
+ 200
+ 520
+ 13
+ 273
+ 296
+ 411
+ 530
+ 367
+ 579
+ 788
+ 387
+ 8
+ 216
+ 738
+ 527
+ 35
+ 713
+ 416
+ 422
+ 492
+ 680
+ 757
+ 435
+ 218
+ 643
+ 489
+ 481
+ 54
+ 760
+ 558
+ 485
+ 666
+ 619
+ 806
+ 724
+ 742
+ 452
+ 445
+ 137
+ 165
+ 260
+ 855
+ 95
+ 191
+ 736
+ 71
+ 860
+ 210
+ 176
+ 662
+ 480
+ 583
+ 34
+ 471
+ 772
+ 393
+ 466
+ 469
+ 111
+ 687
+ 125
+ 231
+ 123
+ 366
+ 304
+ 262
+ 97
+ 597
+ 177
+ 636
+ 350
residual-diffuser/train_indices.txt ADDED
@@ -0,0 +1,691 @@
+ 749
+ 792
+ 22
+ 3
+ 775
+ 420
+ 83
+ 284
+ 635
+ 376
+ 700
+ 754
+ 575
+ 115
+ 122
+ 751
+ 826
+ 695
+ 263
+ 577
+ 856
+ 336
+ 249
+ 150
+ 226
+ 248
+ 478
+ 617
+ 535
+ 10
+ 329
+ 46
+ 821
+ 206
+ 807
+ 147
+ 345
+ 766
+ 768
+ 254
+ 164
+ 188
+ 133
+ 437
+ 716
+ 532
+ 391
+ 426
+ 105
+ 728
+ 463
+ 864
+ 5
+ 178
+ 640
+ 774
+ 837
+ 309
+ 348
+ 850
+ 205
+ 314
+ 346
+ 385
+ 423
+ 425
+ 707
+ 163
+ 415
+ 412
+ 599
+ 503
+ 490
+ 319
+ 693
+ 274
+ 156
+ 316
+ 135
+ 721
+ 153
+ 72
+ 162
+ 765
+ 684
+ 138
+ 90
+ 834
+ 229
+ 673
+ 195
+ 94
+ 569
+ 270
+ 786
+ 342
+ 745
+ 356
+ 80
+ 685
+ 677
+ 544
+ 220
+ 672
+ 668
+ 361
+ 7
+ 31
+ 203
+ 142
+ 539
+ 421
+ 378
+ 16
+ 816
+ 443
+ 696
+ 276
+ 151
+ 472
+ 616
+ 305
+ 681
+ 93
+ 491
+ 849
+ 183
+ 300
+ 675
+ 753
+ 294
+ 649
+ 691
+ 175
+ 467
+ 144
+ 501
+ 858
+ 779
+ 869
+ 246
+ 867
+ 333
+ 258
+ 414
+ 84
+ 486
+ 632
+ 0
+ 519
+ 830
+ 600
+ 541
+ 52
+ 631
+ 198
+ 626
+ 278
+ 552
+ 547
+ 235
+ 559
+ 528
+ 353
+ 86
+ 88
+ 718
+ 234
+ 828
+ 295
+ 829
+ 646
+ 874
+ 564
+ 525
+ 810
+ 682
+ 250
+ 861
+ 217
+ 748
+ 113
+ 740
+ 505
+ 770
+ 787
+ 611
+ 642
+ 550
+ 63
+ 567
+ 549
+ 21
+ 588
+ 524
+ 752
+ 747
+ 48
+ 53
+ 285
+ 335
+ 66
+ 453
+ 124
+ 360
+ 815
+ 390
+ 781
+ 543
+ 201
+ 283
+ 141
+ 434
+ 230
+ 613
+ 193
+ 608
+ 508
+ 199
+ 732
+ 741
+ 222
+ 76
+ 555
+ 261
+ 96
+ 436
+ 282
+ 45
+ 589
+ 11
+ 459
+ 382
+ 57
+ 877
+ 70
+ 537
+ 801
+ 129
+ 722
+ 494
+ 823
+ 26
+ 377
+ 326
+ 820
+ 310
+ 77
+ 876
+ 565
+ 504
+ 406
+ 686
+ 811
+ 482
+ 499
+ 507
+ 458
+ 386
+ 847
+ 658
+ 708
+ 100
+ 60
+ 607
+ 817
+ 663
+ 428
+ 859
+ 313
+ 68
+ 32
+ 267
+ 701
+ 139
+ 349
+ 487
+ 289
+ 225
+ 840
+ 375
+ 186
+ 875
+ 832
+ 381
+ 667
+ 777
+ 515
+ 298
+ 862
+ 773
+ 509
+ 715
+ 449
+ 664
+ 358
+ 121
+ 172
+ 594
+ 39
+ 553
+ 468
+ 370
+ 424
+ 570
+ 614
+ 477
+ 645
+ 400
+ 179
+ 441
+ 767
+ 865
+ 55
+ 497
+ 288
+ 704
+ 551
+ 809
+ 498
+ 354
+ 82
+ 408
+ 157
+ 460
+ 98
+ 145
+ 439
+ 591
+ 556
+ 211
+ 606
+ 9
+ 538
+ 719
+ 720
+ 641
+ 240
+ 841
+ 17
+ 112
+ 465
+ 733
+ 372
+ 338
+ 268
+ 219
+ 365
+ 624
+ 114
+ 87
+ 585
+ 128
+ 14
+ 612
+ 803
+ 215
+ 836
+ 297
+ 255
+ 795
+ 495
+ 730
+ 29
+ 838
+ 99
+ 161
+ 814
+ 51
+ 794
+ 36
+ 170
+ 171
+ 644
+ 271
+ 281
+ 131
+ 603
+ 514
+ 562
+ 389
+ 180
+ 239
+ 118
+ 593
+ 407
+ 574
+ 79
+ 714
+ 655
+ 394
+ 622
+ 166
+ 802
+ 780
+ 639
+ 101
+ 19
+ 253
+ 674
+ 763
+ 269
+ 427
+ 364
+ 689
+ 526
+ 800
+ 303
+ 207
+ 630
+ 184
+ 168
+ 290
+ 392
+ 251
+ 561
+ 506
+ 65
+ 450
+ 651
+ 399
+ 484
+ 190
+ 851
+ 28
+ 637
+ 202
+ 657
+ 656
+ 868
+ 808
+ 568
+ 223
+ 236
+ 108
+ 580
+ 853
+ 75
+ 58
+ 327
+ 671
+ 510
+ 578
+ 797
+ 536
+ 110
+ 602
+ 196
+ 798
+ 790
+ 185
+ 670
+ 448
+ 523
+ 692
+ 430
+ 140
+ 383
+ 252
+ 531
+ 746
+ 456
+ 557
+ 522
+ 690
+ 782
+ 286
+ 92
+ 618
+ 723
+ 605
+ 49
+ 563
+ 243
+ 64
+ 455
+ 804
+ 299
+ 292
+ 529
+ 835
+ 2
+ 831
+ 197
+ 479
+ 102
+ 470
+ 47
+ 762
+ 518
+ 703
+ 842
+ 337
+ 678
+ 698
+ 189
+ 247
+ 410
+ 213
+ 401
+ 277
+ 280
+ 173
+ 328
+ 818
+ 744
+ 238
+ 315
+ 676
+ 872
+ 756
+ 244
+ 291
+ 727
+ 155
+ 208
+ 25
+ 339
+ 755
+ 844
+ 592
+ 321
+ 521
+ 546
+ 44
+ 242
+ 759
+ 769
+ 652
+ 181
+ 275
+ 50
+ 833
+ 371
+ 584
+ 758
+ 433
+ 279
+ 627
+ 107
+ 15
+ 109
+ 854
+ 227
+ 596
+ 395
+ 182
+ 778
+ 648
+ 825
+ 628
+ 18
+ 717
+ 398
+ 601
+ 566
+ 625
+ 447
+ 660
+ 647
+ 866
+ 735
+ 462
+ 590
+ 351
+ 659
+ 330
+ 634
+ 126
+ 334
+ 324
+ 783
+ 516
+ 500
+ 743
+ 739
+ 784
+ 457
+ 709
+ 318
+ 726
+ 192
+ 697
+ 69
+ 204
+ 669
+ 461
+ 413
+ 650
+ 362
+ 824
+ 127
+ 871
+ 805
+ 355
+ 442
+ 347
+ 209
+ 117
+ 306
+ 332
+ 379
+ 42
+ 152
+ 512
+ 638
+ 106
+ 187
+ 194
+ 159
+ 89
+ 712
+ 119
+ 307
+ 214
+ 403
+ 705
+ 582
+ 586
+ 264
+ 502
+ 488
+ 409
+ 621
+ 233
+ 340
+ 863
+ 737
+ 432
+ 24
+ 576
+ 454
+ 464
+ 85
+ 404
+ 517
+ 451
+ 513
+ 483
+ 363
+ 317
+ 573
+ 620
+ 74
+ 609
+ 706
+ 301
+ 259
+ 812
+ 679
+ 396
+ 610
+ 476
+ 417
+ 827
+ 405
+ 325
+ 581
+ 6
+ 846
+ 418
+ 132
+ 710
+ 272
+ 571
+ 368
+ 533
+ 839
+ 402
+ 380
+ 245
+ 174
+ 228
+ 266
+ 143
+ 785
+ 587
+ 661
+ 344
+ 38
+ 848
+ 154
+ 265
+ 771
+ 791
+ 761
+ 493
+ 542
+ 359
+ 62
+ 149
residual-diffuser/trainer_config.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b582355f277afd95fac231f750f74b3e2e3e301b0420e40098d54537790de3ce
+ size 381
residual-diffuser/val_indices.txt ADDED
@@ -0,0 +1,87 @@
+ 136
+ 604
+ 683
+ 146
+ 654
+ 373
+ 511
+ 73
+ 343
+ 311
+ 819
+ 331
+ 750
+ 33
+ 341
+ 694
+ 237
+ 572
+ 91
+ 134
+ 357
+ 540
+ 729
+ 776
+ 789
+ 61
+ 852
+ 699
+ 12
+ 232
+ 734
+ 43
+ 27
+ 545
+ 158
+ 224
+ 438
+ 629
+ 104
+ 554
+ 429
+ 534
+ 873
+ 322
+ 496
+ 446
+ 287
+ 397
+ 1
+ 293
+ 725
+ 81
+ 440
+ 419
+ 702
+ 870
+ 308
+ 793
+ 103
+ 843
+ 78
+ 256
+ 475
+ 560
+ 711
+ 813
+ 431
+ 374
+ 731
+ 23
+ 167
+ 37
+ 4
+ 352
+ 116
+ 148
+ 59
+ 845
+ 221
+ 257
+ 40
+ 615
+ 473
+ 320
+ 241
+ 799
+ 41