diff --git "a/pytorch_model.bin.index.json" "b/pytorch_model.bin.index.json" --- "a/pytorch_model.bin.index.json" +++ "b/pytorch_model.bin.index.json" @@ -1,6 +1,6 @@ { "metadata": { - "total_size": 31141720064 + "total_size": 31123918848 }, "weight_map": { "lm_head.weight": "pytorch_model-00001-of-00004.bin", @@ -13,25 +13,21 @@ "transformer.h.0.attn.lora_A_c_attn.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.0.attn.lora_A_c_attn.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.0.attn.lora_A_c_attn.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.0.attn.lora_A_c_attn.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.0.attn.lora_A_c_proj.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.0.attn.lora_A_c_proj.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.0.attn.lora_A_c_proj.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.0.attn.lora_A_c_proj.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.0.attn.lora_A_c_proj.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.0.attn.lora_A_c_proj.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.0.attn.lora_B_c_attn.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.0.attn.lora_B_c_attn.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.0.attn.lora_B_c_attn.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.0.attn.lora_B_c_attn.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.0.attn.lora_B_c_attn.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.0.attn.lora_B_c_attn.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.0.attn.lora_B_c_proj.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.0.attn.lora_B_c_proj.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.0.attn.lora_B_c_proj.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.0.attn.lora_B_c_proj.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.0.attn.lora_B_c_proj.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.0.attn.lora_B_c_proj.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.0.ln_1.bias": "pytorch_model-00001-of-00004.bin", "transformer.h.0.ln_1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.0.ln_2.bias": "pytorch_model-00001-of-00004.bin", @@ -45,13 +41,11 @@ "transformer.h.0.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.0.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.0.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.0.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.0.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.0.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.0.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.0.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.0.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.0.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.1.attn.c_attn.bias": "pytorch_model-00001-of-00004.bin", "transformer.h.1.attn.c_attn.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.1.attn.c_proj.bias": "pytorch_model-00001-of-00004.bin", @@ -61,25 +55,21 @@ "transformer.h.1.attn.lora_A_c_attn.2.weight": 
"pytorch_model-00001-of-00004.bin", "transformer.h.1.attn.lora_A_c_attn.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.1.attn.lora_A_c_attn.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.1.attn.lora_A_c_attn.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.1.attn.lora_A_c_proj.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.1.attn.lora_A_c_proj.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.1.attn.lora_A_c_proj.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.1.attn.lora_A_c_proj.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.1.attn.lora_A_c_proj.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.1.attn.lora_A_c_proj.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.1.attn.lora_B_c_attn.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.1.attn.lora_B_c_attn.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.1.attn.lora_B_c_attn.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.1.attn.lora_B_c_attn.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.1.attn.lora_B_c_attn.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.1.attn.lora_B_c_attn.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.1.attn.lora_B_c_proj.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.1.attn.lora_B_c_proj.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.1.attn.lora_B_c_proj.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.1.attn.lora_B_c_proj.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.1.attn.lora_B_c_proj.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.1.attn.lora_B_c_proj.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.1.ln_1.bias": "pytorch_model-00001-of-00004.bin", "transformer.h.1.ln_1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.1.ln_2.bias": "pytorch_model-00001-of-00004.bin", @@ -93,13 +83,11 @@ "transformer.h.1.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.1.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.1.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.1.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.1.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.1.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.1.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.1.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.1.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.1.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.10.attn.c_attn.bias": "pytorch_model-00001-of-00004.bin", "transformer.h.10.attn.c_attn.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.10.attn.c_proj.bias": "pytorch_model-00001-of-00004.bin", @@ -109,25 +97,21 @@ "transformer.h.10.attn.lora_A_c_attn.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.10.attn.lora_A_c_attn.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.10.attn.lora_A_c_attn.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.10.attn.lora_A_c_attn.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.10.attn.lora_A_c_proj.0.weight": "pytorch_model-00001-of-00004.bin", 
"transformer.h.10.attn.lora_A_c_proj.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.10.attn.lora_A_c_proj.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.10.attn.lora_A_c_proj.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.10.attn.lora_A_c_proj.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.10.attn.lora_A_c_proj.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.10.attn.lora_B_c_attn.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.10.attn.lora_B_c_attn.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.10.attn.lora_B_c_attn.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.10.attn.lora_B_c_attn.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.10.attn.lora_B_c_attn.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.10.attn.lora_B_c_attn.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.10.attn.lora_B_c_proj.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.10.attn.lora_B_c_proj.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.10.attn.lora_B_c_proj.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.10.attn.lora_B_c_proj.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.10.attn.lora_B_c_proj.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.10.attn.lora_B_c_proj.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.10.ln_1.bias": "pytorch_model-00001-of-00004.bin", "transformer.h.10.ln_1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.10.ln_2.bias": "pytorch_model-00001-of-00004.bin", @@ -141,13 +125,11 @@ "transformer.h.10.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.10.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.10.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.10.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.10.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.10.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.10.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.10.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.10.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.10.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.11.attn.c_attn.bias": "pytorch_model-00001-of-00004.bin", "transformer.h.11.attn.c_attn.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.11.attn.c_proj.bias": "pytorch_model-00001-of-00004.bin", @@ -157,25 +139,21 @@ "transformer.h.11.attn.lora_A_c_attn.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.11.attn.lora_A_c_attn.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.11.attn.lora_A_c_attn.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.11.attn.lora_A_c_attn.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.11.attn.lora_A_c_proj.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.11.attn.lora_A_c_proj.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.11.attn.lora_A_c_proj.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.11.attn.lora_A_c_proj.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.11.attn.lora_A_c_proj.4.weight": "pytorch_model-00001-of-00004.bin", - 
"transformer.h.11.attn.lora_A_c_proj.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.11.attn.lora_B_c_attn.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.11.attn.lora_B_c_attn.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.11.attn.lora_B_c_attn.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.11.attn.lora_B_c_attn.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.11.attn.lora_B_c_attn.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.11.attn.lora_B_c_attn.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.11.attn.lora_B_c_proj.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.11.attn.lora_B_c_proj.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.11.attn.lora_B_c_proj.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.11.attn.lora_B_c_proj.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.11.attn.lora_B_c_proj.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.11.attn.lora_B_c_proj.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.11.ln_1.bias": "pytorch_model-00001-of-00004.bin", "transformer.h.11.ln_1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.11.ln_2.bias": "pytorch_model-00001-of-00004.bin", @@ -189,13 +167,11 @@ "transformer.h.11.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.11.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.11.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.11.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.11.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.11.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.11.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.11.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.11.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.11.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.12.attn.c_attn.bias": "pytorch_model-00001-of-00004.bin", "transformer.h.12.attn.c_attn.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.12.attn.c_proj.bias": "pytorch_model-00001-of-00004.bin", @@ -205,25 +181,21 @@ "transformer.h.12.attn.lora_A_c_attn.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.12.attn.lora_A_c_attn.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.12.attn.lora_A_c_attn.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.12.attn.lora_A_c_attn.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.12.attn.lora_A_c_proj.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.12.attn.lora_A_c_proj.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.12.attn.lora_A_c_proj.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.12.attn.lora_A_c_proj.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.12.attn.lora_A_c_proj.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.12.attn.lora_A_c_proj.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.12.attn.lora_B_c_attn.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.12.attn.lora_B_c_attn.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.12.attn.lora_B_c_attn.2.weight": "pytorch_model-00001-of-00004.bin", 
"transformer.h.12.attn.lora_B_c_attn.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.12.attn.lora_B_c_attn.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.12.attn.lora_B_c_attn.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.12.attn.lora_B_c_proj.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.12.attn.lora_B_c_proj.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.12.attn.lora_B_c_proj.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.12.attn.lora_B_c_proj.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.12.attn.lora_B_c_proj.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.12.attn.lora_B_c_proj.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.12.ln_1.bias": "pytorch_model-00001-of-00004.bin", "transformer.h.12.ln_1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.12.ln_2.bias": "pytorch_model-00001-of-00004.bin", @@ -237,13 +209,11 @@ "transformer.h.12.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.12.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.12.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.12.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.12.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.12.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.12.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.12.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.12.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.12.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.13.attn.c_attn.bias": "pytorch_model-00002-of-00004.bin", "transformer.h.13.attn.c_attn.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.13.attn.c_proj.bias": "pytorch_model-00002-of-00004.bin", @@ -253,25 +223,21 @@ "transformer.h.13.attn.lora_A_c_attn.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.13.attn.lora_A_c_attn.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.13.attn.lora_A_c_attn.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.13.attn.lora_A_c_attn.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.13.attn.lora_A_c_proj.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.13.attn.lora_A_c_proj.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.13.attn.lora_A_c_proj.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.13.attn.lora_A_c_proj.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.13.attn.lora_A_c_proj.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.13.attn.lora_A_c_proj.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.13.attn.lora_B_c_attn.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.13.attn.lora_B_c_attn.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.13.attn.lora_B_c_attn.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.13.attn.lora_B_c_attn.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.13.attn.lora_B_c_attn.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.13.attn.lora_B_c_attn.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.13.attn.lora_B_c_proj.0.weight": "pytorch_model-00002-of-00004.bin", 
"transformer.h.13.attn.lora_B_c_proj.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.13.attn.lora_B_c_proj.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.13.attn.lora_B_c_proj.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.13.attn.lora_B_c_proj.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.13.attn.lora_B_c_proj.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.13.ln_1.bias": "pytorch_model-00002-of-00004.bin", "transformer.h.13.ln_1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.13.ln_2.bias": "pytorch_model-00002-of-00004.bin", @@ -285,13 +251,11 @@ "transformer.h.13.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.13.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.13.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.13.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.13.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.13.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.13.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.13.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.13.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.13.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.14.attn.c_attn.bias": "pytorch_model-00002-of-00004.bin", "transformer.h.14.attn.c_attn.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.14.attn.c_proj.bias": "pytorch_model-00002-of-00004.bin", @@ -301,25 +265,21 @@ "transformer.h.14.attn.lora_A_c_attn.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.14.attn.lora_A_c_attn.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.14.attn.lora_A_c_attn.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.14.attn.lora_A_c_attn.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.14.attn.lora_A_c_proj.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.14.attn.lora_A_c_proj.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.14.attn.lora_A_c_proj.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.14.attn.lora_A_c_proj.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.14.attn.lora_A_c_proj.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.14.attn.lora_A_c_proj.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.14.attn.lora_B_c_attn.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.14.attn.lora_B_c_attn.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.14.attn.lora_B_c_attn.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.14.attn.lora_B_c_attn.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.14.attn.lora_B_c_attn.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.14.attn.lora_B_c_attn.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.14.attn.lora_B_c_proj.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.14.attn.lora_B_c_proj.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.14.attn.lora_B_c_proj.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.14.attn.lora_B_c_proj.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.14.attn.lora_B_c_proj.4.weight": "pytorch_model-00002-of-00004.bin", - 
"transformer.h.14.attn.lora_B_c_proj.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.14.ln_1.bias": "pytorch_model-00002-of-00004.bin", "transformer.h.14.ln_1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.14.ln_2.bias": "pytorch_model-00002-of-00004.bin", @@ -333,13 +293,11 @@ "transformer.h.14.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.14.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.14.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.14.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.14.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.14.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.14.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.14.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.14.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.14.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.15.attn.c_attn.bias": "pytorch_model-00002-of-00004.bin", "transformer.h.15.attn.c_attn.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.15.attn.c_proj.bias": "pytorch_model-00002-of-00004.bin", @@ -349,25 +307,21 @@ "transformer.h.15.attn.lora_A_c_attn.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.15.attn.lora_A_c_attn.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.15.attn.lora_A_c_attn.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.15.attn.lora_A_c_attn.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.15.attn.lora_A_c_proj.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.15.attn.lora_A_c_proj.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.15.attn.lora_A_c_proj.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.15.attn.lora_A_c_proj.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.15.attn.lora_A_c_proj.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.15.attn.lora_A_c_proj.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.15.attn.lora_B_c_attn.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.15.attn.lora_B_c_attn.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.15.attn.lora_B_c_attn.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.15.attn.lora_B_c_attn.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.15.attn.lora_B_c_attn.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.15.attn.lora_B_c_attn.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.15.attn.lora_B_c_proj.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.15.attn.lora_B_c_proj.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.15.attn.lora_B_c_proj.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.15.attn.lora_B_c_proj.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.15.attn.lora_B_c_proj.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.15.attn.lora_B_c_proj.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.15.ln_1.bias": "pytorch_model-00002-of-00004.bin", "transformer.h.15.ln_1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.15.ln_2.bias": "pytorch_model-00002-of-00004.bin", @@ -381,13 +335,11 @@ "transformer.h.15.mlp.lora_A_c_proj_MLP.2.weight": 
"pytorch_model-00002-of-00004.bin", "transformer.h.15.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.15.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.15.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.15.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.15.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.15.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.15.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.15.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.15.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.16.attn.c_attn.bias": "pytorch_model-00002-of-00004.bin", "transformer.h.16.attn.c_attn.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.16.attn.c_proj.bias": "pytorch_model-00002-of-00004.bin", @@ -397,25 +349,21 @@ "transformer.h.16.attn.lora_A_c_attn.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.16.attn.lora_A_c_attn.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.16.attn.lora_A_c_attn.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.16.attn.lora_A_c_attn.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.16.attn.lora_A_c_proj.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.16.attn.lora_A_c_proj.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.16.attn.lora_A_c_proj.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.16.attn.lora_A_c_proj.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.16.attn.lora_A_c_proj.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.16.attn.lora_A_c_proj.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.16.attn.lora_B_c_attn.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.16.attn.lora_B_c_attn.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.16.attn.lora_B_c_attn.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.16.attn.lora_B_c_attn.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.16.attn.lora_B_c_attn.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.16.attn.lora_B_c_attn.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.16.attn.lora_B_c_proj.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.16.attn.lora_B_c_proj.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.16.attn.lora_B_c_proj.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.16.attn.lora_B_c_proj.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.16.attn.lora_B_c_proj.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.16.attn.lora_B_c_proj.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.16.ln_1.bias": "pytorch_model-00002-of-00004.bin", "transformer.h.16.ln_1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.16.ln_2.bias": "pytorch_model-00002-of-00004.bin", @@ -429,13 +377,11 @@ "transformer.h.16.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.16.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.16.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.16.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.16.mlp.lora_B_c_proj_MLP.0.weight": 
"pytorch_model-00002-of-00004.bin", "transformer.h.16.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.16.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.16.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.16.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.16.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.17.attn.c_attn.bias": "pytorch_model-00002-of-00004.bin", "transformer.h.17.attn.c_attn.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.17.attn.c_proj.bias": "pytorch_model-00002-of-00004.bin", @@ -445,25 +391,21 @@ "transformer.h.17.attn.lora_A_c_attn.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.17.attn.lora_A_c_attn.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.17.attn.lora_A_c_attn.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.17.attn.lora_A_c_attn.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.17.attn.lora_A_c_proj.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.17.attn.lora_A_c_proj.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.17.attn.lora_A_c_proj.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.17.attn.lora_A_c_proj.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.17.attn.lora_A_c_proj.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.17.attn.lora_A_c_proj.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.17.attn.lora_B_c_attn.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.17.attn.lora_B_c_attn.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.17.attn.lora_B_c_attn.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.17.attn.lora_B_c_attn.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.17.attn.lora_B_c_attn.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.17.attn.lora_B_c_attn.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.17.attn.lora_B_c_proj.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.17.attn.lora_B_c_proj.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.17.attn.lora_B_c_proj.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.17.attn.lora_B_c_proj.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.17.attn.lora_B_c_proj.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.17.attn.lora_B_c_proj.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.17.ln_1.bias": "pytorch_model-00002-of-00004.bin", "transformer.h.17.ln_1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.17.ln_2.bias": "pytorch_model-00002-of-00004.bin", @@ -477,13 +419,11 @@ "transformer.h.17.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.17.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.17.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.17.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.17.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.17.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.17.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.17.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.17.mlp.lora_B_c_proj_MLP.4.weight": 
"pytorch_model-00002-of-00004.bin", - "transformer.h.17.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.18.attn.c_attn.bias": "pytorch_model-00002-of-00004.bin", "transformer.h.18.attn.c_attn.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.18.attn.c_proj.bias": "pytorch_model-00002-of-00004.bin", @@ -493,25 +433,21 @@ "transformer.h.18.attn.lora_A_c_attn.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.18.attn.lora_A_c_attn.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.18.attn.lora_A_c_attn.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.18.attn.lora_A_c_attn.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.18.attn.lora_A_c_proj.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.18.attn.lora_A_c_proj.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.18.attn.lora_A_c_proj.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.18.attn.lora_A_c_proj.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.18.attn.lora_A_c_proj.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.18.attn.lora_A_c_proj.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.18.attn.lora_B_c_attn.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.18.attn.lora_B_c_attn.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.18.attn.lora_B_c_attn.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.18.attn.lora_B_c_attn.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.18.attn.lora_B_c_attn.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.18.attn.lora_B_c_attn.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.18.attn.lora_B_c_proj.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.18.attn.lora_B_c_proj.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.18.attn.lora_B_c_proj.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.18.attn.lora_B_c_proj.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.18.attn.lora_B_c_proj.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.18.attn.lora_B_c_proj.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.18.ln_1.bias": "pytorch_model-00002-of-00004.bin", "transformer.h.18.ln_1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.18.ln_2.bias": "pytorch_model-00002-of-00004.bin", @@ -525,13 +461,11 @@ "transformer.h.18.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.18.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.18.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.18.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.18.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.18.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.18.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.18.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.18.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.18.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.19.attn.c_attn.bias": "pytorch_model-00002-of-00004.bin", "transformer.h.19.attn.c_attn.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.19.attn.c_proj.bias": "pytorch_model-00002-of-00004.bin", @@ -541,25 
+475,21 @@ "transformer.h.19.attn.lora_A_c_attn.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.19.attn.lora_A_c_attn.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.19.attn.lora_A_c_attn.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.19.attn.lora_A_c_attn.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.19.attn.lora_A_c_proj.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.19.attn.lora_A_c_proj.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.19.attn.lora_A_c_proj.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.19.attn.lora_A_c_proj.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.19.attn.lora_A_c_proj.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.19.attn.lora_A_c_proj.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.19.attn.lora_B_c_attn.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.19.attn.lora_B_c_attn.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.19.attn.lora_B_c_attn.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.19.attn.lora_B_c_attn.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.19.attn.lora_B_c_attn.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.19.attn.lora_B_c_attn.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.19.attn.lora_B_c_proj.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.19.attn.lora_B_c_proj.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.19.attn.lora_B_c_proj.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.19.attn.lora_B_c_proj.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.19.attn.lora_B_c_proj.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.19.attn.lora_B_c_proj.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.19.ln_1.bias": "pytorch_model-00002-of-00004.bin", "transformer.h.19.ln_1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.19.ln_2.bias": "pytorch_model-00002-of-00004.bin", @@ -573,13 +503,11 @@ "transformer.h.19.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.19.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.19.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.19.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.19.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.19.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.19.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.19.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.19.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.19.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.2.attn.c_attn.bias": "pytorch_model-00001-of-00004.bin", "transformer.h.2.attn.c_attn.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.2.attn.c_proj.bias": "pytorch_model-00001-of-00004.bin", @@ -589,25 +517,21 @@ "transformer.h.2.attn.lora_A_c_attn.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.2.attn.lora_A_c_attn.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.2.attn.lora_A_c_attn.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.2.attn.lora_A_c_attn.5.weight": "pytorch_model-00001-of-00004.bin", 
"transformer.h.2.attn.lora_A_c_proj.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.2.attn.lora_A_c_proj.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.2.attn.lora_A_c_proj.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.2.attn.lora_A_c_proj.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.2.attn.lora_A_c_proj.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.2.attn.lora_A_c_proj.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.2.attn.lora_B_c_attn.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.2.attn.lora_B_c_attn.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.2.attn.lora_B_c_attn.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.2.attn.lora_B_c_attn.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.2.attn.lora_B_c_attn.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.2.attn.lora_B_c_attn.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.2.attn.lora_B_c_proj.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.2.attn.lora_B_c_proj.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.2.attn.lora_B_c_proj.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.2.attn.lora_B_c_proj.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.2.attn.lora_B_c_proj.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.2.attn.lora_B_c_proj.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.2.ln_1.bias": "pytorch_model-00001-of-00004.bin", "transformer.h.2.ln_1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.2.ln_2.bias": "pytorch_model-00001-of-00004.bin", @@ -621,13 +545,11 @@ "transformer.h.2.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.2.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.2.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.2.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.2.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.2.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.2.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.2.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.2.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.2.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.20.attn.c_attn.bias": "pytorch_model-00002-of-00004.bin", "transformer.h.20.attn.c_attn.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.20.attn.c_proj.bias": "pytorch_model-00002-of-00004.bin", @@ -637,25 +559,21 @@ "transformer.h.20.attn.lora_A_c_attn.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.20.attn.lora_A_c_attn.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.20.attn.lora_A_c_attn.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.20.attn.lora_A_c_attn.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.20.attn.lora_A_c_proj.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.20.attn.lora_A_c_proj.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.20.attn.lora_A_c_proj.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.20.attn.lora_A_c_proj.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.20.attn.lora_A_c_proj.4.weight": 
"pytorch_model-00002-of-00004.bin", - "transformer.h.20.attn.lora_A_c_proj.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.20.attn.lora_B_c_attn.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.20.attn.lora_B_c_attn.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.20.attn.lora_B_c_attn.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.20.attn.lora_B_c_attn.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.20.attn.lora_B_c_attn.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.20.attn.lora_B_c_attn.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.20.attn.lora_B_c_proj.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.20.attn.lora_B_c_proj.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.20.attn.lora_B_c_proj.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.20.attn.lora_B_c_proj.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.20.attn.lora_B_c_proj.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.20.attn.lora_B_c_proj.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.20.ln_1.bias": "pytorch_model-00002-of-00004.bin", "transformer.h.20.ln_1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.20.ln_2.bias": "pytorch_model-00002-of-00004.bin", @@ -669,13 +587,11 @@ "transformer.h.20.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.20.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.20.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.20.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.20.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.20.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.20.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.20.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.20.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.20.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.21.attn.c_attn.bias": "pytorch_model-00002-of-00004.bin", "transformer.h.21.attn.c_attn.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.21.attn.c_proj.bias": "pytorch_model-00002-of-00004.bin", @@ -685,25 +601,21 @@ "transformer.h.21.attn.lora_A_c_attn.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.21.attn.lora_A_c_attn.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.21.attn.lora_A_c_attn.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.21.attn.lora_A_c_attn.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.21.attn.lora_A_c_proj.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.21.attn.lora_A_c_proj.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.21.attn.lora_A_c_proj.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.21.attn.lora_A_c_proj.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.21.attn.lora_A_c_proj.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.21.attn.lora_A_c_proj.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.21.attn.lora_B_c_attn.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.21.attn.lora_B_c_attn.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.21.attn.lora_B_c_attn.2.weight": 
"pytorch_model-00002-of-00004.bin", "transformer.h.21.attn.lora_B_c_attn.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.21.attn.lora_B_c_attn.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.21.attn.lora_B_c_attn.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.21.attn.lora_B_c_proj.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.21.attn.lora_B_c_proj.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.21.attn.lora_B_c_proj.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.21.attn.lora_B_c_proj.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.21.attn.lora_B_c_proj.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.21.attn.lora_B_c_proj.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.21.ln_1.bias": "pytorch_model-00002-of-00004.bin", "transformer.h.21.ln_1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.21.ln_2.bias": "pytorch_model-00002-of-00004.bin", @@ -717,13 +629,11 @@ "transformer.h.21.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.21.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.21.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.21.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.21.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.21.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.21.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.21.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.21.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.21.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.22.attn.c_attn.bias": "pytorch_model-00002-of-00004.bin", "transformer.h.22.attn.c_attn.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.22.attn.c_proj.bias": "pytorch_model-00002-of-00004.bin", @@ -733,25 +643,21 @@ "transformer.h.22.attn.lora_A_c_attn.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.22.attn.lora_A_c_attn.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.22.attn.lora_A_c_attn.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.22.attn.lora_A_c_attn.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.22.attn.lora_A_c_proj.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.22.attn.lora_A_c_proj.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.22.attn.lora_A_c_proj.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.22.attn.lora_A_c_proj.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.22.attn.lora_A_c_proj.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.22.attn.lora_A_c_proj.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.22.attn.lora_B_c_attn.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.22.attn.lora_B_c_attn.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.22.attn.lora_B_c_attn.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.22.attn.lora_B_c_attn.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.22.attn.lora_B_c_attn.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.22.attn.lora_B_c_attn.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.22.attn.lora_B_c_proj.0.weight": 
"pytorch_model-00002-of-00004.bin", "transformer.h.22.attn.lora_B_c_proj.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.22.attn.lora_B_c_proj.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.22.attn.lora_B_c_proj.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.22.attn.lora_B_c_proj.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.22.attn.lora_B_c_proj.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.22.ln_1.bias": "pytorch_model-00002-of-00004.bin", "transformer.h.22.ln_1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.22.ln_2.bias": "pytorch_model-00002-of-00004.bin", @@ -765,13 +671,11 @@ "transformer.h.22.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.22.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.22.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.22.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.22.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.22.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.22.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.22.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.22.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.22.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.23.attn.c_attn.bias": "pytorch_model-00002-of-00004.bin", "transformer.h.23.attn.c_attn.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.23.attn.c_proj.bias": "pytorch_model-00002-of-00004.bin", @@ -781,25 +685,21 @@ "transformer.h.23.attn.lora_A_c_attn.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.23.attn.lora_A_c_attn.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.23.attn.lora_A_c_attn.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.23.attn.lora_A_c_attn.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.23.attn.lora_A_c_proj.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.23.attn.lora_A_c_proj.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.23.attn.lora_A_c_proj.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.23.attn.lora_A_c_proj.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.23.attn.lora_A_c_proj.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.23.attn.lora_A_c_proj.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.23.attn.lora_B_c_attn.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.23.attn.lora_B_c_attn.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.23.attn.lora_B_c_attn.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.23.attn.lora_B_c_attn.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.23.attn.lora_B_c_attn.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.23.attn.lora_B_c_attn.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.23.attn.lora_B_c_proj.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.23.attn.lora_B_c_proj.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.23.attn.lora_B_c_proj.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.23.attn.lora_B_c_proj.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.23.attn.lora_B_c_proj.4.weight": 
"pytorch_model-00002-of-00004.bin", - "transformer.h.23.attn.lora_B_c_proj.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.23.ln_1.bias": "pytorch_model-00002-of-00004.bin", "transformer.h.23.ln_1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.23.ln_2.bias": "pytorch_model-00002-of-00004.bin", @@ -813,13 +713,11 @@ "transformer.h.23.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.23.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.23.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.23.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.23.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.23.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.23.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.23.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.23.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.23.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.24.attn.c_attn.bias": "pytorch_model-00002-of-00004.bin", "transformer.h.24.attn.c_attn.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.24.attn.c_proj.bias": "pytorch_model-00002-of-00004.bin", @@ -829,25 +727,21 @@ "transformer.h.24.attn.lora_A_c_attn.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.24.attn.lora_A_c_attn.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.24.attn.lora_A_c_attn.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.24.attn.lora_A_c_attn.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.24.attn.lora_A_c_proj.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.24.attn.lora_A_c_proj.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.24.attn.lora_A_c_proj.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.24.attn.lora_A_c_proj.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.24.attn.lora_A_c_proj.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.24.attn.lora_A_c_proj.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.24.attn.lora_B_c_attn.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.24.attn.lora_B_c_attn.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.24.attn.lora_B_c_attn.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.24.attn.lora_B_c_attn.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.24.attn.lora_B_c_attn.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.24.attn.lora_B_c_attn.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.24.attn.lora_B_c_proj.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.24.attn.lora_B_c_proj.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.24.attn.lora_B_c_proj.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.24.attn.lora_B_c_proj.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.24.attn.lora_B_c_proj.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.24.attn.lora_B_c_proj.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.24.ln_1.bias": "pytorch_model-00002-of-00004.bin", "transformer.h.24.ln_1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.24.ln_2.bias": "pytorch_model-00002-of-00004.bin", @@ -861,13 +755,11 @@ 
"transformer.h.24.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.24.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.24.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.24.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.24.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.24.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.24.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.24.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.24.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.24.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.25.attn.c_attn.bias": "pytorch_model-00002-of-00004.bin", "transformer.h.25.attn.c_attn.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.25.attn.c_proj.bias": "pytorch_model-00002-of-00004.bin", @@ -877,25 +769,21 @@ "transformer.h.25.attn.lora_A_c_attn.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.25.attn.lora_A_c_attn.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.25.attn.lora_A_c_attn.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.25.attn.lora_A_c_attn.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.25.attn.lora_A_c_proj.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.25.attn.lora_A_c_proj.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.25.attn.lora_A_c_proj.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.25.attn.lora_A_c_proj.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.25.attn.lora_A_c_proj.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.25.attn.lora_A_c_proj.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.25.attn.lora_B_c_attn.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.25.attn.lora_B_c_attn.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.25.attn.lora_B_c_attn.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.25.attn.lora_B_c_attn.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.25.attn.lora_B_c_attn.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.25.attn.lora_B_c_attn.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.25.attn.lora_B_c_proj.0.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.25.attn.lora_B_c_proj.1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.25.attn.lora_B_c_proj.2.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.25.attn.lora_B_c_proj.3.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.25.attn.lora_B_c_proj.4.weight": "pytorch_model-00002-of-00004.bin", - "transformer.h.25.attn.lora_B_c_proj.5.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.25.ln_1.bias": "pytorch_model-00002-of-00004.bin", "transformer.h.25.ln_1.weight": "pytorch_model-00002-of-00004.bin", "transformer.h.25.ln_2.bias": "pytorch_model-00002-of-00004.bin", @@ -909,13 +797,11 @@ "transformer.h.25.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.25.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.25.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.25.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00003-of-00004.bin", 
"transformer.h.25.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.25.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.25.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.25.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.25.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.25.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.26.attn.c_attn.bias": "pytorch_model-00003-of-00004.bin", "transformer.h.26.attn.c_attn.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.26.attn.c_proj.bias": "pytorch_model-00003-of-00004.bin", @@ -925,25 +811,21 @@ "transformer.h.26.attn.lora_A_c_attn.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.26.attn.lora_A_c_attn.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.26.attn.lora_A_c_attn.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.26.attn.lora_A_c_attn.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.26.attn.lora_A_c_proj.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.26.attn.lora_A_c_proj.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.26.attn.lora_A_c_proj.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.26.attn.lora_A_c_proj.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.26.attn.lora_A_c_proj.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.26.attn.lora_A_c_proj.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.26.attn.lora_B_c_attn.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.26.attn.lora_B_c_attn.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.26.attn.lora_B_c_attn.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.26.attn.lora_B_c_attn.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.26.attn.lora_B_c_attn.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.26.attn.lora_B_c_attn.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.26.attn.lora_B_c_proj.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.26.attn.lora_B_c_proj.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.26.attn.lora_B_c_proj.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.26.attn.lora_B_c_proj.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.26.attn.lora_B_c_proj.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.26.attn.lora_B_c_proj.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.26.ln_1.bias": "pytorch_model-00003-of-00004.bin", "transformer.h.26.ln_1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.26.ln_2.bias": "pytorch_model-00003-of-00004.bin", @@ -957,13 +839,11 @@ "transformer.h.26.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.26.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.26.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.26.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.26.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.26.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.26.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.26.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00003-of-00004.bin", 
"transformer.h.26.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.26.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.27.attn.c_attn.bias": "pytorch_model-00003-of-00004.bin", "transformer.h.27.attn.c_attn.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.27.attn.c_proj.bias": "pytorch_model-00003-of-00004.bin", @@ -973,25 +853,21 @@ "transformer.h.27.attn.lora_A_c_attn.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.27.attn.lora_A_c_attn.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.27.attn.lora_A_c_attn.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.27.attn.lora_A_c_attn.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.27.attn.lora_A_c_proj.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.27.attn.lora_A_c_proj.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.27.attn.lora_A_c_proj.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.27.attn.lora_A_c_proj.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.27.attn.lora_A_c_proj.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.27.attn.lora_A_c_proj.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.27.attn.lora_B_c_attn.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.27.attn.lora_B_c_attn.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.27.attn.lora_B_c_attn.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.27.attn.lora_B_c_attn.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.27.attn.lora_B_c_attn.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.27.attn.lora_B_c_attn.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.27.attn.lora_B_c_proj.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.27.attn.lora_B_c_proj.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.27.attn.lora_B_c_proj.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.27.attn.lora_B_c_proj.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.27.attn.lora_B_c_proj.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.27.attn.lora_B_c_proj.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.27.ln_1.bias": "pytorch_model-00003-of-00004.bin", "transformer.h.27.ln_1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.27.ln_2.bias": "pytorch_model-00003-of-00004.bin", @@ -1005,13 +881,11 @@ "transformer.h.27.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.27.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.27.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.27.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.27.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.27.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.27.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.27.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.27.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.27.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.28.attn.c_attn.bias": "pytorch_model-00003-of-00004.bin", "transformer.h.28.attn.c_attn.weight": "pytorch_model-00003-of-00004.bin", 
"transformer.h.28.attn.c_proj.bias": "pytorch_model-00003-of-00004.bin", @@ -1021,25 +895,21 @@ "transformer.h.28.attn.lora_A_c_attn.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.28.attn.lora_A_c_attn.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.28.attn.lora_A_c_attn.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.28.attn.lora_A_c_attn.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.28.attn.lora_A_c_proj.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.28.attn.lora_A_c_proj.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.28.attn.lora_A_c_proj.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.28.attn.lora_A_c_proj.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.28.attn.lora_A_c_proj.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.28.attn.lora_A_c_proj.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.28.attn.lora_B_c_attn.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.28.attn.lora_B_c_attn.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.28.attn.lora_B_c_attn.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.28.attn.lora_B_c_attn.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.28.attn.lora_B_c_attn.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.28.attn.lora_B_c_attn.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.28.attn.lora_B_c_proj.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.28.attn.lora_B_c_proj.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.28.attn.lora_B_c_proj.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.28.attn.lora_B_c_proj.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.28.attn.lora_B_c_proj.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.28.attn.lora_B_c_proj.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.28.ln_1.bias": "pytorch_model-00003-of-00004.bin", "transformer.h.28.ln_1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.28.ln_2.bias": "pytorch_model-00003-of-00004.bin", @@ -1053,13 +923,11 @@ "transformer.h.28.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.28.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.28.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.28.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.28.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.28.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.28.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.28.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.28.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.28.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.29.attn.c_attn.bias": "pytorch_model-00003-of-00004.bin", "transformer.h.29.attn.c_attn.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.29.attn.c_proj.bias": "pytorch_model-00003-of-00004.bin", @@ -1069,25 +937,21 @@ "transformer.h.29.attn.lora_A_c_attn.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.29.attn.lora_A_c_attn.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.29.attn.lora_A_c_attn.4.weight": "pytorch_model-00003-of-00004.bin", - 
"transformer.h.29.attn.lora_A_c_attn.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.29.attn.lora_A_c_proj.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.29.attn.lora_A_c_proj.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.29.attn.lora_A_c_proj.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.29.attn.lora_A_c_proj.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.29.attn.lora_A_c_proj.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.29.attn.lora_A_c_proj.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.29.attn.lora_B_c_attn.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.29.attn.lora_B_c_attn.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.29.attn.lora_B_c_attn.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.29.attn.lora_B_c_attn.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.29.attn.lora_B_c_attn.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.29.attn.lora_B_c_attn.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.29.attn.lora_B_c_proj.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.29.attn.lora_B_c_proj.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.29.attn.lora_B_c_proj.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.29.attn.lora_B_c_proj.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.29.attn.lora_B_c_proj.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.29.attn.lora_B_c_proj.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.29.ln_1.bias": "pytorch_model-00003-of-00004.bin", "transformer.h.29.ln_1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.29.ln_2.bias": "pytorch_model-00003-of-00004.bin", @@ -1101,13 +965,11 @@ "transformer.h.29.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.29.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.29.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.29.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.29.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.29.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.29.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.29.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.29.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.29.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.3.attn.c_attn.bias": "pytorch_model-00001-of-00004.bin", "transformer.h.3.attn.c_attn.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.3.attn.c_proj.bias": "pytorch_model-00001-of-00004.bin", @@ -1117,25 +979,21 @@ "transformer.h.3.attn.lora_A_c_attn.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.3.attn.lora_A_c_attn.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.3.attn.lora_A_c_attn.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.3.attn.lora_A_c_attn.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.3.attn.lora_A_c_proj.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.3.attn.lora_A_c_proj.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.3.attn.lora_A_c_proj.2.weight": "pytorch_model-00001-of-00004.bin", 
"transformer.h.3.attn.lora_A_c_proj.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.3.attn.lora_A_c_proj.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.3.attn.lora_A_c_proj.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.3.attn.lora_B_c_attn.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.3.attn.lora_B_c_attn.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.3.attn.lora_B_c_attn.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.3.attn.lora_B_c_attn.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.3.attn.lora_B_c_attn.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.3.attn.lora_B_c_attn.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.3.attn.lora_B_c_proj.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.3.attn.lora_B_c_proj.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.3.attn.lora_B_c_proj.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.3.attn.lora_B_c_proj.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.3.attn.lora_B_c_proj.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.3.attn.lora_B_c_proj.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.3.ln_1.bias": "pytorch_model-00001-of-00004.bin", "transformer.h.3.ln_1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.3.ln_2.bias": "pytorch_model-00001-of-00004.bin", @@ -1149,13 +1007,11 @@ "transformer.h.3.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.3.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.3.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.3.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.3.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.3.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.3.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.3.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.3.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.3.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.30.attn.c_attn.bias": "pytorch_model-00003-of-00004.bin", "transformer.h.30.attn.c_attn.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.30.attn.c_proj.bias": "pytorch_model-00003-of-00004.bin", @@ -1165,25 +1021,21 @@ "transformer.h.30.attn.lora_A_c_attn.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.30.attn.lora_A_c_attn.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.30.attn.lora_A_c_attn.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.30.attn.lora_A_c_attn.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.30.attn.lora_A_c_proj.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.30.attn.lora_A_c_proj.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.30.attn.lora_A_c_proj.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.30.attn.lora_A_c_proj.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.30.attn.lora_A_c_proj.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.30.attn.lora_A_c_proj.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.30.attn.lora_B_c_attn.0.weight": "pytorch_model-00003-of-00004.bin", 
"transformer.h.30.attn.lora_B_c_attn.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.30.attn.lora_B_c_attn.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.30.attn.lora_B_c_attn.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.30.attn.lora_B_c_attn.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.30.attn.lora_B_c_attn.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.30.attn.lora_B_c_proj.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.30.attn.lora_B_c_proj.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.30.attn.lora_B_c_proj.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.30.attn.lora_B_c_proj.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.30.attn.lora_B_c_proj.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.30.attn.lora_B_c_proj.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.30.ln_1.bias": "pytorch_model-00003-of-00004.bin", "transformer.h.30.ln_1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.30.ln_2.bias": "pytorch_model-00003-of-00004.bin", @@ -1197,13 +1049,11 @@ "transformer.h.30.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.30.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.30.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.30.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.30.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.30.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.30.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.30.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.30.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.30.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.31.attn.c_attn.bias": "pytorch_model-00003-of-00004.bin", "transformer.h.31.attn.c_attn.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.31.attn.c_proj.bias": "pytorch_model-00003-of-00004.bin", @@ -1213,25 +1063,21 @@ "transformer.h.31.attn.lora_A_c_attn.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.31.attn.lora_A_c_attn.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.31.attn.lora_A_c_attn.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.31.attn.lora_A_c_attn.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.31.attn.lora_A_c_proj.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.31.attn.lora_A_c_proj.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.31.attn.lora_A_c_proj.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.31.attn.lora_A_c_proj.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.31.attn.lora_A_c_proj.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.31.attn.lora_A_c_proj.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.31.attn.lora_B_c_attn.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.31.attn.lora_B_c_attn.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.31.attn.lora_B_c_attn.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.31.attn.lora_B_c_attn.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.31.attn.lora_B_c_attn.4.weight": "pytorch_model-00003-of-00004.bin", - 
"transformer.h.31.attn.lora_B_c_attn.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.31.attn.lora_B_c_proj.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.31.attn.lora_B_c_proj.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.31.attn.lora_B_c_proj.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.31.attn.lora_B_c_proj.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.31.attn.lora_B_c_proj.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.31.attn.lora_B_c_proj.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.31.ln_1.bias": "pytorch_model-00003-of-00004.bin", "transformer.h.31.ln_1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.31.ln_2.bias": "pytorch_model-00003-of-00004.bin", @@ -1245,13 +1091,11 @@ "transformer.h.31.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.31.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.31.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.31.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.31.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.31.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.31.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.31.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.31.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.31.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.32.attn.c_attn.bias": "pytorch_model-00003-of-00004.bin", "transformer.h.32.attn.c_attn.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.32.attn.c_proj.bias": "pytorch_model-00003-of-00004.bin", @@ -1261,25 +1105,21 @@ "transformer.h.32.attn.lora_A_c_attn.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.32.attn.lora_A_c_attn.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.32.attn.lora_A_c_attn.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.32.attn.lora_A_c_attn.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.32.attn.lora_A_c_proj.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.32.attn.lora_A_c_proj.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.32.attn.lora_A_c_proj.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.32.attn.lora_A_c_proj.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.32.attn.lora_A_c_proj.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.32.attn.lora_A_c_proj.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.32.attn.lora_B_c_attn.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.32.attn.lora_B_c_attn.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.32.attn.lora_B_c_attn.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.32.attn.lora_B_c_attn.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.32.attn.lora_B_c_attn.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.32.attn.lora_B_c_attn.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.32.attn.lora_B_c_proj.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.32.attn.lora_B_c_proj.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.32.attn.lora_B_c_proj.2.weight": "pytorch_model-00003-of-00004.bin", 
"transformer.h.32.attn.lora_B_c_proj.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.32.attn.lora_B_c_proj.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.32.attn.lora_B_c_proj.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.32.ln_1.bias": "pytorch_model-00003-of-00004.bin", "transformer.h.32.ln_1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.32.ln_2.bias": "pytorch_model-00003-of-00004.bin", @@ -1293,13 +1133,11 @@ "transformer.h.32.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.32.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.32.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.32.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.32.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.32.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.32.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.32.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.32.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.32.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.33.attn.c_attn.bias": "pytorch_model-00003-of-00004.bin", "transformer.h.33.attn.c_attn.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.33.attn.c_proj.bias": "pytorch_model-00003-of-00004.bin", @@ -1309,25 +1147,21 @@ "transformer.h.33.attn.lora_A_c_attn.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.33.attn.lora_A_c_attn.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.33.attn.lora_A_c_attn.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.33.attn.lora_A_c_attn.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.33.attn.lora_A_c_proj.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.33.attn.lora_A_c_proj.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.33.attn.lora_A_c_proj.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.33.attn.lora_A_c_proj.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.33.attn.lora_A_c_proj.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.33.attn.lora_A_c_proj.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.33.attn.lora_B_c_attn.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.33.attn.lora_B_c_attn.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.33.attn.lora_B_c_attn.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.33.attn.lora_B_c_attn.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.33.attn.lora_B_c_attn.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.33.attn.lora_B_c_attn.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.33.attn.lora_B_c_proj.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.33.attn.lora_B_c_proj.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.33.attn.lora_B_c_proj.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.33.attn.lora_B_c_proj.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.33.attn.lora_B_c_proj.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.33.attn.lora_B_c_proj.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.33.ln_1.bias": "pytorch_model-00003-of-00004.bin", "transformer.h.33.ln_1.weight": 
"pytorch_model-00003-of-00004.bin", "transformer.h.33.ln_2.bias": "pytorch_model-00003-of-00004.bin", @@ -1341,13 +1175,11 @@ "transformer.h.33.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.33.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.33.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.33.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.33.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.33.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.33.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.33.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.33.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.33.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.34.attn.c_attn.bias": "pytorch_model-00003-of-00004.bin", "transformer.h.34.attn.c_attn.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.34.attn.c_proj.bias": "pytorch_model-00003-of-00004.bin", @@ -1357,25 +1189,21 @@ "transformer.h.34.attn.lora_A_c_attn.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.34.attn.lora_A_c_attn.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.34.attn.lora_A_c_attn.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.34.attn.lora_A_c_attn.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.34.attn.lora_A_c_proj.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.34.attn.lora_A_c_proj.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.34.attn.lora_A_c_proj.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.34.attn.lora_A_c_proj.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.34.attn.lora_A_c_proj.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.34.attn.lora_A_c_proj.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.34.attn.lora_B_c_attn.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.34.attn.lora_B_c_attn.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.34.attn.lora_B_c_attn.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.34.attn.lora_B_c_attn.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.34.attn.lora_B_c_attn.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.34.attn.lora_B_c_attn.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.34.attn.lora_B_c_proj.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.34.attn.lora_B_c_proj.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.34.attn.lora_B_c_proj.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.34.attn.lora_B_c_proj.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.34.attn.lora_B_c_proj.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.34.attn.lora_B_c_proj.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.34.ln_1.bias": "pytorch_model-00003-of-00004.bin", "transformer.h.34.ln_1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.34.ln_2.bias": "pytorch_model-00003-of-00004.bin", @@ -1389,13 +1217,11 @@ "transformer.h.34.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.34.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.34.mlp.lora_A_c_proj_MLP.4.weight": 
"pytorch_model-00003-of-00004.bin", - "transformer.h.34.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.34.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.34.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.34.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.34.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.34.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.34.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.35.attn.c_attn.bias": "pytorch_model-00003-of-00004.bin", "transformer.h.35.attn.c_attn.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.35.attn.c_proj.bias": "pytorch_model-00003-of-00004.bin", @@ -1405,25 +1231,21 @@ "transformer.h.35.attn.lora_A_c_attn.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.35.attn.lora_A_c_attn.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.35.attn.lora_A_c_attn.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.35.attn.lora_A_c_attn.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.35.attn.lora_A_c_proj.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.35.attn.lora_A_c_proj.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.35.attn.lora_A_c_proj.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.35.attn.lora_A_c_proj.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.35.attn.lora_A_c_proj.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.35.attn.lora_A_c_proj.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.35.attn.lora_B_c_attn.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.35.attn.lora_B_c_attn.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.35.attn.lora_B_c_attn.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.35.attn.lora_B_c_attn.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.35.attn.lora_B_c_attn.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.35.attn.lora_B_c_attn.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.35.attn.lora_B_c_proj.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.35.attn.lora_B_c_proj.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.35.attn.lora_B_c_proj.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.35.attn.lora_B_c_proj.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.35.attn.lora_B_c_proj.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.35.attn.lora_B_c_proj.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.35.ln_1.bias": "pytorch_model-00003-of-00004.bin", "transformer.h.35.ln_1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.35.ln_2.bias": "pytorch_model-00003-of-00004.bin", @@ -1437,13 +1259,11 @@ "transformer.h.35.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.35.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.35.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.35.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.35.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.35.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.35.mlp.lora_B_c_proj_MLP.2.weight": 
"pytorch_model-00003-of-00004.bin", "transformer.h.35.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.35.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.35.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.36.attn.c_attn.bias": "pytorch_model-00003-of-00004.bin", "transformer.h.36.attn.c_attn.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.36.attn.c_proj.bias": "pytorch_model-00003-of-00004.bin", @@ -1453,25 +1273,21 @@ "transformer.h.36.attn.lora_A_c_attn.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.36.attn.lora_A_c_attn.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.36.attn.lora_A_c_attn.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.36.attn.lora_A_c_attn.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.36.attn.lora_A_c_proj.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.36.attn.lora_A_c_proj.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.36.attn.lora_A_c_proj.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.36.attn.lora_A_c_proj.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.36.attn.lora_A_c_proj.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.36.attn.lora_A_c_proj.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.36.attn.lora_B_c_attn.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.36.attn.lora_B_c_attn.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.36.attn.lora_B_c_attn.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.36.attn.lora_B_c_attn.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.36.attn.lora_B_c_attn.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.36.attn.lora_B_c_attn.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.36.attn.lora_B_c_proj.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.36.attn.lora_B_c_proj.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.36.attn.lora_B_c_proj.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.36.attn.lora_B_c_proj.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.36.attn.lora_B_c_proj.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.36.attn.lora_B_c_proj.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.36.ln_1.bias": "pytorch_model-00003-of-00004.bin", "transformer.h.36.ln_1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.36.ln_2.bias": "pytorch_model-00003-of-00004.bin", @@ -1485,13 +1301,11 @@ "transformer.h.36.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.36.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.36.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.36.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.36.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.36.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.36.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.36.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.36.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.36.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.37.attn.c_attn.bias": 
"pytorch_model-00003-of-00004.bin", "transformer.h.37.attn.c_attn.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.37.attn.c_proj.bias": "pytorch_model-00003-of-00004.bin", @@ -1501,25 +1315,21 @@ "transformer.h.37.attn.lora_A_c_attn.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.37.attn.lora_A_c_attn.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.37.attn.lora_A_c_attn.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.37.attn.lora_A_c_attn.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.37.attn.lora_A_c_proj.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.37.attn.lora_A_c_proj.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.37.attn.lora_A_c_proj.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.37.attn.lora_A_c_proj.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.37.attn.lora_A_c_proj.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.37.attn.lora_A_c_proj.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.37.attn.lora_B_c_attn.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.37.attn.lora_B_c_attn.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.37.attn.lora_B_c_attn.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.37.attn.lora_B_c_attn.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.37.attn.lora_B_c_attn.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.37.attn.lora_B_c_attn.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.37.attn.lora_B_c_proj.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.37.attn.lora_B_c_proj.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.37.attn.lora_B_c_proj.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.37.attn.lora_B_c_proj.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.37.attn.lora_B_c_proj.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.37.attn.lora_B_c_proj.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.37.ln_1.bias": "pytorch_model-00003-of-00004.bin", "transformer.h.37.ln_1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.37.ln_2.bias": "pytorch_model-00003-of-00004.bin", @@ -1533,13 +1343,11 @@ "transformer.h.37.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.37.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.37.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.37.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.37.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.37.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.37.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.37.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.37.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.37.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.38.attn.c_attn.bias": "pytorch_model-00003-of-00004.bin", "transformer.h.38.attn.c_attn.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.38.attn.c_proj.bias": "pytorch_model-00003-of-00004.bin", @@ -1549,25 +1357,21 @@ "transformer.h.38.attn.lora_A_c_attn.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.38.attn.lora_A_c_attn.3.weight": 
"pytorch_model-00003-of-00004.bin", "transformer.h.38.attn.lora_A_c_attn.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.38.attn.lora_A_c_attn.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.38.attn.lora_A_c_proj.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.38.attn.lora_A_c_proj.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.38.attn.lora_A_c_proj.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.38.attn.lora_A_c_proj.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.38.attn.lora_A_c_proj.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.38.attn.lora_A_c_proj.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.38.attn.lora_B_c_attn.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.38.attn.lora_B_c_attn.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.38.attn.lora_B_c_attn.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.38.attn.lora_B_c_attn.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.38.attn.lora_B_c_attn.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.38.attn.lora_B_c_attn.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.38.attn.lora_B_c_proj.0.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.38.attn.lora_B_c_proj.1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.38.attn.lora_B_c_proj.2.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.38.attn.lora_B_c_proj.3.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.38.attn.lora_B_c_proj.4.weight": "pytorch_model-00003-of-00004.bin", - "transformer.h.38.attn.lora_B_c_proj.5.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.38.ln_1.bias": "pytorch_model-00003-of-00004.bin", "transformer.h.38.ln_1.weight": "pytorch_model-00003-of-00004.bin", "transformer.h.38.ln_2.bias": "pytorch_model-00003-of-00004.bin", @@ -1581,13 +1385,11 @@ "transformer.h.38.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.38.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.38.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00004-of-00004.bin", - "transformer.h.38.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.38.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.38.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.38.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.38.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.38.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00004-of-00004.bin", - "transformer.h.38.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.39.attn.c_attn.bias": "pytorch_model-00004-of-00004.bin", "transformer.h.39.attn.c_attn.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.39.attn.c_proj.bias": "pytorch_model-00004-of-00004.bin", @@ -1597,25 +1399,21 @@ "transformer.h.39.attn.lora_A_c_attn.2.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.39.attn.lora_A_c_attn.3.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.39.attn.lora_A_c_attn.4.weight": "pytorch_model-00004-of-00004.bin", - "transformer.h.39.attn.lora_A_c_attn.5.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.39.attn.lora_A_c_proj.0.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.39.attn.lora_A_c_proj.1.weight": 
"pytorch_model-00004-of-00004.bin", "transformer.h.39.attn.lora_A_c_proj.2.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.39.attn.lora_A_c_proj.3.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.39.attn.lora_A_c_proj.4.weight": "pytorch_model-00004-of-00004.bin", - "transformer.h.39.attn.lora_A_c_proj.5.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.39.attn.lora_B_c_attn.0.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.39.attn.lora_B_c_attn.1.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.39.attn.lora_B_c_attn.2.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.39.attn.lora_B_c_attn.3.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.39.attn.lora_B_c_attn.4.weight": "pytorch_model-00004-of-00004.bin", - "transformer.h.39.attn.lora_B_c_attn.5.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.39.attn.lora_B_c_proj.0.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.39.attn.lora_B_c_proj.1.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.39.attn.lora_B_c_proj.2.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.39.attn.lora_B_c_proj.3.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.39.attn.lora_B_c_proj.4.weight": "pytorch_model-00004-of-00004.bin", - "transformer.h.39.attn.lora_B_c_proj.5.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.39.ln_1.bias": "pytorch_model-00004-of-00004.bin", "transformer.h.39.ln_1.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.39.ln_2.bias": "pytorch_model-00004-of-00004.bin", @@ -1629,13 +1427,11 @@ "transformer.h.39.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.39.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.39.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00004-of-00004.bin", - "transformer.h.39.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.39.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.39.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.39.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.39.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.39.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00004-of-00004.bin", - "transformer.h.39.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00004-of-00004.bin", "transformer.h.4.attn.c_attn.bias": "pytorch_model-00001-of-00004.bin", "transformer.h.4.attn.c_attn.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.4.attn.c_proj.bias": "pytorch_model-00001-of-00004.bin", @@ -1645,25 +1441,21 @@ "transformer.h.4.attn.lora_A_c_attn.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.4.attn.lora_A_c_attn.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.4.attn.lora_A_c_attn.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.4.attn.lora_A_c_attn.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.4.attn.lora_A_c_proj.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.4.attn.lora_A_c_proj.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.4.attn.lora_A_c_proj.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.4.attn.lora_A_c_proj.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.4.attn.lora_A_c_proj.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.4.attn.lora_A_c_proj.5.weight": 
"pytorch_model-00001-of-00004.bin", "transformer.h.4.attn.lora_B_c_attn.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.4.attn.lora_B_c_attn.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.4.attn.lora_B_c_attn.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.4.attn.lora_B_c_attn.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.4.attn.lora_B_c_attn.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.4.attn.lora_B_c_attn.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.4.attn.lora_B_c_proj.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.4.attn.lora_B_c_proj.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.4.attn.lora_B_c_proj.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.4.attn.lora_B_c_proj.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.4.attn.lora_B_c_proj.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.4.attn.lora_B_c_proj.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.4.ln_1.bias": "pytorch_model-00001-of-00004.bin", "transformer.h.4.ln_1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.4.ln_2.bias": "pytorch_model-00001-of-00004.bin", @@ -1677,13 +1469,11 @@ "transformer.h.4.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.4.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.4.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.4.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.4.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.4.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.4.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.4.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.4.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.4.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.5.attn.c_attn.bias": "pytorch_model-00001-of-00004.bin", "transformer.h.5.attn.c_attn.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.5.attn.c_proj.bias": "pytorch_model-00001-of-00004.bin", @@ -1693,25 +1483,21 @@ "transformer.h.5.attn.lora_A_c_attn.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.5.attn.lora_A_c_attn.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.5.attn.lora_A_c_attn.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.5.attn.lora_A_c_attn.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.5.attn.lora_A_c_proj.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.5.attn.lora_A_c_proj.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.5.attn.lora_A_c_proj.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.5.attn.lora_A_c_proj.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.5.attn.lora_A_c_proj.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.5.attn.lora_A_c_proj.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.5.attn.lora_B_c_attn.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.5.attn.lora_B_c_attn.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.5.attn.lora_B_c_attn.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.5.attn.lora_B_c_attn.3.weight": "pytorch_model-00001-of-00004.bin", 
"transformer.h.5.attn.lora_B_c_attn.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.5.attn.lora_B_c_attn.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.5.attn.lora_B_c_proj.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.5.attn.lora_B_c_proj.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.5.attn.lora_B_c_proj.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.5.attn.lora_B_c_proj.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.5.attn.lora_B_c_proj.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.5.attn.lora_B_c_proj.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.5.ln_1.bias": "pytorch_model-00001-of-00004.bin", "transformer.h.5.ln_1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.5.ln_2.bias": "pytorch_model-00001-of-00004.bin", @@ -1725,13 +1511,11 @@ "transformer.h.5.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.5.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.5.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.5.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.5.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.5.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.5.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.5.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.5.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.5.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.6.attn.c_attn.bias": "pytorch_model-00001-of-00004.bin", "transformer.h.6.attn.c_attn.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.6.attn.c_proj.bias": "pytorch_model-00001-of-00004.bin", @@ -1741,25 +1525,21 @@ "transformer.h.6.attn.lora_A_c_attn.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.6.attn.lora_A_c_attn.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.6.attn.lora_A_c_attn.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.6.attn.lora_A_c_attn.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.6.attn.lora_A_c_proj.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.6.attn.lora_A_c_proj.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.6.attn.lora_A_c_proj.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.6.attn.lora_A_c_proj.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.6.attn.lora_A_c_proj.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.6.attn.lora_A_c_proj.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.6.attn.lora_B_c_attn.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.6.attn.lora_B_c_attn.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.6.attn.lora_B_c_attn.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.6.attn.lora_B_c_attn.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.6.attn.lora_B_c_attn.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.6.attn.lora_B_c_attn.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.6.attn.lora_B_c_proj.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.6.attn.lora_B_c_proj.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.6.attn.lora_B_c_proj.2.weight": 
"pytorch_model-00001-of-00004.bin", "transformer.h.6.attn.lora_B_c_proj.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.6.attn.lora_B_c_proj.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.6.attn.lora_B_c_proj.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.6.ln_1.bias": "pytorch_model-00001-of-00004.bin", "transformer.h.6.ln_1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.6.ln_2.bias": "pytorch_model-00001-of-00004.bin", @@ -1773,13 +1553,11 @@ "transformer.h.6.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.6.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.6.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.6.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.6.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.6.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.6.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.6.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.6.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.6.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.7.attn.c_attn.bias": "pytorch_model-00001-of-00004.bin", "transformer.h.7.attn.c_attn.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.7.attn.c_proj.bias": "pytorch_model-00001-of-00004.bin", @@ -1789,25 +1567,21 @@ "transformer.h.7.attn.lora_A_c_attn.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.7.attn.lora_A_c_attn.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.7.attn.lora_A_c_attn.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.7.attn.lora_A_c_attn.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.7.attn.lora_A_c_proj.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.7.attn.lora_A_c_proj.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.7.attn.lora_A_c_proj.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.7.attn.lora_A_c_proj.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.7.attn.lora_A_c_proj.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.7.attn.lora_A_c_proj.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.7.attn.lora_B_c_attn.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.7.attn.lora_B_c_attn.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.7.attn.lora_B_c_attn.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.7.attn.lora_B_c_attn.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.7.attn.lora_B_c_attn.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.7.attn.lora_B_c_attn.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.7.attn.lora_B_c_proj.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.7.attn.lora_B_c_proj.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.7.attn.lora_B_c_proj.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.7.attn.lora_B_c_proj.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.7.attn.lora_B_c_proj.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.7.attn.lora_B_c_proj.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.7.ln_1.bias": "pytorch_model-00001-of-00004.bin", "transformer.h.7.ln_1.weight": 
"pytorch_model-00001-of-00004.bin", "transformer.h.7.ln_2.bias": "pytorch_model-00001-of-00004.bin", @@ -1821,13 +1595,11 @@ "transformer.h.7.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.7.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.7.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.7.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.7.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.7.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.7.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.7.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.7.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.7.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.8.attn.c_attn.bias": "pytorch_model-00001-of-00004.bin", "transformer.h.8.attn.c_attn.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.8.attn.c_proj.bias": "pytorch_model-00001-of-00004.bin", @@ -1837,25 +1609,21 @@ "transformer.h.8.attn.lora_A_c_attn.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.8.attn.lora_A_c_attn.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.8.attn.lora_A_c_attn.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.8.attn.lora_A_c_attn.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.8.attn.lora_A_c_proj.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.8.attn.lora_A_c_proj.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.8.attn.lora_A_c_proj.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.8.attn.lora_A_c_proj.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.8.attn.lora_A_c_proj.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.8.attn.lora_A_c_proj.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.8.attn.lora_B_c_attn.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.8.attn.lora_B_c_attn.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.8.attn.lora_B_c_attn.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.8.attn.lora_B_c_attn.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.8.attn.lora_B_c_attn.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.8.attn.lora_B_c_attn.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.8.attn.lora_B_c_proj.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.8.attn.lora_B_c_proj.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.8.attn.lora_B_c_proj.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.8.attn.lora_B_c_proj.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.8.attn.lora_B_c_proj.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.8.attn.lora_B_c_proj.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.8.ln_1.bias": "pytorch_model-00001-of-00004.bin", "transformer.h.8.ln_1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.8.ln_2.bias": "pytorch_model-00001-of-00004.bin", @@ -1869,13 +1637,11 @@ "transformer.h.8.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.8.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.8.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00001-of-00004.bin", - 
"transformer.h.8.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.8.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.8.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.8.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.8.mlp.lora_B_c_proj_MLP.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.8.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.8.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.9.attn.c_attn.bias": "pytorch_model-00001-of-00004.bin", "transformer.h.9.attn.c_attn.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.9.attn.c_proj.bias": "pytorch_model-00001-of-00004.bin", @@ -1885,25 +1651,21 @@ "transformer.h.9.attn.lora_A_c_attn.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.9.attn.lora_A_c_attn.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.9.attn.lora_A_c_attn.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.9.attn.lora_A_c_attn.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.9.attn.lora_A_c_proj.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.9.attn.lora_A_c_proj.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.9.attn.lora_A_c_proj.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.9.attn.lora_A_c_proj.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.9.attn.lora_A_c_proj.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.9.attn.lora_A_c_proj.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.9.attn.lora_B_c_attn.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.9.attn.lora_B_c_attn.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.9.attn.lora_B_c_attn.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.9.attn.lora_B_c_attn.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.9.attn.lora_B_c_attn.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.9.attn.lora_B_c_attn.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.9.attn.lora_B_c_proj.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.9.attn.lora_B_c_proj.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.9.attn.lora_B_c_proj.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.9.attn.lora_B_c_proj.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.9.attn.lora_B_c_proj.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.9.attn.lora_B_c_proj.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.9.ln_1.bias": "pytorch_model-00001-of-00004.bin", "transformer.h.9.ln_1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.9.ln_2.bias": "pytorch_model-00001-of-00004.bin", @@ -1917,13 +1679,11 @@ "transformer.h.9.mlp.lora_A_c_proj_MLP.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.9.mlp.lora_A_c_proj_MLP.3.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.9.mlp.lora_A_c_proj_MLP.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.9.mlp.lora_A_c_proj_MLP.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.9.mlp.lora_B_c_proj_MLP.0.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.9.mlp.lora_B_c_proj_MLP.1.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.9.mlp.lora_B_c_proj_MLP.2.weight": "pytorch_model-00001-of-00004.bin", "transformer.h.9.mlp.lora_B_c_proj_MLP.3.weight": 
"pytorch_model-00001-of-00004.bin", "transformer.h.9.mlp.lora_B_c_proj_MLP.4.weight": "pytorch_model-00001-of-00004.bin", - "transformer.h.9.mlp.lora_B_c_proj_MLP.5.weight": "pytorch_model-00001-of-00004.bin", "transformer.ln_f.bias": "pytorch_model-00004-of-00004.bin", "transformer.ln_f.weight": "pytorch_model-00004-of-00004.bin", "transformer.moe_gate.weight": "pytorch_model-00001-of-00004.bin",