Upload model

Browse files

Files changed (9) hide show

config.json +46 -0
configuration_reward_model.py +13 -0
modeling_rewards.py +85 -0
pytorch_model-00001-of-00005.bin +3 -0
pytorch_model-00002-of-00005.bin +3 -0
pytorch_model-00003-of-00005.bin +3 -0
pytorch_model-00004-of-00005.bin +3 -0
pytorch_model-00005-of-00005.bin +3 -0
pytorch_model.bin.index.json +688 -0

config.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "_name_or_path": "/mnt/nvme/home/zanussbaum/.cache/huggingface/hub/models--LawInformedAI--reward_model_epoch_0/snapshots/2f4cc4d6ce97d6cfdb43d225e47f1736a72e5152/",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "RewardModel"
+  ],
+  "attn_pdrop": 0.0,
+  "auto_map": {
+    "AutoConfig": "configuration_reward_model.RewardConfig",
+    "AutoModel": "modeling_rewards.RewardModel"
+  },
+  "base_model": "LawInformedAI/gptj-finetuned_court_opinions",
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.0,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "n_embd": 4096,
+  "n_head": 16,
+  "n_inner": null,
+  "n_layer": 28,
+  "n_positions": 2048,
+  "pad_id": null,
+  "resid_pdrop": 0.0,
+  "rotary": true,
+  "rotary_dim": 64,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50,
+      "temperature": 1.0
+    }
+  },
+  "tie_word_embeddings": false,
+  "tokenizer_class": "GPT2Tokenizer",
+  "torch_dtype": "float32",
+  "transformers_version": "4.27.4",
+  "use_cache": false,
+  "vocab_size": 50400
+}

configuration_reward_model.py ADDED Viewed

	@@ -0,0 +1,13 @@

+from transformers import PretrainedConfig
+from typing import List
+class RewardConfig(PretrainedConfig):
+    def __init__(
+        self,
+        base_model="EleutherAI/gpt-j-6b",
+        **kwargs,
+    ):
+        self.base_model = base_model
+        super().__init__(**kwargs)

modeling_rewards.py ADDED Viewed

	@@ -0,0 +1,85 @@

+from transformers import PreTrainedModel
+from transformers.modeling_outputs import ModelOutput
+from torch import nn
+import torch
+from dataclasses import dataclass
+from typing import Optional
+from configuration_reward_model import RewardConfig
+from transformers import AutoModelForCausalLM
+# adapted from https://github.com/Dahoas/reward-modeling
+@dataclass
+class RewardOutputs(ModelOutput):
+    """Output container returned by ``RewardModel.forward``."""
+    # Scalar training loss computed from the chosen/rejected reward gap.
+    loss: Optional[torch.FloatTensor] = None
+    # Per-sequence rewards gathered at each row's end position.
+    rewards: Optional[torch.FloatTensor] = None
+class RewardModel(PreTrainedModel):
+    config_class = RewardConfig
+    def __init__(self, config, **kwargs):
+        super().__init__(config, **kwargs)
+        base_model = AutoModelForCausalLM.from_pretrained(config.base_model)
+        self.config = config
+        self.neox = "neox" in self.config.model_type
+        # gpt-neo models have hidden_size instead of n_embd
+        self.config.n_embd = self.config.hidden_size if hasattr(self.config, "hidden_size") else self.config.n_embd
+        self.transformer = base_model.transformer
+        dtype = self.config.torch_dtype if hasattr(self.config, "torch_dtype") is not None else torch.float32
+        dtype = torch.float16 if dtype == "float16" else torch.float32
+        self.v_head = nn.Linear(self.config.n_embd, 1, bias=False, dtype=torch.float16)
+        self.PAD_ID = config.pad_id
+        self.base_model = base_model
+    def gradient_checkpointing_enable(self):
+        self.base_model.gradient_checkpointing_enable()
+    def forward(
+        self,
+        chosen_input_ids=None,
+        rejected_input_ids=None,
+        past_key_values=None,
+        chosen_attention_mask=None,
+        rejected_attention_mask=None,
+        token_type_ids=None,
+        position_ids=None,
+        head_mask=None,
+        inputs_embeds=None,
+    ):
+        # concat chosen + rejected where first half is chosen and second half is rejected
+        input_ids = torch.cat([chosen_input_ids, rejected_input_ids], dim=0)
+        attention_mask = torch.cat([chosen_attention_mask, rejected_attention_mask], dim=0)
+        transformer_outputs = self.transformer(
+            input_ids,
+            past_key_values=past_key_values,
+            attention_mask=attention_mask,
+            token_type_ids=token_type_ids,
+            position_ids=position_ids,
+            head_mask=head_mask,
+            inputs_embeds=inputs_embeds,
+        )
+        hidden_states = transformer_outputs[0]
+        rewards = self.v_head(hidden_states).squeeze(-1)
+        bs = input_ids.shape[0] // 2
+        # argmax returns the first index of the maximum value !
+        # so we find the first pad/eos token at each row
+        ends = torch.argmax((input_ids == self.PAD_ID).type(torch.float32), dim=1).view(-1, 1)
+        rewards = torch.gather(rewards, 1, ends)
+        chosen_rewards = rewards[:bs]
+        rejected_rewards = rewards[bs:]
+        loss = -torch.log(torch.sigmoid(chosen_rewards - rejected_rewards)).mean()
+        return RewardOutputs(
+            loss=loss,
+            rewards=rewards,
+        )

pytorch_model-00001-of-00005.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b90613038aadda3868b9f493e9ee7d9b5233f3e7c8a472f86e3c90c54439d748
+size 10004232434

pytorch_model-00002-of-00005.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ad15a1cd00577020be147bb2beef54e548e1f43d5d57852432f49ab15c7c3c15
+size 9983934481

pytorch_model-00003-of-00005.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ed63fa541d14fa3ff6607c9d08a7a673a62233bb78c37a81ac171c27f598b634
+size 10004291041

pytorch_model-00004-of-00005.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2ae2941bc8a00c6cfbb53a0234ec0865170cb71371d887445e3f10349ec19739
+size 9983871187

pytorch_model-00005-of-00005.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9ae132de317ea0db5aefbe3f2c974946e84da48a4545b1f39ce4b67f240b5ba6
+size 7839899571

pytorch_model.bin.index.json ADDED Viewed

	@@ -0,0 +1,688 @@

+{
+  "metadata": {
+    "total_size": 47610475616.0
+  },
+  "weight_map": {
+    "base_model.lm_head.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.lm_head.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.0.attn.bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.0.attn.k_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.0.attn.masked_bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.0.attn.out_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.0.attn.q_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.0.attn.v_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.0.ln_1.bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.0.ln_1.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.0.mlp.fc_in.bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.0.mlp.fc_in.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.0.mlp.fc_out.bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.0.mlp.fc_out.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.1.attn.bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.1.attn.k_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.1.attn.masked_bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.1.attn.out_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.1.attn.q_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.1.attn.v_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.1.ln_1.bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.1.ln_1.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.1.mlp.fc_in.bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.1.mlp.fc_in.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.1.mlp.fc_out.bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.1.mlp.fc_out.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.10.attn.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.10.attn.k_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.10.attn.masked_bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.10.attn.out_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.10.attn.q_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.10.attn.v_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.10.ln_1.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.10.ln_1.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.10.mlp.fc_in.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.10.mlp.fc_in.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.10.mlp.fc_out.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.10.mlp.fc_out.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.11.attn.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.11.attn.k_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.11.attn.masked_bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.11.attn.out_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.11.attn.q_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.11.attn.v_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.11.ln_1.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.11.ln_1.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.11.mlp.fc_in.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.11.mlp.fc_in.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.11.mlp.fc_out.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.11.mlp.fc_out.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.12.attn.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.12.attn.k_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.12.attn.masked_bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.12.attn.out_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.12.attn.q_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.12.attn.v_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.12.ln_1.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.12.ln_1.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.12.mlp.fc_in.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.12.mlp.fc_in.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.12.mlp.fc_out.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.12.mlp.fc_out.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.13.attn.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.13.attn.k_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.13.attn.masked_bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.13.attn.out_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.13.attn.q_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.13.attn.v_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.13.ln_1.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.13.ln_1.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.13.mlp.fc_in.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.13.mlp.fc_in.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.13.mlp.fc_out.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.13.mlp.fc_out.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.14.attn.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.14.attn.k_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.14.attn.masked_bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.14.attn.out_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.14.attn.q_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.14.attn.v_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.14.ln_1.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.14.ln_1.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.14.mlp.fc_in.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.14.mlp.fc_in.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.14.mlp.fc_out.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.14.mlp.fc_out.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.15.attn.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.15.attn.k_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.15.attn.masked_bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.15.attn.out_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.15.attn.q_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.15.attn.v_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.15.ln_1.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.15.ln_1.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.15.mlp.fc_in.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.15.mlp.fc_in.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.15.mlp.fc_out.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.15.mlp.fc_out.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.16.attn.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.16.attn.k_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.16.attn.masked_bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.16.attn.out_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.16.attn.q_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.16.attn.v_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.16.ln_1.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.16.ln_1.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.16.mlp.fc_in.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.16.mlp.fc_in.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.16.mlp.fc_out.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.16.mlp.fc_out.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.17.attn.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.17.attn.k_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.17.attn.masked_bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.17.attn.out_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.17.attn.q_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.17.attn.v_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.17.ln_1.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.17.ln_1.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.17.mlp.fc_in.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.17.mlp.fc_in.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.17.mlp.fc_out.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.17.mlp.fc_out.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.18.attn.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.18.attn.k_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.18.attn.masked_bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.18.attn.out_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.18.attn.q_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.18.attn.v_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.18.ln_1.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.18.ln_1.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.18.mlp.fc_in.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.18.mlp.fc_in.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.18.mlp.fc_out.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.18.mlp.fc_out.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.19.attn.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.19.attn.k_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.19.attn.masked_bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.19.attn.out_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.19.attn.q_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.19.attn.v_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.19.ln_1.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.19.ln_1.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.19.mlp.fc_in.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.19.mlp.fc_in.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.19.mlp.fc_out.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.19.mlp.fc_out.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.2.attn.bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.2.attn.k_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.2.attn.masked_bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.2.attn.out_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.2.attn.q_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.2.attn.v_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.2.ln_1.bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.2.ln_1.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.2.mlp.fc_in.bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.2.mlp.fc_in.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.2.mlp.fc_out.bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.2.mlp.fc_out.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.20.attn.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.20.attn.k_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.20.attn.masked_bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.20.attn.out_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.20.attn.q_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.20.attn.v_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.20.ln_1.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.20.ln_1.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.20.mlp.fc_in.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.20.mlp.fc_in.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.20.mlp.fc_out.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.20.mlp.fc_out.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.21.attn.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.21.attn.k_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.21.attn.masked_bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.21.attn.out_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.21.attn.q_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.21.attn.v_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.21.ln_1.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.21.ln_1.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.21.mlp.fc_in.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.21.mlp.fc_in.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.21.mlp.fc_out.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.21.mlp.fc_out.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.22.attn.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.22.attn.k_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.22.attn.masked_bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.22.attn.out_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.22.attn.q_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.22.attn.v_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.22.ln_1.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.22.ln_1.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.22.mlp.fc_in.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.22.mlp.fc_in.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.22.mlp.fc_out.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.22.mlp.fc_out.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.23.attn.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.23.attn.k_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.23.attn.masked_bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.23.attn.out_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.23.attn.q_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.23.attn.v_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.23.ln_1.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.23.ln_1.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.23.mlp.fc_in.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.23.mlp.fc_in.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.23.mlp.fc_out.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.23.mlp.fc_out.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.24.attn.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.24.attn.k_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.24.attn.masked_bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.24.attn.out_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.24.attn.q_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.24.attn.v_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.24.ln_1.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.24.ln_1.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.24.mlp.fc_in.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.24.mlp.fc_in.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.24.mlp.fc_out.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.24.mlp.fc_out.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.25.attn.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.25.attn.k_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.25.attn.masked_bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.25.attn.out_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.25.attn.q_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.25.attn.v_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.25.ln_1.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.25.ln_1.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.25.mlp.fc_in.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.25.mlp.fc_in.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.25.mlp.fc_out.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.25.mlp.fc_out.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.26.attn.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.26.attn.k_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.26.attn.masked_bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.26.attn.out_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.26.attn.q_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.26.attn.v_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.26.ln_1.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.26.ln_1.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.26.mlp.fc_in.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.26.mlp.fc_in.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.26.mlp.fc_out.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.26.mlp.fc_out.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.27.attn.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.27.attn.k_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.27.attn.masked_bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.27.attn.out_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.27.attn.q_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.27.attn.v_proj.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.27.ln_1.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.27.ln_1.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.27.mlp.fc_in.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.27.mlp.fc_in.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.27.mlp.fc_out.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.27.mlp.fc_out.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.h.3.attn.bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.3.attn.k_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.3.attn.masked_bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.3.attn.out_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.3.attn.q_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.3.attn.v_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.3.ln_1.bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.3.ln_1.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.3.mlp.fc_in.bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.3.mlp.fc_in.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.3.mlp.fc_out.bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.3.mlp.fc_out.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.4.attn.bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.4.attn.k_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.4.attn.masked_bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.4.attn.out_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.4.attn.q_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.4.attn.v_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.4.ln_1.bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.4.ln_1.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.4.mlp.fc_in.bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.4.mlp.fc_in.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.4.mlp.fc_out.bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.4.mlp.fc_out.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.5.attn.bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.5.attn.k_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.5.attn.masked_bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.5.attn.out_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.5.attn.q_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.5.attn.v_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.5.ln_1.bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.5.ln_1.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.5.mlp.fc_in.bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.5.mlp.fc_in.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.5.mlp.fc_out.bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.5.mlp.fc_out.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.6.attn.bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.6.attn.k_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.6.attn.masked_bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.6.attn.out_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.6.attn.q_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.6.attn.v_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.6.ln_1.bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.6.ln_1.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.6.mlp.fc_in.bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.6.mlp.fc_in.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.6.mlp.fc_out.bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.6.mlp.fc_out.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.7.attn.bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.7.attn.k_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.7.attn.masked_bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.7.attn.out_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.7.attn.q_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.7.attn.v_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.7.ln_1.bias": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.7.ln_1.weight": "pytorch_model-00003-of-00005.bin",
+    "base_model.transformer.h.7.mlp.fc_in.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.7.mlp.fc_in.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.7.mlp.fc_out.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.7.mlp.fc_out.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.8.attn.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.8.attn.k_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.8.attn.masked_bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.8.attn.out_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.8.attn.q_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.8.attn.v_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.8.ln_1.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.8.ln_1.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.8.mlp.fc_in.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.8.mlp.fc_in.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.8.mlp.fc_out.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.8.mlp.fc_out.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.9.attn.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.9.attn.k_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.9.attn.masked_bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.9.attn.out_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.9.attn.q_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.9.attn.v_proj.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.9.ln_1.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.9.ln_1.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.9.mlp.fc_in.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.9.mlp.fc_in.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.9.mlp.fc_out.bias": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.h.9.mlp.fc_out.weight": "pytorch_model-00004-of-00005.bin",
+    "base_model.transformer.ln_f.bias": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.ln_f.weight": "pytorch_model-00005-of-00005.bin",
+    "base_model.transformer.wte.weight": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.0.attn.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.0.attn.k_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.0.attn.masked_bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.0.attn.out_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.0.attn.q_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.0.attn.v_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.0.ln_1.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.0.ln_1.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.0.mlp.fc_in.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.0.mlp.fc_in.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.0.mlp.fc_out.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.0.mlp.fc_out.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.1.attn.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.1.attn.k_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.1.attn.masked_bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.1.attn.out_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.1.attn.q_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.1.attn.v_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.1.ln_1.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.1.ln_1.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.1.mlp.fc_in.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.1.mlp.fc_in.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.1.mlp.fc_out.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.1.mlp.fc_out.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.10.attn.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.10.attn.k_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.10.attn.masked_bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.10.attn.out_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.10.attn.q_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.10.attn.v_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.10.ln_1.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.10.ln_1.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.10.mlp.fc_in.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.10.mlp.fc_in.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.10.mlp.fc_out.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.10.mlp.fc_out.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.11.attn.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.11.attn.k_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.11.attn.masked_bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.11.attn.out_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.11.attn.q_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.11.attn.v_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.11.ln_1.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.11.ln_1.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.11.mlp.fc_in.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.11.mlp.fc_in.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.11.mlp.fc_out.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.11.mlp.fc_out.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.12.attn.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.12.attn.k_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.12.attn.masked_bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.12.attn.out_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.12.attn.q_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.12.attn.v_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.12.ln_1.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.12.ln_1.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.12.mlp.fc_in.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.12.mlp.fc_in.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.12.mlp.fc_out.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.12.mlp.fc_out.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.13.attn.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.13.attn.k_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.13.attn.masked_bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.13.attn.out_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.13.attn.q_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.13.attn.v_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.13.ln_1.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.13.ln_1.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.13.mlp.fc_in.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.13.mlp.fc_in.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.13.mlp.fc_out.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.13.mlp.fc_out.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.14.attn.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.14.attn.k_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.14.attn.masked_bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.14.attn.out_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.14.attn.q_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.14.attn.v_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.14.ln_1.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.14.ln_1.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.14.mlp.fc_in.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.14.mlp.fc_in.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.14.mlp.fc_out.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.14.mlp.fc_out.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.15.attn.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.15.attn.k_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.15.attn.masked_bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.15.attn.out_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.15.attn.q_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.15.attn.v_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.15.ln_1.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.15.ln_1.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.15.mlp.fc_in.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.15.mlp.fc_in.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.15.mlp.fc_out.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.15.mlp.fc_out.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.16.attn.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.16.attn.k_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.16.attn.masked_bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.16.attn.out_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.16.attn.q_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.16.attn.v_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.16.ln_1.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.16.ln_1.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.16.mlp.fc_in.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.16.mlp.fc_in.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.16.mlp.fc_out.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.16.mlp.fc_out.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.17.attn.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.17.attn.k_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.17.attn.masked_bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.17.attn.out_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.17.attn.q_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.17.attn.v_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.17.ln_1.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.17.ln_1.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.17.mlp.fc_in.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.17.mlp.fc_in.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.17.mlp.fc_out.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.17.mlp.fc_out.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.18.attn.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.18.attn.k_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.18.attn.masked_bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.18.attn.out_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.18.attn.q_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.18.attn.v_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.18.ln_1.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.18.ln_1.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.18.mlp.fc_in.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.18.mlp.fc_in.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.18.mlp.fc_out.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.18.mlp.fc_out.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.19.attn.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.19.attn.k_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.19.attn.masked_bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.19.attn.out_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.19.attn.q_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.19.attn.v_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.19.ln_1.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.19.ln_1.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.19.mlp.fc_in.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.19.mlp.fc_in.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.19.mlp.fc_out.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.19.mlp.fc_out.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.2.attn.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.2.attn.k_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.2.attn.masked_bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.2.attn.out_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.2.attn.q_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.2.attn.v_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.2.ln_1.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.2.ln_1.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.2.mlp.fc_in.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.2.mlp.fc_in.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.2.mlp.fc_out.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.2.mlp.fc_out.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.20.attn.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.20.attn.k_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.20.attn.masked_bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.20.attn.out_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.20.attn.q_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.20.attn.v_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.20.ln_1.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.20.ln_1.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.20.mlp.fc_in.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.20.mlp.fc_in.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.20.mlp.fc_out.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.20.mlp.fc_out.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.21.attn.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.21.attn.k_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.21.attn.masked_bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.21.attn.out_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.21.attn.q_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.21.attn.v_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.21.ln_1.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.21.ln_1.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.21.mlp.fc_in.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.21.mlp.fc_in.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.21.mlp.fc_out.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.21.mlp.fc_out.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.22.attn.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.22.attn.k_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.22.attn.masked_bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.22.attn.out_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.22.attn.q_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.22.attn.v_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.22.ln_1.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.22.ln_1.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.22.mlp.fc_in.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.22.mlp.fc_in.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.22.mlp.fc_out.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.22.mlp.fc_out.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.23.attn.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.23.attn.k_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.23.attn.masked_bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.23.attn.out_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.23.attn.q_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.23.attn.v_proj.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.23.ln_1.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.23.ln_1.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.23.mlp.fc_in.bias": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.23.mlp.fc_in.weight": "pytorch_model-00002-of-00005.bin",
+    "transformer.h.23.mlp.fc_out.bias": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.23.mlp.fc_out.weight": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.24.attn.bias": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.24.attn.k_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.24.attn.masked_bias": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.24.attn.out_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.24.attn.q_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.24.attn.v_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.24.ln_1.bias": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.24.ln_1.weight": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.24.mlp.fc_in.bias": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.24.mlp.fc_in.weight": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.24.mlp.fc_out.bias": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.24.mlp.fc_out.weight": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.25.attn.bias": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.25.attn.k_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.25.attn.masked_bias": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.25.attn.out_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.25.attn.q_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.25.attn.v_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.25.ln_1.bias": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.25.ln_1.weight": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.25.mlp.fc_in.bias": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.25.mlp.fc_in.weight": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.25.mlp.fc_out.bias": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.25.mlp.fc_out.weight": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.26.attn.bias": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.26.attn.k_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.26.attn.masked_bias": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.26.attn.out_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.26.attn.q_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.26.attn.v_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.26.ln_1.bias": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.26.ln_1.weight": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.26.mlp.fc_in.bias": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.26.mlp.fc_in.weight": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.26.mlp.fc_out.bias": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.26.mlp.fc_out.weight": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.27.attn.bias": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.27.attn.k_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.27.attn.masked_bias": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.27.attn.out_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.27.attn.q_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.27.attn.v_proj.weight": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.27.ln_1.bias": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.27.ln_1.weight": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.27.mlp.fc_in.bias": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.27.mlp.fc_in.weight": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.27.mlp.fc_out.bias": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.27.mlp.fc_out.weight": "pytorch_model-00003-of-00005.bin",
+    "transformer.h.3.attn.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.3.attn.k_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.3.attn.masked_bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.3.attn.out_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.3.attn.q_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.3.attn.v_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.3.ln_1.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.3.ln_1.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.3.mlp.fc_in.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.3.mlp.fc_in.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.3.mlp.fc_out.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.3.mlp.fc_out.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.4.attn.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.4.attn.k_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.4.attn.masked_bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.4.attn.out_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.4.attn.q_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.4.attn.v_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.4.ln_1.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.4.ln_1.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.4.mlp.fc_in.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.4.mlp.fc_in.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.4.mlp.fc_out.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.4.mlp.fc_out.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.5.attn.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.5.attn.k_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.5.attn.masked_bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.5.attn.out_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.5.attn.q_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.5.attn.v_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.5.ln_1.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.5.ln_1.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.5.mlp.fc_in.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.5.mlp.fc_in.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.5.mlp.fc_out.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.5.mlp.fc_out.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.6.attn.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.6.attn.k_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.6.attn.masked_bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.6.attn.out_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.6.attn.q_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.6.attn.v_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.6.ln_1.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.6.ln_1.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.6.mlp.fc_in.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.6.mlp.fc_in.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.6.mlp.fc_out.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.6.mlp.fc_out.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.7.attn.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.7.attn.k_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.7.attn.masked_bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.7.attn.out_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.7.attn.q_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.7.attn.v_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.7.ln_1.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.7.ln_1.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.7.mlp.fc_in.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.7.mlp.fc_in.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.7.mlp.fc_out.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.7.mlp.fc_out.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.8.attn.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.8.attn.k_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.8.attn.masked_bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.8.attn.out_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.8.attn.q_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.8.attn.v_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.8.ln_1.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.8.ln_1.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.8.mlp.fc_in.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.8.mlp.fc_in.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.8.mlp.fc_out.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.8.mlp.fc_out.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.9.attn.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.9.attn.k_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.9.attn.masked_bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.9.attn.out_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.9.attn.q_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.9.attn.v_proj.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.9.ln_1.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.9.ln_1.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.9.mlp.fc_in.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.9.mlp.fc_in.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.9.mlp.fc_out.bias": "pytorch_model-00001-of-00005.bin",
+    "transformer.h.9.mlp.fc_out.weight": "pytorch_model-00001-of-00005.bin",
+    "transformer.ln_f.bias": "pytorch_model-00003-of-00005.bin",
+    "transformer.ln_f.weight": "pytorch_model-00003-of-00005.bin",
+    "transformer.wte.weight": "pytorch_model-00001-of-00005.bin",
+    "v_head.weight": "pytorch_model-00003-of-00005.bin"
+  }
+}