Florian Valade committed
Commit 794f115 · 1 Parent(s): 24bcb4b

fix weird git behavior

.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
BranchyModelConfig.py ADDED
@@ -0,0 +1,78 @@
+ from typing import List, Optional
+ from transformers import PretrainedConfig
+ import logging
+
+ logger = logging.getLogger(__name__)
+
+ class BranchyModelConfig(PretrainedConfig):
+     """
+     Configuration class for BranchyModel. This class extends the PretrainedConfig class from the Transformers
+     library, providing configuration specific to models with branch functionality.
+
+     Attributes:
+         branch_locations (List[int]): Indices of the layers after which branches are added. Indices start
+             from 0, and each index refers to a layer in the underlying transformer model.
+         penalty_weight (Optional[float]): Weight of the penalty term used in the "penalized_cross_entropy"
+             loss. Must lie in the range [0, 1].
+         head_window_size (int): Number of tokens each branch considers from the input sequence. Limiting
+             the context size each branch processes reduces the computational load.
+
+     Example:
+         config = BranchyModelConfig(
+             branch_locations=[2, 4, 6],
+             head_window_size=512
+         )
+
+     Note:
+         This configuration class is specifically designed for use with the BranchyModel class, enabling
+         flexible and customizable branching within transformer models.
+     """
+     model_type = "branchy"  # Optional, but useful for identifying the model type in the Transformers library
+
+     def __init__(
+         self,
+         model_str: Optional[str] = None,
+         head_thresholds: Optional[List[float]] = None,
+         confidence_metric: Optional[str] = "breaking_ties",
+         branch_locations: Optional[List[int]] = None,
+         branch_number: Optional[int] = 3,
+         penalty_weight: Optional[float] = 0,
+         head_window_size: int = 512,
+         copy_lm_head: Optional[bool] = False,
+         **kwargs
+     ):
+         """
+         Initializes the BranchyModelConfig.
+
+         Args:
+             model_str (str): Identifier of the base model on Hugging Face's model hub.
+             head_thresholds (List[float], optional): Per-branch confidence thresholds used for early
+                 exiting at inference. Defaults to None.
+             confidence_metric (str, optional): Confidence measure, either "breaking_ties" or "max".
+                 Defaults to "breaking_ties".
+             branch_locations (List[int], optional): Locations of the branches. Defaults to None, in which
+                 case branch_number determines the number of branches.
+             branch_number (Optional[int], optional): Number of branches if branch_locations is not
+                 provided. Defaults to 3.
+             penalty_weight (Optional[float], optional): Weight for the penalty in loss calculation.
+                 Defaults to 0.
+             head_window_size (int, optional): Number of tokens each branch can see. Defaults to 512.
+             copy_lm_head (bool, optional): Whether branch heads are initialized as copies of the base
+                 LM head. Defaults to False.
+         """
+         self.model_str = model_str
+         self.head_thresholds = head_thresholds
+         self.confidence_metric = confidence_metric
+         assert self.confidence_metric in ["breaking_ties", "max"], (
+             "confidence_metric must be 'breaking_ties' or 'max'. It should depend on how you found the thresholds."
+         )
+         self.branch_locations = branch_locations
+         self.penalty_weight = penalty_weight
+         self.head_window_size = head_window_size
+         if branch_locations is not None and branch_number is not None:
+             logger.warning("Both branch_locations and branch_number are provided. Using branch_locations.")
+         self.branch_number = branch_number if branch_locations is None else len(branch_locations)
+         self.copy_lm_head = copy_lm_head
+         assert self.branch_number > 0, "branch_number must be a positive integer."
+         assert isinstance(self.penalty_weight, (float, int)), "penalty_weight must be a float or an integer."
+         assert 0 <= self.penalty_weight <= 1, "penalty_weight must be in the range [0, 1]."
+         if branch_locations is not None:
+             assert all(isinstance(loc, int) for loc in self.branch_locations), "Branch locations must be integers."
+             assert all(loc >= 0 for loc in self.branch_locations), "Branch locations must be non-negative."
+         if self.head_window_size is not None:
+             assert self.head_window_size > 0, "head_window_size must be a positive integer or None."
+         if isinstance(self.head_thresholds, list):
+             assert len(self.head_thresholds) == self.branch_number, "Number of thresholds must match number of branches."
+             assert all(isinstance(threshold, float) for threshold in self.head_thresholds), "Thresholds must be floats."
+         super().__init__(**kwargs)  # Initialize with base class parameters
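For reference, a minimal sketch of instantiating this configuration with the values shipped in this repo's config.json (assuming BranchyModelConfig.py is importable from the working directory, as the auto_map below suggests):

```python
from BranchyModelConfig import BranchyModelConfig

# Values mirror the config.json in this repo; the placeholder thresholds
# of 10.0 effectively disable early exits until tuned values (e.g. from
# the README table) are substituted.
config = BranchyModelConfig(
    model_str="microsoft/phi-2",
    branch_locations=[6, 12, 18, 24],
    confidence_metric="breaking_ties",
    head_thresholds=[10.0, 10.0, 10.0, 10.0],
    penalty_weight=0.9,
    head_window_size=512,
    copy_lm_head=False,
)
# A warning is logged here because branch_number's default (3) is also set;
# branch_locations wins and branch_number is derived from its length.
print(config.branch_number)  # 4
```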
README.md ADDED
@@ -0,0 +1,82 @@
+ ---
+ language:
+ - en
+ license: mit
+ library_name: transformers
+ pipeline_tag: text-generation
+ ---
+
+ # Model Card for branchy_phi-2_base
+
+ Phi-2 is a Transformer with **2.7 billion** parameters. It was trained using the same data sources as [Phi-1.5](https://huggingface.co/microsoft/phi-1.5), augmented with a new data source that consists of various NLP synthetic texts and filtered websites (for safety and educational value). When assessed against benchmarks testing common sense, language understanding, and logical reasoning, Phi-2 showcased nearly state-of-the-art performance among models with fewer than 13 billion parameters.
+
+ This version of Phi-2 adds Early Exits in order to accelerate inference. Each Early Exit head was trained using a self-supervised technique on the base model's outputs.
+
+ ### Model Description
+
+ This model provides trained heads that turn Phi-2 into an Early Exit model.
+
+ - **Developed by:** Florian Valade
+ - **Shared by:** Florian Valade
+ - **Model type:** Text generation
+ - **License:** MIT
+ - **Finetuned from model:** https://huggingface.co/microsoft/phi-2
+
+ ### Model Sources
+
+ - **Repository:** [TBD]
+ - **Paper:** [TBD]
+ - **Demo:** [TBD]
+
+ ## Uses
+
+ When used as provided, the model does not use Early Exits. You need to set head_thresholds in the configuration to enable inference acceleration.
+
+ Different head_thresholds for different ε:
+
+ | ε   | head_thresholds                                                                  |
+ | --- | -------------------------------------------------------------------------------- |
+ | 0.4 | [1.0307843685150146, 0.8693032264709473, 0.6637287139892578, 0.3111608028411865] |
+ | 0.5 | [1.505380630493164, 1.5712471008300781, 1.1971790790557861, 0.6908178329467773]  |
+ | 0.6 | [2.0270779132843018, 1.8969502449035645, 1.4789371490478516, 0.9875392913818359] |
+ | 0.7 | [2.506962537765503, 2.656052589416504, 1.924393653869629, 1.4434680938720703]    |
+ | 0.8 | [3.3786778450012207, 2.568857192993164, 2.5665550231933594, 2.006620407104492]   |
+ | 0.9 | [3.187114715576172, 3.442272663116455, 2.636230945587158, 2.460529088973999]     |
+
+ Once you have selected the thresholds, you can use:
+
+ ```python
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ model = AutoModelForCausalLM.from_pretrained("valcore/branchy_phi-2_base", trust_remote_code=True, device_map="cpu")
+ tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")
+
+ model.eval()
+
+ inputs = tokenizer('''def print_prime(n):
+    """
+    Print all primes between 1 and n
+    """''', return_tensors="pt", return_attention_mask=False)
+ # Put the selected thresholds here:
+ model.head_thresholds = torch.tensor([3.187114715576172, 3.442272663116455, 2.636230945587158, 2.460529088973999])
+
+ outputs = model.generate(**inputs, max_length=200)
+ text = tokenizer.batch_decode(outputs)[0]
+ print(text)
+ ```
+
+ ## Citation
+
+ <!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
+
+ **BibTeX:**
+
+ TBD
+
+ ## Model Card Contact
+
+ Florian Valade
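The ε table in the README above can also be kept in code; a small illustrative helper (`EPSILON_TO_THRESHOLDS` and `pick_thresholds` are hypothetical names, not part of the repo's API):

```python
import torch

# Thresholds copied verbatim from the README table; keys are the ε values.
EPSILON_TO_THRESHOLDS = {
    0.4: [1.0307843685150146, 0.8693032264709473, 0.6637287139892578, 0.3111608028411865],
    0.5: [1.505380630493164, 1.5712471008300781, 1.1971790790557861, 0.6908178329467773],
    0.6: [2.0270779132843018, 1.8969502449035645, 1.4789371490478516, 0.9875392913818359],
    0.7: [2.506962537765503, 2.656052589416504, 1.924393653869629, 1.4434680938720703],
    0.8: [3.3786778450012207, 2.568857192993164, 2.5665550231933594, 2.006620407104492],
    0.9: [3.187114715576172, 3.442272663116455, 2.636230945587158, 2.460529088973999],
}

def pick_thresholds(epsilon: float) -> torch.Tensor:
    """Return the per-branch head thresholds for a given ε as a tensor."""
    return torch.tensor(EPSILON_TO_THRESHOLDS[epsilon])

# Usage, following the README example:
# model.head_thresholds = pick_thresholds(0.9)
```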
config.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "architectures": [
+     "BranchyCausalModel"
+   ],
+   "auto_map": {
+     "AutoConfig": "BranchyModelConfig.BranchyModelConfig",
+     "AutoModelForCausalLM": "BranchyModel.BranchyCausalModel"
+   },
+   "branch_locations": [
+     6,
+     12,
+     18,
+     24
+   ],
+   "branch_number": 4,
+   "confidence_metric": "breaking_ties",
+   "copy_lm_head": false,
+   "head_thresholds": [
+     10.0,
+     10.0,
+     10.0,
+     10.0
+   ],
+   "head_window_size": 512,
+   "model_str": "microsoft/phi-2",
+   "model_type": "branchy",
+   "penalty_weight": 0.9,
+   "torch_dtype": "float32",
+   "transformers_version": "4.40.2"
+ }
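A short sketch, assuming the repo id `valcore/branchy_phi-2_base` from the README, of how this file is resolved through its `auto_map` at load time:

```python
from transformers import AutoConfig

# The auto_map above routes AutoConfig to BranchyModelConfig, which
# requires trust_remote_code. Note the shipped head_thresholds are the
# 10.0 placeholders, so early exits are effectively off by default.
config = AutoConfig.from_pretrained("valcore/branchy_phi-2_base", trust_remote_code=True)
print(config.branch_locations)  # [6, 12, 18, 24]
```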
generation_config.json ADDED
@@ -0,0 +1,4 @@
+ {
+   "_from_model_config": true,
+   "transformers_version": "4.40.2"
+ }
model.safetensors.index.json ADDED
@@ -0,0 +1,476 @@
+ {
+   "metadata": {
+     "total_size": 13216788480
+   },
+   "weight_map": {
+     "branches.0.layernorm.bias": "model-00003-of-00003.safetensors",
+     "branches.0.layernorm.weight": "model-00003-of-00003.safetensors",
+     "branches.0.lm_head.bias": "model-00003-of-00003.safetensors",
+     "branches.0.lm_head.weight": "model-00003-of-00003.safetensors",
+     "branches.1.layernorm.bias": "model-00003-of-00003.safetensors",
+     "branches.1.layernorm.weight": "model-00003-of-00003.safetensors",
+     "branches.1.lm_head.bias": "model-00003-of-00003.safetensors",
+     "branches.1.lm_head.weight": "model-00003-of-00003.safetensors",
+     "branches.2.layernorm.bias": "model-00003-of-00003.safetensors",
+     "branches.2.layernorm.weight": "model-00003-of-00003.safetensors",
+     "branches.2.lm_head.bias": "model-00003-of-00003.safetensors",
+     "branches.2.lm_head.weight": "model-00003-of-00003.safetensors",
+     "branches.3.layernorm.bias": "model-00003-of-00003.safetensors",
+     "branches.3.layernorm.weight": "model-00003-of-00003.safetensors",
+     "branches.3.lm_head.bias": "model-00003-of-00003.safetensors",
+     "branches.3.lm_head.weight": "model-00003-of-00003.safetensors",
+     "lm_head.bias": "model-00003-of-00003.safetensors",
+     "lm_head.weight": "model-00003-of-00003.safetensors",
+     "model.embed_tokens.weight": "model-00001-of-00003.safetensors",
+     "model.final_layernorm.bias": "model-00003-of-00003.safetensors",
+     "model.final_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.0.input_layernorm.bias": "model-00001-of-00003.safetensors",
+     "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.mlp.fc1.bias": "model-00001-of-00003.safetensors",
+     "model.layers.0.mlp.fc1.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.mlp.fc2.bias": "model-00001-of-00003.safetensors",
+     "model.layers.0.mlp.fc2.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.dense.bias": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.dense.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.input_layernorm.bias": "model-00001-of-00003.safetensors",
+     "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.mlp.fc1.bias": "model-00001-of-00003.safetensors",
+     "model.layers.1.mlp.fc1.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.mlp.fc2.bias": "model-00001-of-00003.safetensors",
+     "model.layers.1.mlp.fc2.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.dense.bias": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.dense.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.input_layernorm.bias": "model-00001-of-00003.safetensors",
+     "model.layers.10.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.mlp.fc1.bias": "model-00001-of-00003.safetensors",
+     "model.layers.10.mlp.fc1.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.mlp.fc2.bias": "model-00001-of-00003.safetensors",
+     "model.layers.10.mlp.fc2.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.self_attn.dense.bias": "model-00001-of-00003.safetensors",
+     "model.layers.10.self_attn.dense.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.11.input_layernorm.bias": "model-00001-of-00003.safetensors",
+     "model.layers.11.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.11.mlp.fc1.bias": "model-00001-of-00003.safetensors",
+     "model.layers.11.mlp.fc1.weight": "model-00001-of-00003.safetensors",
+     "model.layers.11.mlp.fc2.bias": "model-00001-of-00003.safetensors",
+     "model.layers.11.mlp.fc2.weight": "model-00001-of-00003.safetensors",
+     "model.layers.11.self_attn.dense.bias": "model-00001-of-00003.safetensors",
+     "model.layers.11.self_attn.dense.weight": "model-00001-of-00003.safetensors",
+     "model.layers.11.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.11.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.11.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.12.input_layernorm.bias": "model-00001-of-00003.safetensors",
+     "model.layers.12.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.12.mlp.fc1.bias": "model-00001-of-00003.safetensors",
+     "model.layers.12.mlp.fc1.weight": "model-00001-of-00003.safetensors",
+     "model.layers.12.mlp.fc2.bias": "model-00001-of-00003.safetensors",
+     "model.layers.12.mlp.fc2.weight": "model-00001-of-00003.safetensors",
+     "model.layers.12.self_attn.dense.bias": "model-00001-of-00003.safetensors",
+     "model.layers.12.self_attn.dense.weight": "model-00001-of-00003.safetensors",
+     "model.layers.12.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.12.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.12.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.13.input_layernorm.bias": "model-00001-of-00003.safetensors",
+     "model.layers.13.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.13.mlp.fc1.bias": "model-00001-of-00003.safetensors",
+     "model.layers.13.mlp.fc1.weight": "model-00001-of-00003.safetensors",
+     "model.layers.13.mlp.fc2.bias": "model-00001-of-00003.safetensors",
+     "model.layers.13.mlp.fc2.weight": "model-00001-of-00003.safetensors",
+     "model.layers.13.self_attn.dense.bias": "model-00001-of-00003.safetensors",
+     "model.layers.13.self_attn.dense.weight": "model-00001-of-00003.safetensors",
+     "model.layers.13.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.13.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.13.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.14.input_layernorm.bias": "model-00002-of-00003.safetensors",
+     "model.layers.14.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.mlp.fc1.bias": "model-00002-of-00003.safetensors",
+     "model.layers.14.mlp.fc1.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.mlp.fc2.bias": "model-00002-of-00003.safetensors",
+     "model.layers.14.mlp.fc2.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.self_attn.dense.bias": "model-00002-of-00003.safetensors",
+     "model.layers.14.self_attn.dense.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.14.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.14.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.input_layernorm.bias": "model-00002-of-00003.safetensors",
+     "model.layers.15.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.mlp.fc1.bias": "model-00002-of-00003.safetensors",
+     "model.layers.15.mlp.fc1.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.mlp.fc2.bias": "model-00002-of-00003.safetensors",
+     "model.layers.15.mlp.fc2.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.dense.bias": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.dense.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.input_layernorm.bias": "model-00002-of-00003.safetensors",
+     "model.layers.16.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.mlp.fc1.bias": "model-00002-of-00003.safetensors",
+     "model.layers.16.mlp.fc1.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.mlp.fc2.bias": "model-00002-of-00003.safetensors",
+     "model.layers.16.mlp.fc2.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.dense.bias": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.dense.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.input_layernorm.bias": "model-00002-of-00003.safetensors",
+     "model.layers.17.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.mlp.fc1.bias": "model-00002-of-00003.safetensors",
+     "model.layers.17.mlp.fc1.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.mlp.fc2.bias": "model-00002-of-00003.safetensors",
+     "model.layers.17.mlp.fc2.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.dense.bias": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.dense.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.input_layernorm.bias": "model-00002-of-00003.safetensors",
+     "model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.mlp.fc1.bias": "model-00002-of-00003.safetensors",
+     "model.layers.18.mlp.fc1.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.mlp.fc2.bias": "model-00002-of-00003.safetensors",
+     "model.layers.18.mlp.fc2.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.dense.bias": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.dense.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.input_layernorm.bias": "model-00002-of-00003.safetensors",
+     "model.layers.19.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.mlp.fc1.bias": "model-00002-of-00003.safetensors",
+     "model.layers.19.mlp.fc1.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.mlp.fc2.bias": "model-00002-of-00003.safetensors",
+     "model.layers.19.mlp.fc2.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.dense.bias": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.dense.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.2.input_layernorm.bias": "model-00001-of-00003.safetensors",
+     "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.mlp.fc1.bias": "model-00001-of-00003.safetensors",
+     "model.layers.2.mlp.fc1.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.mlp.fc2.bias": "model-00001-of-00003.safetensors",
+     "model.layers.2.mlp.fc2.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.dense.bias": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.dense.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.20.input_layernorm.bias": "model-00002-of-00003.safetensors",
+     "model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.mlp.fc1.bias": "model-00002-of-00003.safetensors",
+     "model.layers.20.mlp.fc1.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.mlp.fc2.bias": "model-00002-of-00003.safetensors",
+     "model.layers.20.mlp.fc2.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.dense.bias": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.dense.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.input_layernorm.bias": "model-00002-of-00003.safetensors",
+     "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.mlp.fc1.bias": "model-00002-of-00003.safetensors",
+     "model.layers.21.mlp.fc1.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.mlp.fc2.bias": "model-00002-of-00003.safetensors",
+     "model.layers.21.mlp.fc2.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.dense.bias": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.dense.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.input_layernorm.bias": "model-00002-of-00003.safetensors",
+     "model.layers.22.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.mlp.fc1.bias": "model-00002-of-00003.safetensors",
+     "model.layers.22.mlp.fc1.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.mlp.fc2.bias": "model-00002-of-00003.safetensors",
+     "model.layers.22.mlp.fc2.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.self_attn.dense.bias": "model-00002-of-00003.safetensors",
+     "model.layers.22.self_attn.dense.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.23.input_layernorm.bias": "model-00002-of-00003.safetensors",
+     "model.layers.23.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.23.mlp.fc1.bias": "model-00002-of-00003.safetensors",
+     "model.layers.23.mlp.fc1.weight": "model-00002-of-00003.safetensors",
+     "model.layers.23.mlp.fc2.bias": "model-00002-of-00003.safetensors",
+     "model.layers.23.mlp.fc2.weight": "model-00002-of-00003.safetensors",
+     "model.layers.23.self_attn.dense.bias": "model-00002-of-00003.safetensors",
+     "model.layers.23.self_attn.dense.weight": "model-00002-of-00003.safetensors",
+     "model.layers.23.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.23.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.23.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.24.input_layernorm.bias": "model-00002-of-00003.safetensors",
+     "model.layers.24.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.24.mlp.fc1.bias": "model-00002-of-00003.safetensors",
+     "model.layers.24.mlp.fc1.weight": "model-00002-of-00003.safetensors",
+     "model.layers.24.mlp.fc2.bias": "model-00002-of-00003.safetensors",
+     "model.layers.24.mlp.fc2.weight": "model-00002-of-00003.safetensors",
+     "model.layers.24.self_attn.dense.bias": "model-00002-of-00003.safetensors",
+     "model.layers.24.self_attn.dense.weight": "model-00002-of-00003.safetensors",
+     "model.layers.24.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.24.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.24.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.25.input_layernorm.bias": "model-00002-of-00003.safetensors",
+     "model.layers.25.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.25.mlp.fc1.bias": "model-00002-of-00003.safetensors",
+     "model.layers.25.mlp.fc1.weight": "model-00002-of-00003.safetensors",
+     "model.layers.25.mlp.fc2.bias": "model-00002-of-00003.safetensors",
+     "model.layers.25.mlp.fc2.weight": "model-00002-of-00003.safetensors",
+     "model.layers.25.self_attn.dense.bias": "model-00002-of-00003.safetensors",
+     "model.layers.25.self_attn.dense.weight": "model-00002-of-00003.safetensors",
+     "model.layers.25.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.25.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.25.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.26.input_layernorm.bias": "model-00002-of-00003.safetensors",
+     "model.layers.26.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.26.mlp.fc1.bias": "model-00002-of-00003.safetensors",
+     "model.layers.26.mlp.fc1.weight": "model-00002-of-00003.safetensors",
+     "model.layers.26.mlp.fc2.bias": "model-00002-of-00003.safetensors",
+     "model.layers.26.mlp.fc2.weight": "model-00002-of-00003.safetensors",
+     "model.layers.26.self_attn.dense.bias": "model-00002-of-00003.safetensors",
+     "model.layers.26.self_attn.dense.weight": "model-00002-of-00003.safetensors",
+     "model.layers.26.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.26.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.26.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.27.input_layernorm.bias": "model-00002-of-00003.safetensors",
+     "model.layers.27.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.27.mlp.fc1.bias": "model-00002-of-00003.safetensors",
+     "model.layers.27.mlp.fc1.weight": "model-00002-of-00003.safetensors",
+     "model.layers.27.mlp.fc2.bias": "model-00002-of-00003.safetensors",
+     "model.layers.27.mlp.fc2.weight": "model-00002-of-00003.safetensors",
+     "model.layers.27.self_attn.dense.bias": "model-00002-of-00003.safetensors",
+     "model.layers.27.self_attn.dense.weight": "model-00002-of-00003.safetensors",
+     "model.layers.27.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.27.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.27.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.28.input_layernorm.bias": "model-00002-of-00003.safetensors",
+     "model.layers.28.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.28.mlp.fc1.bias": "model-00002-of-00003.safetensors",
+     "model.layers.28.mlp.fc1.weight": "model-00002-of-00003.safetensors",
+     "model.layers.28.mlp.fc2.bias": "model-00002-of-00003.safetensors",
+     "model.layers.28.mlp.fc2.weight": "model-00002-of-00003.safetensors",
+     "model.layers.28.self_attn.dense.bias": "model-00002-of-00003.safetensors",
+     "model.layers.28.self_attn.dense.weight": "model-00002-of-00003.safetensors",
+     "model.layers.28.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.28.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.28.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.28.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.28.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.28.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.29.input_layernorm.bias": "model-00002-of-00003.safetensors",
+     "model.layers.29.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.29.mlp.fc1.bias": "model-00002-of-00003.safetensors",
+     "model.layers.29.mlp.fc1.weight": "model-00002-of-00003.safetensors",
+     "model.layers.29.mlp.fc2.bias": "model-00002-of-00003.safetensors",
+     "model.layers.29.mlp.fc2.weight": "model-00002-of-00003.safetensors",
+     "model.layers.29.self_attn.dense.bias": "model-00002-of-00003.safetensors",
+     "model.layers.29.self_attn.dense.weight": "model-00002-of-00003.safetensors",
+     "model.layers.29.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.29.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.29.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.29.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.29.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+     "model.layers.29.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.3.input_layernorm.bias": "model-00001-of-00003.safetensors",
+     "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.mlp.fc1.bias": "model-00001-of-00003.safetensors",
+     "model.layers.3.mlp.fc1.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.mlp.fc2.bias": "model-00001-of-00003.safetensors",
+     "model.layers.3.mlp.fc2.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.dense.bias": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.dense.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.30.input_layernorm.bias": "model-00003-of-00003.safetensors",
+     "model.layers.30.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.mlp.fc1.bias": "model-00003-of-00003.safetensors",
+     "model.layers.30.mlp.fc1.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.mlp.fc2.bias": "model-00003-of-00003.safetensors",
+     "model.layers.30.mlp.fc2.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.dense.bias": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.dense.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.v_proj.bias": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.input_layernorm.bias": "model-00003-of-00003.safetensors",
+     "model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.mlp.fc1.bias": "model-00003-of-00003.safetensors",
+     "model.layers.31.mlp.fc1.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.mlp.fc2.bias": "model-00003-of-00003.safetensors",
+     "model.layers.31.mlp.fc2.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.dense.bias": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.dense.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.v_proj.bias": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.4.input_layernorm.bias": "model-00001-of-00003.safetensors",
+     "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.mlp.fc1.bias": "model-00001-of-00003.safetensors",
+     "model.layers.4.mlp.fc1.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.mlp.fc2.bias": "model-00001-of-00003.safetensors",
+     "model.layers.4.mlp.fc2.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.dense.bias": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.dense.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.input_layernorm.bias": "model-00001-of-00003.safetensors",
+     "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.mlp.fc1.bias": "model-00001-of-00003.safetensors",
+     "model.layers.5.mlp.fc1.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.mlp.fc2.bias": "model-00001-of-00003.safetensors",
+     "model.layers.5.mlp.fc2.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.dense.bias": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.dense.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.input_layernorm.bias": "model-00001-of-00003.safetensors",
+     "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.mlp.fc1.bias": "model-00001-of-00003.safetensors",
+     "model.layers.6.mlp.fc1.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.mlp.fc2.bias": "model-00001-of-00003.safetensors",
+     "model.layers.6.mlp.fc2.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.dense.bias": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.dense.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.input_layernorm.bias": "model-00001-of-00003.safetensors",
+     "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.mlp.fc1.bias": "model-00001-of-00003.safetensors",
+     "model.layers.7.mlp.fc1.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.mlp.fc2.bias": "model-00001-of-00003.safetensors",
+     "model.layers.7.mlp.fc2.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.dense.bias": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.dense.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.input_layernorm.bias": "model-00001-of-00003.safetensors",
+     "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.mlp.fc1.bias": "model-00001-of-00003.safetensors",
+     "model.layers.8.mlp.fc1.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.mlp.fc2.bias": "model-00001-of-00003.safetensors",
+     "model.layers.8.mlp.fc2.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.dense.bias": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.dense.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.input_layernorm.bias": "model-00001-of-00003.safetensors",
+     "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.mlp.fc1.bias": "model-00001-of-00003.safetensors",
+     "model.layers.9.mlp.fc1.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.mlp.fc2.bias": "model-00001-of-00003.safetensors",
+     "model.layers.9.mlp.fc2.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.dense.bias": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.dense.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors"
+   }
+ }
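For orientation, a minimal sketch (assuming the index file has been downloaded locally) of reading this standard safetensors index to see how tensors are distributed across the three shards:

```python
import json
from collections import Counter

# Assumes model.safetensors.index.json is present in the working directory.
with open("model.safetensors.index.json") as f:
    index = json.load(f)

# Count tensors per shard; the four branch heads, the final lm_head and
# the last transformer layers all sit in the third shard.
shard_counts = Counter(index["weight_map"].values())
for shard, count in sorted(shard_counts.items()):
    print(shard, count)

print(index["metadata"]["total_size"])  # 13216788480 bytes (~13.2 GB of float32 weights)
```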