Model save
Browse files
README.md
CHANGED
@@ -48,6 +48,6 @@ The following hyperparameters were used during training:
|
|
48 |
### Framework versions
|
49 |
|
50 |
- Transformers 4.35.0
|
51 |
-
- Pytorch 2.
|
52 |
-
- Datasets 2.14.
|
53 |
- Tokenizers 0.14.1
|
|
|
48 |
### Framework versions
|
49 |
|
50 |
- Transformers 4.35.0
|
51 |
+
- Pytorch 2.1.0+cu121
|
52 |
+
- Datasets 2.14.6
|
53 |
- Tokenizers 0.14.1
|
adapter_config.json
CHANGED
@@ -8,24 +8,20 @@
|
|
8 |
"init_lora_weights": true,
|
9 |
"layers_pattern": null,
|
10 |
"layers_to_transform": null,
|
11 |
-
"loftq_config": {},
|
12 |
"lora_alpha": 16,
|
13 |
"lora_dropout": 0.1,
|
14 |
-
"megatron_config": null,
|
15 |
-
"megatron_core": "megatron.core",
|
16 |
"modules_to_save": null,
|
17 |
"peft_type": "LORA",
|
18 |
"r": 64,
|
19 |
"rank_pattern": {},
|
20 |
"revision": null,
|
21 |
"target_modules": [
|
22 |
-
"
|
23 |
-
"k_proj",
|
24 |
"o_proj",
|
25 |
"up_proj",
|
26 |
-
"
|
27 |
-
"v_proj"
|
|
|
28 |
],
|
29 |
-
"task_type": "CAUSAL_LM",
|
30 |
-
"use_rslora": false
|
31 |
}
|
|
|
8 |
"init_lora_weights": true,
|
9 |
"layers_pattern": null,
|
10 |
"layers_to_transform": null,
|
|
|
11 |
"lora_alpha": 16,
|
12 |
"lora_dropout": 0.1,
|
|
|
|
|
13 |
"modules_to_save": null,
|
14 |
"peft_type": "LORA",
|
15 |
"r": 64,
|
16 |
"rank_pattern": {},
|
17 |
"revision": null,
|
18 |
"target_modules": [
|
19 |
+
"q_proj",
|
|
|
20 |
"o_proj",
|
21 |
"up_proj",
|
22 |
+
"down_proj",
|
23 |
+
"v_proj",
|
24 |
+
"k_proj"
|
25 |
],
|
26 |
+
"task_type": "CAUSAL_LM"
|
|
|
27 |
}
|
runs/May16_17-06-17_cdc34a7ce3be/events.out.tfevents.1715879257.cdc34a7ce3be.62268.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41e5b6791427747363f2bd22c96b74db429e177534dadbee660f96713c5a4c62
|
3 |
+
size 4308
|
step_0/README.md
CHANGED
@@ -18,7 +18,6 @@ base_model: meta-llama/Llama-2-7b-chat-hf
|
|
18 |
|
19 |
|
20 |
- **Developed by:** [More Information Needed]
|
21 |
-
- **Funded by [optional]:** [More Information Needed]
|
22 |
- **Shared by [optional]:** [More Information Needed]
|
23 |
- **Model type:** [More Information Needed]
|
24 |
- **Language(s) (NLP):** [More Information Needed]
|
@@ -77,7 +76,7 @@ Use the code below to get started with the model.
|
|
77 |
|
78 |
### Training Data
|
79 |
|
80 |
-
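<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->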
|
81 |
|
82 |
[More Information Needed]
|
83 |
|
@@ -108,7 +107,7 @@ Use the code below to get started with the model.
|
|
108 |
|
109 |
#### Testing Data
|
110 |
|
111 |
-
<!-- This should link to a Dataset Card if possible. -->
|
112 |
|
113 |
[More Information Needed]
|
114 |
|
@@ -199,6 +198,10 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
|
|
199 |
[More Information Needed]
|
200 |
|
201 |
|
|
|
|
|
|
|
202 |
### Framework versions
|
203 |
|
204 |
-
|
|
|
|
18 |
|
19 |
|
20 |
- **Developed by:** [More Information Needed]
|
|
|
21 |
- **Shared by [optional]:** [More Information Needed]
|
22 |
- **Model type:** [More Information Needed]
|
23 |
- **Language(s) (NLP):** [More Information Needed]
|
|
|
76 |
|
77 |
### Training Data
|
78 |
|
79 |
+
<!-- This should link to a Data Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
|
80 |
|
81 |
[More Information Needed]
|
82 |
|
|
|
107 |
|
108 |
#### Testing Data
|
109 |
|
110 |
+
<!-- This should link to a Data Card if possible. -->
|
111 |
|
112 |
[More Information Needed]
|
113 |
|
|
|
198 |
[More Information Needed]
|
199 |
|
200 |
|
201 |
+
## Training procedure
|
202 |
+
|
203 |
+
|
204 |
### Framework versions
|
205 |
|
206 |
+
|
207 |
+
- PEFT 0.6.1
|
step_0/adapter_config.json
CHANGED
@@ -8,24 +8,20 @@
|
|
8 |
"init_lora_weights": true,
|
9 |
"layers_pattern": null,
|
10 |
"layers_to_transform": null,
|
11 |
-
"loftq_config": {},
|
12 |
"lora_alpha": 16,
|
13 |
"lora_dropout": 0.1,
|
14 |
-
"megatron_config": null,
|
15 |
-
"megatron_core": "megatron.core",
|
16 |
"modules_to_save": null,
|
17 |
"peft_type": "LORA",
|
18 |
"r": 64,
|
19 |
"rank_pattern": {},
|
20 |
"revision": null,
|
21 |
"target_modules": [
|
22 |
-
"
|
23 |
-
"k_proj",
|
24 |
"o_proj",
|
25 |
"up_proj",
|
26 |
-
"
|
27 |
-
"v_proj"
|
|
|
28 |
],
|
29 |
-
"task_type": "CAUSAL_LM",
|
30 |
-
"use_rslora": false
|
31 |
}
|
|
|
8 |
"init_lora_weights": true,
|
9 |
"layers_pattern": null,
|
10 |
"layers_to_transform": null,
|
|
|
11 |
"lora_alpha": 16,
|
12 |
"lora_dropout": 0.1,
|
|
|
|
|
13 |
"modules_to_save": null,
|
14 |
"peft_type": "LORA",
|
15 |
"r": 64,
|
16 |
"rank_pattern": {},
|
17 |
"revision": null,
|
18 |
"target_modules": [
|
19 |
+
"q_proj",
|
|
|
20 |
"o_proj",
|
21 |
"up_proj",
|
22 |
+
"down_proj",
|
23 |
+
"v_proj",
|
24 |
+
"k_proj"
|
25 |
],
|
26 |
+
"task_type": "CAUSAL_LM"
|
|
|
27 |
}
|
step_0/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5560
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66247858b5728ead4dbdf5ac24a724ae24015e0ac1489f89e2b67562e415e726
|
3 |
size 5560
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5560
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66247858b5728ead4dbdf5ac24a724ae24015e0ac1489f89e2b67562e415e726
|
3 |
size 5560
|