Model save
Browse files- README.md +2 -2
- adapter_config.json +2 -2
- adapter_model.safetensors +2 -2
- global_step0/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +1 -1
- global_step0/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +1 -1
- global_step0/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +1 -1
- global_step0/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +1 -1
- global_step0/zero_pp_rank_0_mp_rank_00_model_states.pt +1 -1
- global_step0/zero_pp_rank_1_mp_rank_00_model_states.pt +1 -1
- global_step0/zero_pp_rank_2_mp_rank_00_model_states.pt +1 -1
- global_step0/zero_pp_rank_3_mp_rank_00_model_states.pt +1 -1
- runs/Jun14_23-43-04_scarlet/events.out.tfevents.1718433802.scarlet.390669.0 +3 -0
- step_0/README.md +7 -0
- step_0/adapter_config.json +2 -2
- step_0/adapter_model.safetensors +2 -2
- step_0/global_step0/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +1 -1
- step_0/global_step0/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +1 -1
- step_0/global_step0/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +1 -1
- step_0/global_step0/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +1 -1
- step_0/global_step0/zero_pp_rank_0_mp_rank_00_model_states.pt +1 -1
- step_0/global_step0/zero_pp_rank_1_mp_rank_00_model_states.pt +1 -1
- step_0/global_step0/zero_pp_rank_2_mp_rank_00_model_states.pt +1 -1
- step_0/global_step0/zero_pp_rank_3_mp_rank_00_model_states.pt +1 -1
- step_0/training_args.bin +2 -2
- training_args.bin +2 -2
README.md
CHANGED
@@ -33,12 +33,12 @@ More information needed
|
|
33 |
|
34 |
The following hyperparameters were used during training:
|
35 |
- learning_rate: 2e-05
|
36 |
-
- train_batch_size:
|
37 |
- eval_batch_size: 8
|
38 |
- seed: 42
|
39 |
- distributed_type: multi-GPU
|
40 |
- num_devices: 4
|
41 |
-
- gradient_accumulation_steps:
|
42 |
- total_train_batch_size: 2048
|
43 |
- total_eval_batch_size: 32
|
44 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
|
|
33 |
|
34 |
The following hyperparameters were used during training:
|
35 |
- learning_rate: 2e-05
|
36 |
+
- train_batch_size: 1
|
37 |
- eval_batch_size: 8
|
38 |
- seed: 42
|
39 |
- distributed_type: multi-GPU
|
40 |
- num_devices: 4
|
41 |
+
- gradient_accumulation_steps: 512
|
42 |
- total_train_batch_size: 2048
|
43 |
- total_eval_batch_size: 32
|
44 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
adapter_config.json
CHANGED
@@ -18,8 +18,8 @@
|
|
18 |
"target_modules": [
|
19 |
"k_proj",
|
20 |
"o_proj",
|
21 |
-
"
|
22 |
-
"
|
23 |
],
|
24 |
"task_type": "CAUSAL_LM"
|
25 |
}
|
|
|
18 |
"target_modules": [
|
19 |
"k_proj",
|
20 |
"o_proj",
|
21 |
+
"v_proj",
|
22 |
+
"q_proj"
|
23 |
],
|
24 |
"task_type": "CAUSAL_LM"
|
25 |
}
|
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
|
3 |
+
size 48
|
global_step0/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 163581296
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1987b3b8c5ec2ee2087601115b4d3bce9640f66079338fe76cf17108b38c2370
|
3 |
size 163581296
|
global_step0/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 163581296
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8baae513c9e652358e37ad45fd3e9104a9b276bd6d3b4b83dd0dc88a2c128340
|
3 |
size 163581296
|
global_step0/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 163581296
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:339bbcf8ceab8f37ba443da083a34dccf119d611b2d340505a42923c1e30c651
|
3 |
size 163581296
|
global_step0/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 163581296
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:23c5a301d7a6cea869a2a0cfee460016f45930acb924570ec88359f584281488
|
3 |
size 163581296
|
global_step0/zero_pp_rank_0_mp_rank_00_model_states.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4015603804
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:287ceae7af1e4722a73c36ca426957302d910fb929a8c03f939c4343a33ca8cf
|
3 |
size 4015603804
|
global_step0/zero_pp_rank_1_mp_rank_00_model_states.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4015603804
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6819e7f429aa6a2135b2eaaa439344396d7da94a524b470d80de07367883f43b
|
3 |
size 4015603804
|
global_step0/zero_pp_rank_2_mp_rank_00_model_states.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4015603804
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b67b005454636714c02c2ace2273af1e3256a314584b520d8c2966d6f32f5da3
|
3 |
size 4015603804
|
global_step0/zero_pp_rank_3_mp_rank_00_model_states.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4015603804
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a17dae117470d88032a11f8aa047eb1815d277d19f6c62d34cd5cb2911290d3e
|
3 |
size 4015603804
|
runs/Jun14_23-43-04_scarlet/events.out.tfevents.1718433802.scarlet.390669.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:68be5cf7b3c835897415b852bdda2713df4c79c264ec77598c4f38aee589c003
|
3 |
+
size 4300
|
step_0/README.md
CHANGED
@@ -232,4 +232,11 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
|
|
232 |
### Framework versions
|
233 |
|
234 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
235 |
- PEFT 0.6.1
|
|
|
232 |
### Framework versions
|
233 |
|
234 |
|
235 |
+
- PEFT 0.6.1
|
236 |
+
## Training procedure
|
237 |
+
|
238 |
+
|
239 |
+
### Framework versions
|
240 |
+
|
241 |
+
|
242 |
- PEFT 0.6.1
|
step_0/adapter_config.json
CHANGED
@@ -18,8 +18,8 @@
|
|
18 |
"target_modules": [
|
19 |
"k_proj",
|
20 |
"o_proj",
|
21 |
-
"
|
22 |
-
"
|
23 |
],
|
24 |
"task_type": "CAUSAL_LM"
|
25 |
}
|
|
|
18 |
"target_modules": [
|
19 |
"k_proj",
|
20 |
"o_proj",
|
21 |
+
"v_proj",
|
22 |
+
"q_proj"
|
23 |
],
|
24 |
"task_type": "CAUSAL_LM"
|
25 |
}
|
step_0/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
|
3 |
+
size 48
|
step_0/global_step0/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 163581296
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1987b3b8c5ec2ee2087601115b4d3bce9640f66079338fe76cf17108b38c2370
|
3 |
size 163581296
|
step_0/global_step0/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 163581296
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8baae513c9e652358e37ad45fd3e9104a9b276bd6d3b4b83dd0dc88a2c128340
|
3 |
size 163581296
|
step_0/global_step0/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 163581296
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:339bbcf8ceab8f37ba443da083a34dccf119d611b2d340505a42923c1e30c651
|
3 |
size 163581296
|
step_0/global_step0/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 163581296
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:23c5a301d7a6cea869a2a0cfee460016f45930acb924570ec88359f584281488
|
3 |
size 163581296
|
step_0/global_step0/zero_pp_rank_0_mp_rank_00_model_states.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4015603804
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:287ceae7af1e4722a73c36ca426957302d910fb929a8c03f939c4343a33ca8cf
|
3 |
size 4015603804
|
step_0/global_step0/zero_pp_rank_1_mp_rank_00_model_states.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4015603804
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6819e7f429aa6a2135b2eaaa439344396d7da94a524b470d80de07367883f43b
|
3 |
size 4015603804
|
step_0/global_step0/zero_pp_rank_2_mp_rank_00_model_states.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4015603804
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b67b005454636714c02c2ace2273af1e3256a314584b520d8c2966d6f32f5da3
|
3 |
size 4015603804
|
step_0/global_step0/zero_pp_rank_3_mp_rank_00_model_states.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4015603804
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a17dae117470d88032a11f8aa047eb1815d277d19f6c62d34cd5cb2911290d3e
|
3 |
size 4015603804
|
step_0/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1fb85e268e869937e45bd9699d2384ab1fdf675dfb8243c5da62e639a87c613b
|
3 |
+
size 5688
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1fb85e268e869937e45bd9699d2384ab1fdf675dfb8243c5da62e639a87c613b
|
3 |
+
size 5688
|