frcm committed on
Commit
2db1992
1 Parent(s): 30d36b3

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -216,42 +216,4 @@ The following `bitsandbytes` quantization config was used during training:
216
  ### Framework versions
217
 
218
 
219
- - PEFT 0.6.0.dev0
220
- ## Training procedure
221
-
222
-
223
- The following `bitsandbytes` quantization config was used during training:
224
- - quant_method: bitsandbytes
225
- - load_in_8bit: False
226
- - load_in_4bit: True
227
- - llm_int8_threshold: 6.0
228
- - llm_int8_skip_modules: None
229
- - llm_int8_enable_fp32_cpu_offload: False
230
- - llm_int8_has_fp16_weight: False
231
- - bnb_4bit_quant_type: nf4
232
- - bnb_4bit_use_double_quant: True
233
- - bnb_4bit_compute_dtype: bfloat16
234
-
235
- ### Framework versions
236
-
237
-
238
- - PEFT 0.6.0.dev0
239
- ## Training procedure
240
-
241
-
242
- The following `bitsandbytes` quantization config was used during training:
243
- - quant_method: bitsandbytes
244
- - load_in_8bit: False
245
- - load_in_4bit: True
246
- - llm_int8_threshold: 6.0
247
- - llm_int8_skip_modules: None
248
- - llm_int8_enable_fp32_cpu_offload: False
249
- - llm_int8_has_fp16_weight: False
250
- - bnb_4bit_quant_type: nf4
251
- - bnb_4bit_use_double_quant: True
252
- - bnb_4bit_compute_dtype: bfloat16
253
-
254
- ### Framework versions
255
-
256
-
257
  - PEFT 0.6.0.dev0
 
216
  ### Framework versions
217
 
218
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
  - PEFT 0.6.0.dev0
adapter_config.json CHANGED
@@ -12,18 +12,18 @@
12
  "lora_dropout": 0.05,
13
  "modules_to_save": null,
14
  "peft_type": "LORA",
15
- "r": 12,
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
- "v_proj",
20
  "q_proj",
 
 
21
  "gate_proj",
22
  "up_proj",
23
- "o_proj",
24
  "lm_head",
25
- "down_proj",
26
- "k_proj"
27
  ],
28
  "task_type": "CAUSAL_LM"
29
  }
 
12
  "lora_dropout": 0.05,
13
  "modules_to_save": null,
14
  "peft_type": "LORA",
15
+ "r": 10,
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
+ "k_proj",
20
  "q_proj",
21
+ "o_proj",
22
+ "v_proj",
23
  "gate_proj",
24
  "up_proj",
 
25
  "lm_head",
26
+ "down_proj"
 
27
  ],
28
  "task_type": "CAUSAL_LM"
29
  }
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2150c8fa66f0ac27454b3bdcd5f06b4c6d50dc0a4daf184b0c9366ccdf1a53b
3
- size 127723221
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e656ad14e1d76b0391a061c1797fbf326c79c057f1731672e11cb75b6a6e1fa6
3
+ size 106462933
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ee9e99599620e6d99b7c0f7860a3e128ca1e019457489dfc988dff60f05bd2a
3
- size 64412071
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6393c7ed447f7a3f01c6918424051ce72b5a6b770778a57372124a39168a7cfe
3
+ size 53769383
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eaad1333a3bbd8bb7f42087a3d194baa5d5b31e31c620c9b7c1a1bfdb3b9fdc0
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46da827e165696189c34c2493b211d8adc8abaffc57cfa6ca30d52808eb30a0e
3
  size 14575
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24df190247b5dc787d466cf5337ca4a8c96908de4b7a00392f0a5223b5941c70
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08342e596f9ce0d2810a89135af8758741c90d0421d3905ccfac5d85179e1963
3
  size 627
trainer_state.json CHANGED
@@ -1,105 +1,137 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.1363636363636362,
5
  "eval_steps": 50,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.11,
13
- "eval_loss": 1.453033447265625,
14
- "eval_runtime": 11.4227,
15
- "eval_samples_per_second": 9.63,
16
- "eval_steps_per_second": 1.226,
17
  "step": 50
18
  },
19
  {
20
- "epoch": 0.23,
21
- "eval_loss": 1.4157308340072632,
22
- "eval_runtime": 11.406,
23
- "eval_samples_per_second": 9.644,
24
- "eval_steps_per_second": 1.227,
25
  "step": 100
26
  },
27
  {
28
- "epoch": 0.34,
29
- "eval_loss": 1.3941584825515747,
30
- "eval_runtime": 11.4049,
31
- "eval_samples_per_second": 9.645,
32
- "eval_steps_per_second": 1.228,
33
  "step": 150
34
  },
35
  {
36
- "epoch": 0.45,
37
- "eval_loss": 1.3821239471435547,
38
- "eval_runtime": 11.4018,
39
- "eval_samples_per_second": 9.648,
40
- "eval_steps_per_second": 1.228,
41
  "step": 200
42
  },
43
  {
44
- "epoch": 0.57,
45
- "eval_loss": 1.3709255456924438,
46
- "eval_runtime": 11.3981,
47
- "eval_samples_per_second": 9.651,
48
- "eval_steps_per_second": 1.228,
49
  "step": 250
50
  },
51
  {
52
- "epoch": 0.68,
53
- "eval_loss": 1.3664299249649048,
54
- "eval_runtime": 11.3976,
55
- "eval_samples_per_second": 9.651,
56
- "eval_steps_per_second": 1.228,
57
  "step": 300
58
  },
59
  {
60
- "epoch": 0.8,
61
- "eval_loss": 1.3568395376205444,
62
- "eval_runtime": 11.4,
63
- "eval_samples_per_second": 9.649,
64
- "eval_steps_per_second": 1.228,
65
  "step": 350
66
  },
67
  {
68
- "epoch": 0.91,
69
- "eval_loss": 1.3490984439849854,
70
- "eval_runtime": 11.3989,
71
- "eval_samples_per_second": 9.65,
72
- "eval_steps_per_second": 1.228,
73
  "step": 400
74
  },
75
  {
76
- "epoch": 1.02,
77
- "eval_loss": 1.3464051485061646,
78
- "eval_runtime": 11.3981,
79
- "eval_samples_per_second": 9.651,
80
- "eval_steps_per_second": 1.228,
81
  "step": 450
82
  },
83
  {
84
- "epoch": 1.14,
85
- "learning_rate": 0.0,
86
- "loss": 1.3927,
87
  "step": 500
88
  },
89
  {
90
- "epoch": 1.14,
91
- "eval_loss": 1.343943476676941,
92
- "eval_runtime": 11.3987,
93
- "eval_samples_per_second": 9.65,
94
- "eval_steps_per_second": 1.228,
95
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  }
97
  ],
98
  "logging_steps": 500,
99
- "max_steps": 500,
100
- "num_train_epochs": 2,
101
  "save_steps": 50,
102
- "total_flos": 2.6741709176832e+16,
103
  "trial_name": null,
104
  "trial_params": null
105
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9446693657219973,
5
  "eval_steps": 50,
6
+ "global_step": 700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.07,
13
+ "eval_loss": 1.4132792949676514,
14
+ "eval_runtime": 24.6579,
15
+ "eval_samples_per_second": 4.056,
16
+ "eval_steps_per_second": 0.527,
17
  "step": 50
18
  },
19
  {
20
+ "epoch": 0.13,
21
+ "eval_loss": 1.3802086114883423,
22
+ "eval_runtime": 24.6958,
23
+ "eval_samples_per_second": 4.049,
24
+ "eval_steps_per_second": 0.526,
25
  "step": 100
26
  },
27
  {
28
+ "epoch": 0.2,
29
+ "eval_loss": 1.3620883226394653,
30
+ "eval_runtime": 24.6721,
31
+ "eval_samples_per_second": 4.053,
32
+ "eval_steps_per_second": 0.527,
33
  "step": 150
34
  },
35
  {
36
+ "epoch": 0.27,
37
+ "eval_loss": 1.34859037399292,
38
+ "eval_runtime": 24.7313,
39
+ "eval_samples_per_second": 4.043,
40
+ "eval_steps_per_second": 0.526,
41
  "step": 200
42
  },
43
  {
44
+ "epoch": 0.34,
45
+ "eval_loss": 1.3401516675949097,
46
+ "eval_runtime": 24.6981,
47
+ "eval_samples_per_second": 4.049,
48
+ "eval_steps_per_second": 0.526,
49
  "step": 250
50
  },
51
  {
52
+ "epoch": 0.4,
53
+ "eval_loss": 1.3350552320480347,
54
+ "eval_runtime": 24.7526,
55
+ "eval_samples_per_second": 4.04,
56
+ "eval_steps_per_second": 0.525,
57
  "step": 300
58
  },
59
  {
60
+ "epoch": 0.47,
61
+ "eval_loss": 1.3288078308105469,
62
+ "eval_runtime": 24.7114,
63
+ "eval_samples_per_second": 4.047,
64
+ "eval_steps_per_second": 0.526,
65
  "step": 350
66
  },
67
  {
68
+ "epoch": 0.54,
69
+ "eval_loss": 1.3192832469940186,
70
+ "eval_runtime": 24.7246,
71
+ "eval_samples_per_second": 4.045,
72
+ "eval_steps_per_second": 0.526,
73
  "step": 400
74
  },
75
  {
76
+ "epoch": 0.61,
77
+ "eval_loss": 1.3144173622131348,
78
+ "eval_runtime": 24.6861,
79
+ "eval_samples_per_second": 4.051,
80
+ "eval_steps_per_second": 0.527,
81
  "step": 450
82
  },
83
  {
84
+ "epoch": 0.67,
85
+ "learning_rate": 7.224606580829757e-06,
86
+ "loss": 1.3853,
87
  "step": 500
88
  },
89
  {
90
+ "epoch": 0.67,
91
+ "eval_loss": 1.3092302083969116,
92
+ "eval_runtime": 24.7017,
93
+ "eval_samples_per_second": 4.048,
94
+ "eval_steps_per_second": 0.526,
95
  "step": 500
96
+ },
97
+ {
98
+ "epoch": 0.74,
99
+ "eval_loss": 1.303202509880066,
100
+ "eval_runtime": 24.6861,
101
+ "eval_samples_per_second": 4.051,
102
+ "eval_steps_per_second": 0.527,
103
+ "step": 550
104
+ },
105
+ {
106
+ "epoch": 0.81,
107
+ "eval_loss": 1.29935884475708,
108
+ "eval_runtime": 24.6791,
109
+ "eval_samples_per_second": 4.052,
110
+ "eval_steps_per_second": 0.527,
111
+ "step": 600
112
+ },
113
+ {
114
+ "epoch": 0.88,
115
+ "eval_loss": 1.2970906496047974,
116
+ "eval_runtime": 24.7065,
117
+ "eval_samples_per_second": 4.048,
118
+ "eval_steps_per_second": 0.526,
119
+ "step": 650
120
+ },
121
+ {
122
+ "epoch": 0.94,
123
+ "eval_loss": 1.2959811687469482,
124
+ "eval_runtime": 24.7115,
125
+ "eval_samples_per_second": 4.047,
126
+ "eval_steps_per_second": 0.526,
127
+ "step": 700
128
  }
129
  ],
130
  "logging_steps": 500,
131
+ "max_steps": 700,
132
+ "num_train_epochs": 1,
133
  "save_steps": 50,
134
+ "total_flos": 4.49645833728e+16,
135
  "trial_name": null,
136
  "trial_params": null
137
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2a1de93ca3f75dfe8a6874d4a701715ed98c1760f8beb885a31aa9489fe5d89
3
  size 4027
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:127d5f0e7e6f41442f9837e46f4f0568cb2b6b2eb071d6ef7a98e02aadc0d78d
3
  size 4027