fbjr committed
Commit da433e0
1 Parent(s): 2a39438

Upload 96 files

This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50)
  1. checkpoint-1000/README.md +34 -0
  2. checkpoint-1000/adapter_config.json +21 -0
  3. checkpoint-1000/adapter_model.bin +3 -0
  4. checkpoint-1000/optimizer.pt +3 -0
  5. checkpoint-1000/rng_state.pth +3 -0
  6. checkpoint-1000/scheduler.pt +3 -0
  7. checkpoint-1000/special_tokens_map.json +30 -0
  8. checkpoint-1000/tokenizer.json +0 -0
  9. checkpoint-1000/tokenizer.model +3 -0
  10. checkpoint-1000/tokenizer_config.json +43 -0
  11. checkpoint-1000/trainer_state.json +299 -0
  12. checkpoint-1000/training_args.bin +3 -0
  13. checkpoint-650/README.md +34 -0
  14. checkpoint-650/adapter_config.json +21 -0
  15. checkpoint-650/adapter_model.bin +3 -0
  16. checkpoint-650/optimizer.pt +3 -0
  17. checkpoint-650/rng_state.pth +3 -0
  18. checkpoint-650/scheduler.pt +3 -0
  19. checkpoint-650/special_tokens_map.json +30 -0
  20. checkpoint-650/tokenizer.json +0 -0
  21. checkpoint-650/tokenizer.model +3 -0
  22. checkpoint-650/tokenizer_config.json +43 -0
  23. checkpoint-650/trainer_state.json +201 -0
  24. checkpoint-650/training_args.bin +3 -0
  25. checkpoint-700/README.md +34 -0
  26. checkpoint-700/adapter_config.json +21 -0
  27. checkpoint-700/adapter_model.bin +3 -0
  28. checkpoint-700/optimizer.pt +3 -0
  29. checkpoint-700/rng_state.pth +3 -0
  30. checkpoint-700/scheduler.pt +3 -0
  31. checkpoint-700/special_tokens_map.json +30 -0
  32. checkpoint-700/tokenizer.json +0 -0
  33. checkpoint-700/tokenizer.model +3 -0
  34. checkpoint-700/tokenizer_config.json +43 -0
  35. checkpoint-700/trainer_state.json +215 -0
  36. checkpoint-700/training_args.bin +3 -0
  37. checkpoint-750/README.md +34 -0
  38. checkpoint-750/adapter_config.json +21 -0
  39. checkpoint-750/adapter_model.bin +3 -0
  40. checkpoint-750/optimizer.pt +3 -0
  41. checkpoint-750/rng_state.pth +3 -0
  42. checkpoint-750/scheduler.pt +3 -0
  43. checkpoint-750/special_tokens_map.json +30 -0
  44. checkpoint-750/tokenizer.json +0 -0
  45. checkpoint-750/tokenizer.model +3 -0
  46. checkpoint-750/tokenizer_config.json +43 -0
  47. checkpoint-750/trainer_state.json +229 -0
  48. checkpoint-750/training_args.bin +3 -0
  49. checkpoint-800/README.md +34 -0
  50. checkpoint-800/adapter_config.json +21 -0
checkpoint-1000/README.md ADDED
@@ -0,0 +1,34 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - load_in_8bit: False
+ - load_in_4bit: True
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: nf4
+ - bnb_4bit_use_double_quant: False
+ - bnb_4bit_compute_dtype: float16
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - load_in_8bit: False
+ - load_in_4bit: True
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: nf4
+ - bnb_4bit_use_double_quant: False
+ - bnb_4bit_compute_dtype: float16
+ ### Framework versions
+
+ - PEFT 0.5.0
+
+ - PEFT 0.5.0
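For reference, here is a minimal sketch of recreating the 4-bit NF4 quantization setup the README lists (the block appears twice above; both copies are identical). None of this loading code is part of the commit, and the `device_map` choice is an assumption:

```python
# Sketch: rebuild the bitsandbytes config listed in the README
# (assumes transformers and bitsandbytes are installed).
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # load_in_4bit: True
    bnb_4bit_quant_type="nf4",             # bnb_4bit_quant_type: nf4
    bnb_4bit_use_double_quant=False,       # bnb_4bit_use_double_quant: False
    bnb_4bit_compute_dtype=torch.float16,  # bnb_4bit_compute_dtype: float16
)

base_model = AutoModelForCausalLM.from_pretrained(
    "codellama/CodeLlama-7b-hf",  # base model named in adapter_config.json below
    quantization_config=bnb_config,
    device_map="auto",            # assumption, not recorded in this commit
)
```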
checkpoint-1000/adapter_config.json ADDED
@@ -0,0 +1,21 @@
+ {
+ "auto_mapping": null,
+ "base_model_name_or_path": "codellama/CodeLlama-7b-hf",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 16,
+ "lora_dropout": 0.1,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 64,
+ "revision": null,
+ "target_modules": [
+ "q_proj",
+ "v_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+ }
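The adapter is rank-64 LoRA (alpha 16, dropout 0.1) applied to `q_proj` and `v_proj` only. A hedged sketch of attaching it with `peft`, continuing from the base-model sketch above; the local directory name `checkpoint-1000` is an assumption about where this commit is checked out:

```python
# Sketch: attach the LoRA adapter from this checkpoint directory.
from peft import PeftModel

model = PeftModel.from_pretrained(base_model, "checkpoint-1000")  # path assumed
model.eval()  # matches "inference_mode": true in adapter_config.json
```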
checkpoint-1000/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:daaad141fea83324c4d9cc46d519ae6209e59cf85403e1b85c99c4909f315390
+ size 134263757
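Note that the binary entries in this diff are Git LFS pointer files, not the weights themselves: `oid sha256:` is the content hash and `size` is the true byte count (about 134 MB of adapter weights here). A sketch of resolving a pointer through `huggingface_hub`; the repo id is a placeholder, not something recorded in this view:

```python
# Sketch: download the real file behind an LFS pointer from the Hub.
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="<user>/<repo>",  # placeholder -- substitute the actual repository
    filename="checkpoint-1000/adapter_model.bin",
)
```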
checkpoint-1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f7bc397bd6712abaf443439beb64165a6427e271fd54ece91328c91c99556c1c
+ size 268543173
checkpoint-1000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0bf2b4a2167c377d54df6d4c41cab1c9e8b318fec7c710540635106bd22340fa
+ size 14575
checkpoint-1000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ffa84e40719b997939d68759cd6081e20cd194a4249148874782150e77884095
+ size 627
checkpoint-1000/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "additional_special_tokens": [
+ "▁<PRE>",
+ "▁<MID>",
+ "▁<SUF>",
+ "▁<EOT>"
+ ],
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "</s>",
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
checkpoint-1000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1000/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6
+ size 500058
checkpoint-1000/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+ "additional_special_tokens": [
+ "▁<PRE>",
+ "▁<MID>",
+ "▁<SUF>",
+ "▁<EOT>"
+ ],
+ "bos_token": {
+ "__type": "AddedToken",
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "clean_up_tokenization_spaces": false,
+ "eos_token": {
+ "__type": "AddedToken",
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eot_token": "▁<EOT>",
+ "fill_token": "<FILL_ME>",
+ "legacy": null,
+ "middle_token": "▁<MID>",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": null,
+ "prefix_token": "▁<PRE>",
+ "sp_model_kwargs": {},
+ "suffix_token": "▁<SUF>",
+ "tokenizer_class": "CodeLlamaTokenizer",
+ "unk_token": {
+ "__type": "AddedToken",
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
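This tokenizer config wires up CodeLlama's fill-in-the-middle mode: `fill_token` is `<FILL_ME>`, and the `▁<PRE>`/`▁<SUF>`/`▁<MID>` specials mark the infilling layout. A sketch of typical usage; loading from the local checkpoint directory is an assumption (the base Hub repo works the same way):

```python
# Sketch: "<FILL_ME>" splits the prompt into prefix/suffix spans, and
# CodeLlamaTokenizer arranges the PRE/SUF/MID special tokens around the gap.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("checkpoint-1000")  # local path assumed
prompt = "def remove_non_ascii(s: str) -> str:\n    <FILL_ME>\n    return result\n"
inputs = tokenizer(prompt, return_tensors="pt")  # ready for model.generate(...)
```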
checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,299 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 0.30298439630359036,
+ "eval_steps": 50,
+ "global_step": 1000,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019,
+ "loss": 1.4805,
+ "step": 50
+ },
+ {
+ "epoch": 0.02,
+ "eval_loss": 0.935647189617157,
+ "eval_runtime": 1424.1427,
+ "eval_samples_per_second": 9.27,
+ "eval_steps_per_second": 1.159,
+ "step": 50
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00018,
+ "loss": 0.8948,
+ "step": 100
+ },
+ {
+ "epoch": 0.03,
+ "eval_loss": 0.8673275709152222,
+ "eval_runtime": 1447.9268,
+ "eval_samples_per_second": 9.118,
+ "eval_steps_per_second": 1.14,
+ "step": 100
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00017,
+ "loss": 0.8357,
+ "step": 150
+ },
+ {
+ "epoch": 0.05,
+ "eval_loss": 0.8413857221603394,
+ "eval_runtime": 1449.9965,
+ "eval_samples_per_second": 9.105,
+ "eval_steps_per_second": 1.139,
+ "step": 150
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.00016,
+ "loss": 0.8461,
+ "step": 200
+ },
+ {
+ "epoch": 0.06,
+ "eval_loss": 0.8236712217330933,
+ "eval_runtime": 1428.6867,
+ "eval_samples_per_second": 9.241,
+ "eval_steps_per_second": 1.156,
+ "step": 200
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.00015000000000000001,
+ "loss": 0.8046,
+ "step": 250
+ },
+ {
+ "epoch": 0.08,
+ "eval_loss": 0.7915458679199219,
+ "eval_runtime": 1422.1414,
+ "eval_samples_per_second": 9.283,
+ "eval_steps_per_second": 1.161,
+ "step": 250
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00014,
+ "loss": 0.744,
+ "step": 300
+ },
+ {
+ "epoch": 0.09,
+ "eval_loss": 0.7999407649040222,
+ "eval_runtime": 1408.4646,
+ "eval_samples_per_second": 9.373,
+ "eval_steps_per_second": 1.172,
+ "step": 300
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.00013000000000000002,
+ "loss": 0.7219,
+ "step": 350
+ },
+ {
+ "epoch": 0.11,
+ "eval_loss": 0.7075337171554565,
+ "eval_runtime": 1407.3666,
+ "eval_samples_per_second": 9.381,
+ "eval_steps_per_second": 1.173,
+ "step": 350
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.00012,
+ "loss": 0.7027,
+ "step": 400
+ },
+ {
+ "epoch": 0.12,
+ "eval_loss": 0.697420060634613,
+ "eval_runtime": 1391.6821,
+ "eval_samples_per_second": 9.486,
+ "eval_steps_per_second": 1.186,
+ "step": 400
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.00011000000000000002,
+ "loss": 0.6982,
+ "step": 450
+ },
+ {
+ "epoch": 0.14,
+ "eval_loss": 0.6917020678520203,
+ "eval_runtime": 1383.9747,
+ "eval_samples_per_second": 9.539,
+ "eval_steps_per_second": 1.193,
+ "step": 450
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.0001,
+ "loss": 0.6746,
+ "step": 500
+ },
+ {
+ "epoch": 0.15,
+ "eval_loss": 0.6870374083518982,
+ "eval_runtime": 1381.4308,
+ "eval_samples_per_second": 9.557,
+ "eval_steps_per_second": 1.195,
+ "step": 500
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 9e-05,
+ "loss": 0.6667,
+ "step": 550
+ },
+ {
+ "epoch": 0.17,
+ "eval_loss": 0.6836341619491577,
+ "eval_runtime": 1384.702,
+ "eval_samples_per_second": 9.534,
+ "eval_steps_per_second": 1.192,
+ "step": 550
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 8e-05,
+ "loss": 0.6872,
+ "step": 600
+ },
+ {
+ "epoch": 0.18,
+ "eval_loss": 0.6808720231056213,
+ "eval_runtime": 1386.676,
+ "eval_samples_per_second": 9.521,
+ "eval_steps_per_second": 1.191,
+ "step": 600
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 7e-05,
+ "loss": 0.6793,
+ "step": 650
+ },
+ {
+ "epoch": 0.2,
+ "eval_loss": 0.6773844957351685,
+ "eval_runtime": 1607.2067,
+ "eval_samples_per_second": 8.214,
+ "eval_steps_per_second": 1.027,
+ "step": 650
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 6e-05,
+ "loss": 0.6989,
+ "step": 700
+ },
+ {
+ "epoch": 0.21,
+ "eval_loss": 0.6752211451530457,
+ "eval_runtime": 1618.5926,
+ "eval_samples_per_second": 8.156,
+ "eval_steps_per_second": 1.02,
+ "step": 700
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 5e-05,
+ "loss": 0.6525,
+ "step": 750
+ },
+ {
+ "epoch": 0.23,
+ "eval_loss": 0.6731519103050232,
+ "eval_runtime": 1621.8237,
+ "eval_samples_per_second": 8.14,
+ "eval_steps_per_second": 1.018,
+ "step": 750
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 4e-05,
+ "loss": 0.6722,
+ "step": 800
+ },
+ {
+ "epoch": 0.24,
+ "eval_loss": 0.6716203689575195,
+ "eval_runtime": 1622.2693,
+ "eval_samples_per_second": 8.138,
+ "eval_steps_per_second": 1.018,
+ "step": 800
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 3e-05,
+ "loss": 0.6687,
+ "step": 850
+ },
+ {
+ "epoch": 0.26,
+ "eval_loss": 0.6699801683425903,
+ "eval_runtime": 1622.5334,
+ "eval_samples_per_second": 8.137,
+ "eval_steps_per_second": 1.018,
+ "step": 850
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 2e-05,
+ "loss": 0.6381,
+ "step": 900
+ },
+ {
+ "epoch": 0.27,
+ "eval_loss": 0.6691889762878418,
+ "eval_runtime": 1621.8507,
+ "eval_samples_per_second": 8.14,
+ "eval_steps_per_second": 1.018,
+ "step": 900
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 1e-05,
+ "loss": 0.6632,
+ "step": 950
+ },
+ {
+ "epoch": 0.29,
+ "eval_loss": 0.6684426069259644,
+ "eval_runtime": 1609.6365,
+ "eval_samples_per_second": 8.202,
+ "eval_steps_per_second": 1.026,
+ "step": 950
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 0.0,
+ "loss": 0.6683,
+ "step": 1000
+ },
+ {
+ "epoch": 0.3,
+ "eval_loss": 0.668069064617157,
+ "eval_runtime": 1595.5055,
+ "eval_samples_per_second": 8.274,
+ "eval_steps_per_second": 1.035,
+ "step": 1000
+ }
+ ],
+ "logging_steps": 50,
+ "max_steps": 1000,
+ "num_train_epochs": 1,
+ "save_steps": 50,
+ "total_flos": 7.51250442533929e+16,
+ "trial_name": null,
+ "trial_params": null
+ }
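This trainer state records a single run of 1,000 steps (0.30 epoch) with evaluation every 50 steps: train loss drops from 1.48 to about 0.67, eval loss from 0.936 to 0.668, and the logged learning rate decays linearly to 0 (consistent with a 2e-4 starting rate). A small sketch for pulling out the eval curve; the local path is assumed:

```python
# Sketch: extract the eval-loss curve from trainer_state.json.
import json

with open("checkpoint-1000/trainer_state.json") as f:
    state = json.load(f)

# Eval entries are the log_history records that carry "eval_loss".
evals = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]
for step, loss in evals:
    print(f"step {step:4d}  eval_loss {loss:.4f}")  # 0.9356 at step 50 -> 0.6681 at 1000
```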
checkpoint-1000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7333e78b084eeaece18c6fb119affbad52fe2adda65cecdd7c2cc539cbe1447e
+ size 3963
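`training_args.bin` is the pickled `TrainingArguments` object that the `Trainer` saves alongside each checkpoint (identical across all checkpoints here, per the matching hashes). A hedged sketch for inspecting it; `transformers` must be importable for unpickling, and on newer PyTorch the explicit `weights_only=False` is needed because this is a pickle, not a tensor file:

```python
# Sketch: inspect the pickled TrainingArguments saved with the checkpoint.
import torch

args = torch.load("checkpoint-1000/training_args.bin", weights_only=False)  # path assumed
print(args.learning_rate, args.max_steps)  # should agree with trainer_state.json
```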
checkpoint-650/README.md ADDED
@@ -0,0 +1,34 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - load_in_8bit: False
+ - load_in_4bit: True
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: nf4
+ - bnb_4bit_use_double_quant: False
+ - bnb_4bit_compute_dtype: float16
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - load_in_8bit: False
+ - load_in_4bit: True
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: nf4
+ - bnb_4bit_use_double_quant: False
+ - bnb_4bit_compute_dtype: float16
+ ### Framework versions
+
+ - PEFT 0.5.0
+
+ - PEFT 0.5.0
checkpoint-650/adapter_config.json ADDED
@@ -0,0 +1,21 @@
+ {
+ "auto_mapping": null,
+ "base_model_name_or_path": "codellama/CodeLlama-7b-hf",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 16,
+ "lora_dropout": 0.1,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 64,
+ "revision": null,
+ "target_modules": [
+ "q_proj",
+ "v_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+ }
checkpoint-650/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5e6cd33791988e24404fb9824093834ed29e214f46beab6c893786df4a3e4fbd
+ size 134263757
checkpoint-650/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5923b5f81975abccf1c319dbe3533e4619ce551c9b6673518536ce5d6457b240
+ size 268543173
checkpoint-650/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e66a78467c0a7d99442778ddd3064c80925583a181dee52e58e7f24c79a82546
+ size 14575
checkpoint-650/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8bf25380de37cadf5d3522180134caf595413987149b73e6b367c794140b2a9b
+ size 627
checkpoint-650/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "additional_special_tokens": [
+ "▁<PRE>",
+ "▁<MID>",
+ "▁<SUF>",
+ "▁<EOT>"
+ ],
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "</s>",
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
checkpoint-650/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-650/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6
+ size 500058
checkpoint-650/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+ "additional_special_tokens": [
+ "▁<PRE>",
+ "▁<MID>",
+ "▁<SUF>",
+ "▁<EOT>"
+ ],
+ "bos_token": {
+ "__type": "AddedToken",
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "clean_up_tokenization_spaces": false,
+ "eos_token": {
+ "__type": "AddedToken",
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eot_token": "▁<EOT>",
+ "fill_token": "<FILL_ME>",
+ "legacy": null,
+ "middle_token": "▁<MID>",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": null,
+ "prefix_token": "▁<PRE>",
+ "sp_model_kwargs": {},
+ "suffix_token": "▁<SUF>",
+ "tokenizer_class": "CodeLlamaTokenizer",
+ "unk_token": {
+ "__type": "AddedToken",
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
checkpoint-650/trainer_state.json ADDED
@@ -0,0 +1,201 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 0.19693985759733373,
+ "eval_steps": 50,
+ "global_step": 650,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019,
+ "loss": 1.4805,
+ "step": 50
+ },
+ {
+ "epoch": 0.02,
+ "eval_loss": 0.935647189617157,
+ "eval_runtime": 1424.1427,
+ "eval_samples_per_second": 9.27,
+ "eval_steps_per_second": 1.159,
+ "step": 50
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00018,
+ "loss": 0.8948,
+ "step": 100
+ },
+ {
+ "epoch": 0.03,
+ "eval_loss": 0.8673275709152222,
+ "eval_runtime": 1447.9268,
+ "eval_samples_per_second": 9.118,
+ "eval_steps_per_second": 1.14,
+ "step": 100
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00017,
+ "loss": 0.8357,
+ "step": 150
+ },
+ {
+ "epoch": 0.05,
+ "eval_loss": 0.8413857221603394,
+ "eval_runtime": 1449.9965,
+ "eval_samples_per_second": 9.105,
+ "eval_steps_per_second": 1.139,
+ "step": 150
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.00016,
+ "loss": 0.8461,
+ "step": 200
+ },
+ {
+ "epoch": 0.06,
+ "eval_loss": 0.8236712217330933,
+ "eval_runtime": 1428.6867,
+ "eval_samples_per_second": 9.241,
+ "eval_steps_per_second": 1.156,
+ "step": 200
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.00015000000000000001,
+ "loss": 0.8046,
+ "step": 250
+ },
+ {
+ "epoch": 0.08,
+ "eval_loss": 0.7915458679199219,
+ "eval_runtime": 1422.1414,
+ "eval_samples_per_second": 9.283,
+ "eval_steps_per_second": 1.161,
+ "step": 250
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00014,
+ "loss": 0.744,
+ "step": 300
+ },
+ {
+ "epoch": 0.09,
+ "eval_loss": 0.7999407649040222,
+ "eval_runtime": 1408.4646,
+ "eval_samples_per_second": 9.373,
+ "eval_steps_per_second": 1.172,
+ "step": 300
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.00013000000000000002,
+ "loss": 0.7219,
+ "step": 350
+ },
+ {
+ "epoch": 0.11,
+ "eval_loss": 0.7075337171554565,
+ "eval_runtime": 1407.3666,
+ "eval_samples_per_second": 9.381,
+ "eval_steps_per_second": 1.173,
+ "step": 350
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.00012,
+ "loss": 0.7027,
+ "step": 400
+ },
+ {
+ "epoch": 0.12,
+ "eval_loss": 0.697420060634613,
+ "eval_runtime": 1391.6821,
+ "eval_samples_per_second": 9.486,
+ "eval_steps_per_second": 1.186,
+ "step": 400
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.00011000000000000002,
+ "loss": 0.6982,
+ "step": 450
+ },
+ {
+ "epoch": 0.14,
+ "eval_loss": 0.6917020678520203,
+ "eval_runtime": 1383.9747,
+ "eval_samples_per_second": 9.539,
+ "eval_steps_per_second": 1.193,
+ "step": 450
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.0001,
+ "loss": 0.6746,
+ "step": 500
+ },
+ {
+ "epoch": 0.15,
+ "eval_loss": 0.6870374083518982,
+ "eval_runtime": 1381.4308,
+ "eval_samples_per_second": 9.557,
+ "eval_steps_per_second": 1.195,
+ "step": 500
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 9e-05,
+ "loss": 0.6667,
+ "step": 550
+ },
+ {
+ "epoch": 0.17,
+ "eval_loss": 0.6836341619491577,
+ "eval_runtime": 1384.702,
+ "eval_samples_per_second": 9.534,
+ "eval_steps_per_second": 1.192,
+ "step": 550
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 8e-05,
+ "loss": 0.6872,
+ "step": 600
+ },
+ {
+ "epoch": 0.18,
+ "eval_loss": 0.6808720231056213,
+ "eval_runtime": 1386.676,
+ "eval_samples_per_second": 9.521,
+ "eval_steps_per_second": 1.191,
+ "step": 600
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 7e-05,
+ "loss": 0.6793,
+ "step": 650
+ },
+ {
+ "epoch": 0.2,
+ "eval_loss": 0.6773844957351685,
+ "eval_runtime": 1607.2067,
+ "eval_samples_per_second": 8.214,
+ "eval_steps_per_second": 1.027,
+ "step": 650
+ }
+ ],
+ "logging_steps": 50,
+ "max_steps": 1000,
+ "num_train_epochs": 1,
+ "save_steps": 50,
+ "total_flos": 4.863070688831078e+16,
+ "trial_name": null,
+ "trial_params": null
+ }
checkpoint-650/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7333e78b084eeaece18c6fb119affbad52fe2adda65cecdd7c2cc539cbe1447e
+ size 3963
checkpoint-700/README.md ADDED
@@ -0,0 +1,34 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - load_in_8bit: False
+ - load_in_4bit: True
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: nf4
+ - bnb_4bit_use_double_quant: False
+ - bnb_4bit_compute_dtype: float16
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - load_in_8bit: False
+ - load_in_4bit: True
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: nf4
+ - bnb_4bit_use_double_quant: False
+ - bnb_4bit_compute_dtype: float16
+ ### Framework versions
+
+ - PEFT 0.5.0
+
+ - PEFT 0.5.0
checkpoint-700/adapter_config.json ADDED
@@ -0,0 +1,21 @@
+ {
+ "auto_mapping": null,
+ "base_model_name_or_path": "codellama/CodeLlama-7b-hf",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 16,
+ "lora_dropout": 0.1,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 64,
+ "revision": null,
+ "target_modules": [
+ "q_proj",
+ "v_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+ }
checkpoint-700/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bb1873a01a5b5d7d7c9be2ed09dfac9163542886241689864d3cc4ed501caf38
+ size 134263757
checkpoint-700/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b346d9be0e446f278c833dc8cd51decd9c12b240bc09622e740f64367101e50a
+ size 268543173
checkpoint-700/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:16302bb12137762ebfb63b657bece6ae4f53061075918d1f1e147c740dc7c352
+ size 14575
checkpoint-700/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:864c3e335dc1cb95968d6e74ea460ccec144b2ce47e691c30586e1283f01ebd3
+ size 627
checkpoint-700/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "additional_special_tokens": [
+ "▁<PRE>",
+ "▁<MID>",
+ "▁<SUF>",
+ "▁<EOT>"
+ ],
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "</s>",
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
checkpoint-700/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-700/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6
+ size 500058
checkpoint-700/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+ "additional_special_tokens": [
+ "▁<PRE>",
+ "▁<MID>",
+ "▁<SUF>",
+ "▁<EOT>"
+ ],
+ "bos_token": {
+ "__type": "AddedToken",
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "clean_up_tokenization_spaces": false,
+ "eos_token": {
+ "__type": "AddedToken",
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eot_token": "▁<EOT>",
+ "fill_token": "<FILL_ME>",
+ "legacy": null,
+ "middle_token": "▁<MID>",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": null,
+ "prefix_token": "▁<PRE>",
+ "sp_model_kwargs": {},
+ "suffix_token": "▁<SUF>",
+ "tokenizer_class": "CodeLlamaTokenizer",
+ "unk_token": {
+ "__type": "AddedToken",
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
checkpoint-700/trainer_state.json ADDED
@@ -0,0 +1,215 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 0.21208907741251326,
+ "eval_steps": 50,
+ "global_step": 700,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019,
+ "loss": 1.4805,
+ "step": 50
+ },
+ {
+ "epoch": 0.02,
+ "eval_loss": 0.935647189617157,
+ "eval_runtime": 1424.1427,
+ "eval_samples_per_second": 9.27,
+ "eval_steps_per_second": 1.159,
+ "step": 50
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00018,
+ "loss": 0.8948,
+ "step": 100
+ },
+ {
+ "epoch": 0.03,
+ "eval_loss": 0.8673275709152222,
+ "eval_runtime": 1447.9268,
+ "eval_samples_per_second": 9.118,
+ "eval_steps_per_second": 1.14,
+ "step": 100
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00017,
+ "loss": 0.8357,
+ "step": 150
+ },
+ {
+ "epoch": 0.05,
+ "eval_loss": 0.8413857221603394,
+ "eval_runtime": 1449.9965,
+ "eval_samples_per_second": 9.105,
+ "eval_steps_per_second": 1.139,
+ "step": 150
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.00016,
+ "loss": 0.8461,
+ "step": 200
+ },
+ {
+ "epoch": 0.06,
+ "eval_loss": 0.8236712217330933,
+ "eval_runtime": 1428.6867,
+ "eval_samples_per_second": 9.241,
+ "eval_steps_per_second": 1.156,
+ "step": 200
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.00015000000000000001,
+ "loss": 0.8046,
+ "step": 250
+ },
+ {
+ "epoch": 0.08,
+ "eval_loss": 0.7915458679199219,
+ "eval_runtime": 1422.1414,
+ "eval_samples_per_second": 9.283,
+ "eval_steps_per_second": 1.161,
+ "step": 250
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00014,
+ "loss": 0.744,
+ "step": 300
+ },
+ {
+ "epoch": 0.09,
+ "eval_loss": 0.7999407649040222,
+ "eval_runtime": 1408.4646,
+ "eval_samples_per_second": 9.373,
+ "eval_steps_per_second": 1.172,
+ "step": 300
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.00013000000000000002,
+ "loss": 0.7219,
+ "step": 350
+ },
+ {
+ "epoch": 0.11,
+ "eval_loss": 0.7075337171554565,
+ "eval_runtime": 1407.3666,
+ "eval_samples_per_second": 9.381,
+ "eval_steps_per_second": 1.173,
+ "step": 350
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.00012,
+ "loss": 0.7027,
+ "step": 400
+ },
+ {
+ "epoch": 0.12,
+ "eval_loss": 0.697420060634613,
+ "eval_runtime": 1391.6821,
+ "eval_samples_per_second": 9.486,
+ "eval_steps_per_second": 1.186,
+ "step": 400
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.00011000000000000002,
+ "loss": 0.6982,
+ "step": 450
+ },
+ {
+ "epoch": 0.14,
+ "eval_loss": 0.6917020678520203,
+ "eval_runtime": 1383.9747,
+ "eval_samples_per_second": 9.539,
+ "eval_steps_per_second": 1.193,
+ "step": 450
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.0001,
+ "loss": 0.6746,
+ "step": 500
+ },
+ {
+ "epoch": 0.15,
+ "eval_loss": 0.6870374083518982,
+ "eval_runtime": 1381.4308,
+ "eval_samples_per_second": 9.557,
+ "eval_steps_per_second": 1.195,
+ "step": 500
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 9e-05,
+ "loss": 0.6667,
+ "step": 550
+ },
+ {
+ "epoch": 0.17,
+ "eval_loss": 0.6836341619491577,
+ "eval_runtime": 1384.702,
+ "eval_samples_per_second": 9.534,
+ "eval_steps_per_second": 1.192,
+ "step": 550
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 8e-05,
+ "loss": 0.6872,
+ "step": 600
+ },
+ {
+ "epoch": 0.18,
+ "eval_loss": 0.6808720231056213,
+ "eval_runtime": 1386.676,
+ "eval_samples_per_second": 9.521,
+ "eval_steps_per_second": 1.191,
+ "step": 600
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 7e-05,
+ "loss": 0.6793,
+ "step": 650
+ },
+ {
+ "epoch": 0.2,
+ "eval_loss": 0.6773844957351685,
+ "eval_runtime": 1607.2067,
+ "eval_samples_per_second": 8.214,
+ "eval_steps_per_second": 1.027,
+ "step": 650
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 6e-05,
+ "loss": 0.6989,
+ "step": 700
+ },
+ {
+ "epoch": 0.21,
+ "eval_loss": 0.6752211451530457,
+ "eval_runtime": 1618.5926,
+ "eval_samples_per_second": 8.156,
+ "eval_steps_per_second": 1.02,
+ "step": 700
+ }
+ ],
+ "logging_steps": 50,
+ "max_steps": 1000,
+ "num_train_epochs": 1,
+ "save_steps": 50,
+ "total_flos": 5.261429458422989e+16,
+ "trial_name": null,
+ "trial_params": null
+ }
checkpoint-700/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7333e78b084eeaece18c6fb119affbad52fe2adda65cecdd7c2cc539cbe1447e
+ size 3963
checkpoint-750/README.md ADDED
@@ -0,0 +1,34 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - load_in_8bit: False
+ - load_in_4bit: True
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: nf4
+ - bnb_4bit_use_double_quant: False
+ - bnb_4bit_compute_dtype: float16
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - load_in_8bit: False
+ - load_in_4bit: True
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: nf4
+ - bnb_4bit_use_double_quant: False
+ - bnb_4bit_compute_dtype: float16
+ ### Framework versions
+
+ - PEFT 0.5.0
+
+ - PEFT 0.5.0
checkpoint-750/adapter_config.json ADDED
@@ -0,0 +1,21 @@
+ {
+ "auto_mapping": null,
+ "base_model_name_or_path": "codellama/CodeLlama-7b-hf",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 16,
+ "lora_dropout": 0.1,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 64,
+ "revision": null,
+ "target_modules": [
+ "q_proj",
+ "v_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+ }
checkpoint-750/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e199067a284c48546f4130065185eb74a0775795cb7363bccdccb8942b8e7777
+ size 134263757
checkpoint-750/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5733972711e183a97c4ebe8c739c6e2359a787c6a878183a1ab50a977f929fbb
+ size 268543173
checkpoint-750/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:aab28b987252c9c9e6fa7594d0095efffc5fbbda4962f64adcba5e996194aaea
+ size 14575
checkpoint-750/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f63d521ef9ff6ddc2af27fac07de781fe71b1d15cb22b4b1668b2614d7508484
+ size 627
checkpoint-750/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "additional_special_tokens": [
+ "▁<PRE>",
+ "▁<MID>",
+ "▁<SUF>",
+ "▁<EOT>"
+ ],
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "</s>",
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
checkpoint-750/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-750/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6
+ size 500058
checkpoint-750/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+ "additional_special_tokens": [
+ "▁<PRE>",
+ "▁<MID>",
+ "▁<SUF>",
+ "▁<EOT>"
+ ],
+ "bos_token": {
+ "__type": "AddedToken",
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "clean_up_tokenization_spaces": false,
+ "eos_token": {
+ "__type": "AddedToken",
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eot_token": "▁<EOT>",
+ "fill_token": "<FILL_ME>",
+ "legacy": null,
+ "middle_token": "▁<MID>",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": null,
+ "prefix_token": "▁<PRE>",
+ "sp_model_kwargs": {},
+ "suffix_token": "▁<SUF>",
+ "tokenizer_class": "CodeLlamaTokenizer",
+ "unk_token": {
+ "__type": "AddedToken",
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
checkpoint-750/trainer_state.json ADDED
@@ -0,0 +1,229 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 0.2272382972276928,
+ "eval_steps": 50,
+ "global_step": 750,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.02,
+ "learning_rate": 0.00019,
+ "loss": 1.4805,
+ "step": 50
+ },
+ {
+ "epoch": 0.02,
+ "eval_loss": 0.935647189617157,
+ "eval_runtime": 1424.1427,
+ "eval_samples_per_second": 9.27,
+ "eval_steps_per_second": 1.159,
+ "step": 50
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 0.00018,
+ "loss": 0.8948,
+ "step": 100
+ },
+ {
+ "epoch": 0.03,
+ "eval_loss": 0.8673275709152222,
+ "eval_runtime": 1447.9268,
+ "eval_samples_per_second": 9.118,
+ "eval_steps_per_second": 1.14,
+ "step": 100
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 0.00017,
+ "loss": 0.8357,
+ "step": 150
+ },
+ {
+ "epoch": 0.05,
+ "eval_loss": 0.8413857221603394,
+ "eval_runtime": 1449.9965,
+ "eval_samples_per_second": 9.105,
+ "eval_steps_per_second": 1.139,
+ "step": 150
+ },
+ {
+ "epoch": 0.06,
+ "learning_rate": 0.00016,
+ "loss": 0.8461,
+ "step": 200
+ },
+ {
+ "epoch": 0.06,
+ "eval_loss": 0.8236712217330933,
+ "eval_runtime": 1428.6867,
+ "eval_samples_per_second": 9.241,
+ "eval_steps_per_second": 1.156,
+ "step": 200
+ },
+ {
+ "epoch": 0.08,
+ "learning_rate": 0.00015000000000000001,
+ "loss": 0.8046,
+ "step": 250
+ },
+ {
+ "epoch": 0.08,
+ "eval_loss": 0.7915458679199219,
+ "eval_runtime": 1422.1414,
+ "eval_samples_per_second": 9.283,
+ "eval_steps_per_second": 1.161,
+ "step": 250
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 0.00014,
+ "loss": 0.744,
+ "step": 300
+ },
+ {
+ "epoch": 0.09,
+ "eval_loss": 0.7999407649040222,
+ "eval_runtime": 1408.4646,
+ "eval_samples_per_second": 9.373,
+ "eval_steps_per_second": 1.172,
+ "step": 300
+ },
+ {
+ "epoch": 0.11,
+ "learning_rate": 0.00013000000000000002,
+ "loss": 0.7219,
+ "step": 350
+ },
+ {
+ "epoch": 0.11,
+ "eval_loss": 0.7075337171554565,
+ "eval_runtime": 1407.3666,
+ "eval_samples_per_second": 9.381,
+ "eval_steps_per_second": 1.173,
+ "step": 350
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 0.00012,
+ "loss": 0.7027,
+ "step": 400
+ },
+ {
+ "epoch": 0.12,
+ "eval_loss": 0.697420060634613,
+ "eval_runtime": 1391.6821,
+ "eval_samples_per_second": 9.486,
+ "eval_steps_per_second": 1.186,
+ "step": 400
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 0.00011000000000000002,
+ "loss": 0.6982,
+ "step": 450
+ },
+ {
+ "epoch": 0.14,
+ "eval_loss": 0.6917020678520203,
+ "eval_runtime": 1383.9747,
+ "eval_samples_per_second": 9.539,
+ "eval_steps_per_second": 1.193,
+ "step": 450
+ },
+ {
+ "epoch": 0.15,
+ "learning_rate": 0.0001,
+ "loss": 0.6746,
+ "step": 500
+ },
+ {
+ "epoch": 0.15,
+ "eval_loss": 0.6870374083518982,
+ "eval_runtime": 1381.4308,
+ "eval_samples_per_second": 9.557,
+ "eval_steps_per_second": 1.195,
+ "step": 500
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 9e-05,
+ "loss": 0.6667,
+ "step": 550
+ },
+ {
+ "epoch": 0.17,
+ "eval_loss": 0.6836341619491577,
+ "eval_runtime": 1384.702,
+ "eval_samples_per_second": 9.534,
+ "eval_steps_per_second": 1.192,
+ "step": 550
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 8e-05,
+ "loss": 0.6872,
+ "step": 600
+ },
+ {
+ "epoch": 0.18,
+ "eval_loss": 0.6808720231056213,
+ "eval_runtime": 1386.676,
+ "eval_samples_per_second": 9.521,
+ "eval_steps_per_second": 1.191,
+ "step": 600
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 7e-05,
+ "loss": 0.6793,
+ "step": 650
+ },
+ {
+ "epoch": 0.2,
+ "eval_loss": 0.6773844957351685,
+ "eval_runtime": 1607.2067,
+ "eval_samples_per_second": 8.214,
+ "eval_steps_per_second": 1.027,
+ "step": 650
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 6e-05,
+ "loss": 0.6989,
+ "step": 700
+ },
+ {
+ "epoch": 0.21,
+ "eval_loss": 0.6752211451530457,
+ "eval_runtime": 1618.5926,
+ "eval_samples_per_second": 8.156,
+ "eval_steps_per_second": 1.02,
+ "step": 700
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 5e-05,
+ "loss": 0.6525,
+ "step": 750
+ },
+ {
+ "epoch": 0.23,
+ "eval_loss": 0.6731519103050232,
+ "eval_runtime": 1621.8237,
+ "eval_samples_per_second": 8.14,
+ "eval_steps_per_second": 1.018,
+ "step": 750
+ }
+ ],
+ "logging_steps": 50,
+ "max_steps": 1000,
+ "num_train_epochs": 1,
+ "save_steps": 50,
+ "total_flos": 5.64019534036009e+16,
+ "trial_name": null,
+ "trial_params": null
+ }
checkpoint-750/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7333e78b084eeaece18c6fb119affbad52fe2adda65cecdd7c2cc539cbe1447e
+ size 3963
checkpoint-800/README.md ADDED
@@ -0,0 +1,34 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - load_in_8bit: False
+ - load_in_4bit: True
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: nf4
+ - bnb_4bit_use_double_quant: False
+ - bnb_4bit_compute_dtype: float16
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - load_in_8bit: False
+ - load_in_4bit: True
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: nf4
+ - bnb_4bit_use_double_quant: False
+ - bnb_4bit_compute_dtype: float16
+ ### Framework versions
+
+ - PEFT 0.5.0
+
+ - PEFT 0.5.0
checkpoint-800/adapter_config.json ADDED
@@ -0,0 +1,21 @@
+ {
+ "auto_mapping": null,
+ "base_model_name_or_path": "codellama/CodeLlama-7b-hf",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 16,
+ "lora_dropout": 0.1,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 64,
+ "revision": null,
+ "target_modules": [
+ "q_proj",
+ "v_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+ }