QingyiSi commited on
Commit
4697198
1 Parent(s): 03280e3

Upload 1268 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. adapters/saved-alpaca-13b/adapter_config.json +18 -0
  2. adapters/saved-alpaca-13b/adapter_model.bin +3 -0
  3. adapters/saved-alpaca-13b/checkpoint-1000/optimizer.pt +3 -0
  4. adapters/saved-alpaca-13b/checkpoint-1000/pytorch_model.bin +3 -0
  5. adapters/saved-alpaca-13b/checkpoint-1000/rng_state_0.pth +3 -0
  6. adapters/saved-alpaca-13b/checkpoint-1000/rng_state_1.pth +3 -0
  7. adapters/saved-alpaca-13b/checkpoint-1000/rng_state_2.pth +3 -0
  8. adapters/saved-alpaca-13b/checkpoint-1000/rng_state_3.pth +3 -0
  9. adapters/saved-alpaca-13b/checkpoint-1000/rng_state_4.pth +3 -0
  10. adapters/saved-alpaca-13b/checkpoint-1000/rng_state_5.pth +3 -0
  11. adapters/saved-alpaca-13b/checkpoint-1000/rng_state_6.pth +3 -0
  12. adapters/saved-alpaca-13b/checkpoint-1000/rng_state_7.pth +3 -0
  13. adapters/saved-alpaca-13b/checkpoint-1000/scaler.pt +3 -0
  14. adapters/saved-alpaca-13b/checkpoint-1000/scheduler.pt +3 -0
  15. adapters/saved-alpaca-13b/checkpoint-1000/trainer_state.json +356 -0
  16. adapters/saved-alpaca-13b/checkpoint-1000/training_args.bin +3 -0
  17. adapters/saved-alpaca-13b/checkpoint-600/optimizer.pt +3 -0
  18. adapters/saved-alpaca-13b/checkpoint-600/pytorch_model.bin +3 -0
  19. adapters/saved-alpaca-13b/checkpoint-600/rng_state_0.pth +3 -0
  20. adapters/saved-alpaca-13b/checkpoint-600/rng_state_1.pth +3 -0
  21. adapters/saved-alpaca-13b/checkpoint-600/rng_state_2.pth +3 -0
  22. adapters/saved-alpaca-13b/checkpoint-600/rng_state_3.pth +3 -0
  23. adapters/saved-alpaca-13b/checkpoint-600/rng_state_4.pth +3 -0
  24. adapters/saved-alpaca-13b/checkpoint-600/rng_state_5.pth +3 -0
  25. adapters/saved-alpaca-13b/checkpoint-600/rng_state_6.pth +3 -0
  26. adapters/saved-alpaca-13b/checkpoint-600/rng_state_7.pth +3 -0
  27. adapters/saved-alpaca-13b/checkpoint-600/scaler.pt +3 -0
  28. adapters/saved-alpaca-13b/checkpoint-600/scheduler.pt +3 -0
  29. adapters/saved-alpaca-13b/checkpoint-600/trainer_state.json +220 -0
  30. adapters/saved-alpaca-13b/checkpoint-600/training_args.bin +3 -0
  31. adapters/saved-alpaca-13b/checkpoint-800/optimizer.pt +3 -0
  32. adapters/saved-alpaca-13b/checkpoint-800/pytorch_model.bin +3 -0
  33. adapters/saved-alpaca-13b/checkpoint-800/rng_state_0.pth +3 -0
  34. adapters/saved-alpaca-13b/checkpoint-800/rng_state_1.pth +3 -0
  35. adapters/saved-alpaca-13b/checkpoint-800/rng_state_2.pth +3 -0
  36. adapters/saved-alpaca-13b/checkpoint-800/rng_state_3.pth +3 -0
  37. adapters/saved-alpaca-13b/checkpoint-800/rng_state_4.pth +3 -0
  38. adapters/saved-alpaca-13b/checkpoint-800/rng_state_5.pth +3 -0
  39. adapters/saved-alpaca-13b/checkpoint-800/rng_state_6.pth +3 -0
  40. adapters/saved-alpaca-13b/checkpoint-800/rng_state_7.pth +3 -0
  41. adapters/saved-alpaca-13b/checkpoint-800/scaler.pt +3 -0
  42. adapters/saved-alpaca-13b/checkpoint-800/scheduler.pt +3 -0
  43. adapters/saved-alpaca-13b/checkpoint-800/trainer_state.json +288 -0
  44. adapters/saved-alpaca-13b/checkpoint-800/training_args.bin +3 -0
  45. adapters/saved-alpaca-30b/adapter_config.json +18 -0
  46. adapters/saved-alpaca-30b/adapter_model.bin +3 -0
  47. adapters/saved-alpaca-30b/checkpoint-1000/optimizer.pt +3 -0
  48. adapters/saved-alpaca-30b/checkpoint-1000/pytorch_model.bin +3 -0
  49. adapters/saved-alpaca-30b/checkpoint-1000/rng_state_0.pth +3 -0
  50. adapters/saved-alpaca-30b/checkpoint-1000/rng_state_1.pth +3 -0
adapters/saved-alpaca-13b/adapter_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "/mnt/bn/qingyi-bn-lq/llama/llama-13b-hf",
3
+ "bias": "none",
4
+ "enable_lora": null,
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "lora_alpha": 16,
8
+ "lora_dropout": 0.05,
9
+ "merge_weights": false,
10
+ "modules_to_save": null,
11
+ "peft_type": "LORA",
12
+ "r": 8,
13
+ "target_modules": [
14
+ "q_proj",
15
+ "v_proj"
16
+ ],
17
+ "task_type": "CAUSAL_LM"
18
+ }
adapters/saved-alpaca-13b/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17f520b2585949c23c374ad167d169b4fc21b3cc8411305b18ae1b7bd1d49002
3
+ size 26271757
adapters/saved-alpaca-13b/checkpoint-1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e7c503114ad958265cb0b9937aa6f04e6b3e5ebed55f402be0dfe2608728e1b
3
+ size 52523141
adapters/saved-alpaca-13b/checkpoint-1000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a485291ae756d9ce2a9f1e587dca5a7f2e956eb0d2ac36a0538fd9ee8fe8568
3
+ size 26271757
adapters/saved-alpaca-13b/checkpoint-1000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b2d4f40efc056e291f0e7bc62df73b1723ac76b464e4dfa2950f67d2923899f
3
+ size 14583
adapters/saved-alpaca-13b/checkpoint-1000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdb44bc4712cb02da41900ef02cb5e2115d133414246f5122c59a215c7fe47e9
3
+ size 14583
adapters/saved-alpaca-13b/checkpoint-1000/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:502133cf23fc1f7cde440bff04cdc89a4fd570c0158d3b2ae9f6012edcf11ba3
3
+ size 14583
adapters/saved-alpaca-13b/checkpoint-1000/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfea5e1888cf75341567d2f38c227d33ec186ecd19e89de39809a43d9f05a0e1
3
+ size 14583
adapters/saved-alpaca-13b/checkpoint-1000/rng_state_4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d53cb797ccdf76e13be23cc92a67cb626bc8920b6455ae36c4554d606c59f38
3
+ size 14583
adapters/saved-alpaca-13b/checkpoint-1000/rng_state_5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6c980ec98f6a650a1f1406bf0637c97c375a37ccb6617e7c9214eb5329e587b
3
+ size 14583
adapters/saved-alpaca-13b/checkpoint-1000/rng_state_6.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19beeabb3d073c196d42ab61f0ea7ddee10e55c26f725f3343c43d2eb06e7226
3
+ size 14583
adapters/saved-alpaca-13b/checkpoint-1000/rng_state_7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e8deea3980223c29fe97f5415e158a42ed7c6e93ed515673cc65d3adaf369bb
3
+ size 14583
adapters/saved-alpaca-13b/checkpoint-1000/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68cff80b680ddf6e7abbef98b5f336b97f9b5963e2209307f639383870e8cc71
3
+ size 557
adapters/saved-alpaca-13b/checkpoint-1000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:988fa96f15b0bcf68e96b1a4d321f89c5a5aca28eeae640fe236375758ba5304
3
+ size 627
adapters/saved-alpaca-13b/checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,356 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8150926828384399,
3
+ "best_model_checkpoint": "/mnt/bn/qingyi-bn-lq/llama/saved-alpaca-13b/checkpoint-1000",
4
+ "epoch": 2.5624599615631007,
5
+ "global_step": 1000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.05,
12
+ "learning_rate": 5.9999999999999995e-05,
13
+ "loss": 1.9024,
14
+ "step": 20
15
+ },
16
+ {
17
+ "epoch": 0.1,
18
+ "learning_rate": 0.00011999999999999999,
19
+ "loss": 1.4401,
20
+ "step": 40
21
+ },
22
+ {
23
+ "epoch": 0.15,
24
+ "learning_rate": 0.00017999999999999998,
25
+ "loss": 0.9439,
26
+ "step": 60
27
+ },
28
+ {
29
+ "epoch": 0.2,
30
+ "learning_rate": 0.00023999999999999998,
31
+ "loss": 0.8693,
32
+ "step": 80
33
+ },
34
+ {
35
+ "epoch": 0.26,
36
+ "learning_rate": 0.0003,
37
+ "loss": 0.8598,
38
+ "step": 100
39
+ },
40
+ {
41
+ "epoch": 0.31,
42
+ "learning_rate": 0.00029439252336448596,
43
+ "loss": 0.8485,
44
+ "step": 120
45
+ },
46
+ {
47
+ "epoch": 0.36,
48
+ "learning_rate": 0.00028878504672897194,
49
+ "loss": 0.8323,
50
+ "step": 140
51
+ },
52
+ {
53
+ "epoch": 0.41,
54
+ "learning_rate": 0.0002831775700934579,
55
+ "loss": 0.8364,
56
+ "step": 160
57
+ },
58
+ {
59
+ "epoch": 0.46,
60
+ "learning_rate": 0.0002775700934579439,
61
+ "loss": 0.8364,
62
+ "step": 180
63
+ },
64
+ {
65
+ "epoch": 0.51,
66
+ "learning_rate": 0.0002719626168224299,
67
+ "loss": 0.842,
68
+ "step": 200
69
+ },
70
+ {
71
+ "epoch": 0.51,
72
+ "eval_loss": 0.8336600661277771,
73
+ "eval_runtime": 14.4551,
74
+ "eval_samples_per_second": 138.359,
75
+ "eval_steps_per_second": 2.214,
76
+ "step": 200
77
+ },
78
+ {
79
+ "epoch": 0.56,
80
+ "learning_rate": 0.00026635514018691586,
81
+ "loss": 0.8289,
82
+ "step": 220
83
+ },
84
+ {
85
+ "epoch": 0.61,
86
+ "learning_rate": 0.00026074766355140184,
87
+ "loss": 0.8383,
88
+ "step": 240
89
+ },
90
+ {
91
+ "epoch": 0.67,
92
+ "learning_rate": 0.0002551401869158878,
93
+ "loss": 0.822,
94
+ "step": 260
95
+ },
96
+ {
97
+ "epoch": 0.72,
98
+ "learning_rate": 0.0002495327102803738,
99
+ "loss": 0.8378,
100
+ "step": 280
101
+ },
102
+ {
103
+ "epoch": 0.77,
104
+ "learning_rate": 0.0002439252336448598,
105
+ "loss": 0.8275,
106
+ "step": 300
107
+ },
108
+ {
109
+ "epoch": 0.82,
110
+ "learning_rate": 0.00023831775700934577,
111
+ "loss": 0.8225,
112
+ "step": 320
113
+ },
114
+ {
115
+ "epoch": 0.87,
116
+ "learning_rate": 0.00023271028037383175,
117
+ "loss": 0.8188,
118
+ "step": 340
119
+ },
120
+ {
121
+ "epoch": 0.92,
122
+ "learning_rate": 0.00022710280373831773,
123
+ "loss": 0.8251,
124
+ "step": 360
125
+ },
126
+ {
127
+ "epoch": 0.97,
128
+ "learning_rate": 0.0002214953271028037,
129
+ "loss": 0.8107,
130
+ "step": 380
131
+ },
132
+ {
133
+ "epoch": 1.02,
134
+ "learning_rate": 0.0002158878504672897,
135
+ "loss": 0.806,
136
+ "step": 400
137
+ },
138
+ {
139
+ "epoch": 1.02,
140
+ "eval_loss": 0.8227179050445557,
141
+ "eval_runtime": 14.4306,
142
+ "eval_samples_per_second": 138.594,
143
+ "eval_steps_per_second": 2.218,
144
+ "step": 400
145
+ },
146
+ {
147
+ "epoch": 1.08,
148
+ "learning_rate": 0.00021028037383177567,
149
+ "loss": 0.8157,
150
+ "step": 420
151
+ },
152
+ {
153
+ "epoch": 1.13,
154
+ "learning_rate": 0.00020467289719626166,
155
+ "loss": 0.8139,
156
+ "step": 440
157
+ },
158
+ {
159
+ "epoch": 1.18,
160
+ "learning_rate": 0.00019906542056074764,
161
+ "loss": 0.8203,
162
+ "step": 460
163
+ },
164
+ {
165
+ "epoch": 1.23,
166
+ "learning_rate": 0.00019345794392523362,
167
+ "loss": 0.8183,
168
+ "step": 480
169
+ },
170
+ {
171
+ "epoch": 1.28,
172
+ "learning_rate": 0.0001878504672897196,
173
+ "loss": 0.8046,
174
+ "step": 500
175
+ },
176
+ {
177
+ "epoch": 1.33,
178
+ "learning_rate": 0.00018224299065420558,
179
+ "loss": 0.8053,
180
+ "step": 520
181
+ },
182
+ {
183
+ "epoch": 1.38,
184
+ "learning_rate": 0.00017663551401869156,
185
+ "loss": 0.8037,
186
+ "step": 540
187
+ },
188
+ {
189
+ "epoch": 1.43,
190
+ "learning_rate": 0.00017102803738317754,
191
+ "loss": 0.8036,
192
+ "step": 560
193
+ },
194
+ {
195
+ "epoch": 1.49,
196
+ "learning_rate": 0.00016542056074766352,
197
+ "loss": 0.7971,
198
+ "step": 580
199
+ },
200
+ {
201
+ "epoch": 1.54,
202
+ "learning_rate": 0.0001598130841121495,
203
+ "loss": 0.8024,
204
+ "step": 600
205
+ },
206
+ {
207
+ "epoch": 1.54,
208
+ "eval_loss": 0.8188450932502747,
209
+ "eval_runtime": 14.5635,
210
+ "eval_samples_per_second": 137.329,
211
+ "eval_steps_per_second": 2.197,
212
+ "step": 600
213
+ },
214
+ {
215
+ "epoch": 1.59,
216
+ "learning_rate": 0.0001542056074766355,
217
+ "loss": 0.8042,
218
+ "step": 620
219
+ },
220
+ {
221
+ "epoch": 1.64,
222
+ "learning_rate": 0.00014859813084112147,
223
+ "loss": 0.8148,
224
+ "step": 640
225
+ },
226
+ {
227
+ "epoch": 1.69,
228
+ "learning_rate": 0.00014299065420560745,
229
+ "loss": 0.7976,
230
+ "step": 660
231
+ },
232
+ {
233
+ "epoch": 1.74,
234
+ "learning_rate": 0.00013738317757009343,
235
+ "loss": 0.8112,
236
+ "step": 680
237
+ },
238
+ {
239
+ "epoch": 1.79,
240
+ "learning_rate": 0.0001317757009345794,
241
+ "loss": 0.805,
242
+ "step": 700
243
+ },
244
+ {
245
+ "epoch": 1.84,
246
+ "learning_rate": 0.0001261682242990654,
247
+ "loss": 0.797,
248
+ "step": 720
249
+ },
250
+ {
251
+ "epoch": 1.9,
252
+ "learning_rate": 0.00012056074766355139,
253
+ "loss": 0.7883,
254
+ "step": 740
255
+ },
256
+ {
257
+ "epoch": 1.95,
258
+ "learning_rate": 0.00011495327102803737,
259
+ "loss": 0.8026,
260
+ "step": 760
261
+ },
262
+ {
263
+ "epoch": 2.0,
264
+ "learning_rate": 0.00010934579439252335,
265
+ "loss": 0.8098,
266
+ "step": 780
267
+ },
268
+ {
269
+ "epoch": 2.05,
270
+ "learning_rate": 0.00010373831775700933,
271
+ "loss": 0.8021,
272
+ "step": 800
273
+ },
274
+ {
275
+ "epoch": 2.05,
276
+ "eval_loss": 0.8164276480674744,
277
+ "eval_runtime": 14.5011,
278
+ "eval_samples_per_second": 137.921,
279
+ "eval_steps_per_second": 2.207,
280
+ "step": 800
281
+ },
282
+ {
283
+ "epoch": 2.1,
284
+ "learning_rate": 9.813084112149531e-05,
285
+ "loss": 0.7967,
286
+ "step": 820
287
+ },
288
+ {
289
+ "epoch": 2.15,
290
+ "learning_rate": 9.25233644859813e-05,
291
+ "loss": 0.793,
292
+ "step": 840
293
+ },
294
+ {
295
+ "epoch": 2.2,
296
+ "learning_rate": 8.691588785046728e-05,
297
+ "loss": 0.8026,
298
+ "step": 860
299
+ },
300
+ {
301
+ "epoch": 2.25,
302
+ "learning_rate": 8.130841121495326e-05,
303
+ "loss": 0.7911,
304
+ "step": 880
305
+ },
306
+ {
307
+ "epoch": 2.31,
308
+ "learning_rate": 7.570093457943924e-05,
309
+ "loss": 0.8042,
310
+ "step": 900
311
+ },
312
+ {
313
+ "epoch": 2.36,
314
+ "learning_rate": 7.009345794392522e-05,
315
+ "loss": 0.7994,
316
+ "step": 920
317
+ },
318
+ {
319
+ "epoch": 2.41,
320
+ "learning_rate": 6.44859813084112e-05,
321
+ "loss": 0.8056,
322
+ "step": 940
323
+ },
324
+ {
325
+ "epoch": 2.46,
326
+ "learning_rate": 5.887850467289719e-05,
327
+ "loss": 0.7943,
328
+ "step": 960
329
+ },
330
+ {
331
+ "epoch": 2.51,
332
+ "learning_rate": 5.327102803738317e-05,
333
+ "loss": 0.7987,
334
+ "step": 980
335
+ },
336
+ {
337
+ "epoch": 2.56,
338
+ "learning_rate": 4.766355140186915e-05,
339
+ "loss": 0.8006,
340
+ "step": 1000
341
+ },
342
+ {
343
+ "epoch": 2.56,
344
+ "eval_loss": 0.8150926828384399,
345
+ "eval_runtime": 14.519,
346
+ "eval_samples_per_second": 137.751,
347
+ "eval_steps_per_second": 2.204,
348
+ "step": 1000
349
+ }
350
+ ],
351
+ "max_steps": 1170,
352
+ "num_train_epochs": 3,
353
+ "total_flos": 2.527783339700519e+18,
354
+ "trial_name": null,
355
+ "trial_params": null
356
+ }
adapters/saved-alpaca-13b/checkpoint-1000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:569cffde1178ce7cb3b47607b0b6b1b562a8816ca51d5d79948ec2815c618c99
3
+ size 3579
adapters/saved-alpaca-13b/checkpoint-600/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0e445eed20c1696cd10624c265dfe0008908b629a4c3b68d091dbdd6d6d15bb
3
+ size 52523141
adapters/saved-alpaca-13b/checkpoint-600/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3388b0e66a46f5f58661b8842f81cc59c7403c62035ba064e3a82985570fc045
3
+ size 26271757
adapters/saved-alpaca-13b/checkpoint-600/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a401151419ac788509518fecd7290393f6661cbb814b9d23e0507968245b6dd
3
+ size 14583
adapters/saved-alpaca-13b/checkpoint-600/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39d31064b0c52dea5c8b99aaca805fad14c4188d2f37b7f3f99c5b4329ef0fe1
3
+ size 14583
adapters/saved-alpaca-13b/checkpoint-600/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82500c71ca1a097dc82fa54a6a17ac633526f2ce40d060dde7da01abca5a1530
3
+ size 14583
adapters/saved-alpaca-13b/checkpoint-600/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c317a21316a208880af8f21030493d2ffa5f332154ffae8970e90d6d3b9baa7f
3
+ size 14583
adapters/saved-alpaca-13b/checkpoint-600/rng_state_4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df6fd724264e10e653e99e7343342410999eca369fecbf183329b923a782797e
3
+ size 14583
adapters/saved-alpaca-13b/checkpoint-600/rng_state_5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a6b3bfc5317511071ea71fe365af9b93c488be84bd421286a3e319c75ae1ed7
3
+ size 14583
adapters/saved-alpaca-13b/checkpoint-600/rng_state_6.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09b5773348f2c99824a5b650d0bd7cbae32b7808ed38e8264121e8b4ebc7c33c
3
+ size 14583
adapters/saved-alpaca-13b/checkpoint-600/rng_state_7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e4a1e6d9da1d2200d29265401dbae1332be7010031641244ec5316bc187a6cd
3
+ size 14583
adapters/saved-alpaca-13b/checkpoint-600/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dc9eacfeb00bd0bfeb98934a2309be01be65b288e0d747bbfc423b32679169f
3
+ size 557
adapters/saved-alpaca-13b/checkpoint-600/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab0b5eaefe5317a6f29d9c670ecd5644d66afb60156c841d18e022a62f983d66
3
+ size 627
adapters/saved-alpaca-13b/checkpoint-600/trainer_state.json ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8188450932502747,
3
+ "best_model_checkpoint": "/mnt/bn/qingyi-bn-lq/llama/saved-alpaca-13b/checkpoint-600",
4
+ "epoch": 1.5374759769378603,
5
+ "global_step": 600,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.05,
12
+ "learning_rate": 5.9999999999999995e-05,
13
+ "loss": 1.9024,
14
+ "step": 20
15
+ },
16
+ {
17
+ "epoch": 0.1,
18
+ "learning_rate": 0.00011999999999999999,
19
+ "loss": 1.4401,
20
+ "step": 40
21
+ },
22
+ {
23
+ "epoch": 0.15,
24
+ "learning_rate": 0.00017999999999999998,
25
+ "loss": 0.9439,
26
+ "step": 60
27
+ },
28
+ {
29
+ "epoch": 0.2,
30
+ "learning_rate": 0.00023999999999999998,
31
+ "loss": 0.8693,
32
+ "step": 80
33
+ },
34
+ {
35
+ "epoch": 0.26,
36
+ "learning_rate": 0.0003,
37
+ "loss": 0.8598,
38
+ "step": 100
39
+ },
40
+ {
41
+ "epoch": 0.31,
42
+ "learning_rate": 0.00029439252336448596,
43
+ "loss": 0.8485,
44
+ "step": 120
45
+ },
46
+ {
47
+ "epoch": 0.36,
48
+ "learning_rate": 0.00028878504672897194,
49
+ "loss": 0.8323,
50
+ "step": 140
51
+ },
52
+ {
53
+ "epoch": 0.41,
54
+ "learning_rate": 0.0002831775700934579,
55
+ "loss": 0.8364,
56
+ "step": 160
57
+ },
58
+ {
59
+ "epoch": 0.46,
60
+ "learning_rate": 0.0002775700934579439,
61
+ "loss": 0.8364,
62
+ "step": 180
63
+ },
64
+ {
65
+ "epoch": 0.51,
66
+ "learning_rate": 0.0002719626168224299,
67
+ "loss": 0.842,
68
+ "step": 200
69
+ },
70
+ {
71
+ "epoch": 0.51,
72
+ "eval_loss": 0.8336600661277771,
73
+ "eval_runtime": 14.4551,
74
+ "eval_samples_per_second": 138.359,
75
+ "eval_steps_per_second": 2.214,
76
+ "step": 200
77
+ },
78
+ {
79
+ "epoch": 0.56,
80
+ "learning_rate": 0.00026635514018691586,
81
+ "loss": 0.8289,
82
+ "step": 220
83
+ },
84
+ {
85
+ "epoch": 0.61,
86
+ "learning_rate": 0.00026074766355140184,
87
+ "loss": 0.8383,
88
+ "step": 240
89
+ },
90
+ {
91
+ "epoch": 0.67,
92
+ "learning_rate": 0.0002551401869158878,
93
+ "loss": 0.822,
94
+ "step": 260
95
+ },
96
+ {
97
+ "epoch": 0.72,
98
+ "learning_rate": 0.0002495327102803738,
99
+ "loss": 0.8378,
100
+ "step": 280
101
+ },
102
+ {
103
+ "epoch": 0.77,
104
+ "learning_rate": 0.0002439252336448598,
105
+ "loss": 0.8275,
106
+ "step": 300
107
+ },
108
+ {
109
+ "epoch": 0.82,
110
+ "learning_rate": 0.00023831775700934577,
111
+ "loss": 0.8225,
112
+ "step": 320
113
+ },
114
+ {
115
+ "epoch": 0.87,
116
+ "learning_rate": 0.00023271028037383175,
117
+ "loss": 0.8188,
118
+ "step": 340
119
+ },
120
+ {
121
+ "epoch": 0.92,
122
+ "learning_rate": 0.00022710280373831773,
123
+ "loss": 0.8251,
124
+ "step": 360
125
+ },
126
+ {
127
+ "epoch": 0.97,
128
+ "learning_rate": 0.0002214953271028037,
129
+ "loss": 0.8107,
130
+ "step": 380
131
+ },
132
+ {
133
+ "epoch": 1.02,
134
+ "learning_rate": 0.0002158878504672897,
135
+ "loss": 0.806,
136
+ "step": 400
137
+ },
138
+ {
139
+ "epoch": 1.02,
140
+ "eval_loss": 0.8227179050445557,
141
+ "eval_runtime": 14.4306,
142
+ "eval_samples_per_second": 138.594,
143
+ "eval_steps_per_second": 2.218,
144
+ "step": 400
145
+ },
146
+ {
147
+ "epoch": 1.08,
148
+ "learning_rate": 0.00021028037383177567,
149
+ "loss": 0.8157,
150
+ "step": 420
151
+ },
152
+ {
153
+ "epoch": 1.13,
154
+ "learning_rate": 0.00020467289719626166,
155
+ "loss": 0.8139,
156
+ "step": 440
157
+ },
158
+ {
159
+ "epoch": 1.18,
160
+ "learning_rate": 0.00019906542056074764,
161
+ "loss": 0.8203,
162
+ "step": 460
163
+ },
164
+ {
165
+ "epoch": 1.23,
166
+ "learning_rate": 0.00019345794392523362,
167
+ "loss": 0.8183,
168
+ "step": 480
169
+ },
170
+ {
171
+ "epoch": 1.28,
172
+ "learning_rate": 0.0001878504672897196,
173
+ "loss": 0.8046,
174
+ "step": 500
175
+ },
176
+ {
177
+ "epoch": 1.33,
178
+ "learning_rate": 0.00018224299065420558,
179
+ "loss": 0.8053,
180
+ "step": 520
181
+ },
182
+ {
183
+ "epoch": 1.38,
184
+ "learning_rate": 0.00017663551401869156,
185
+ "loss": 0.8037,
186
+ "step": 540
187
+ },
188
+ {
189
+ "epoch": 1.43,
190
+ "learning_rate": 0.00017102803738317754,
191
+ "loss": 0.8036,
192
+ "step": 560
193
+ },
194
+ {
195
+ "epoch": 1.49,
196
+ "learning_rate": 0.00016542056074766352,
197
+ "loss": 0.7971,
198
+ "step": 580
199
+ },
200
+ {
201
+ "epoch": 1.54,
202
+ "learning_rate": 0.0001598130841121495,
203
+ "loss": 0.8024,
204
+ "step": 600
205
+ },
206
+ {
207
+ "epoch": 1.54,
208
+ "eval_loss": 0.8188450932502747,
209
+ "eval_runtime": 14.5635,
210
+ "eval_samples_per_second": 137.329,
211
+ "eval_steps_per_second": 2.197,
212
+ "step": 600
213
+ }
214
+ ],
215
+ "max_steps": 1170,
216
+ "num_train_epochs": 3,
217
+ "total_flos": 1.5167016053306819e+18,
218
+ "trial_name": null,
219
+ "trial_params": null
220
+ }
adapters/saved-alpaca-13b/checkpoint-600/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:569cffde1178ce7cb3b47607b0b6b1b562a8816ca51d5d79948ec2815c618c99
3
+ size 3579
adapters/saved-alpaca-13b/checkpoint-800/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a0106565c0a3f46bd8586f3992cc17ea683b95bd5b565fe1ce96fd1667082e3
3
+ size 52523141
adapters/saved-alpaca-13b/checkpoint-800/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6d5cf675ef9b1bb62432ca7fe7defdd413bc528b7b88c703059361c0592a5c1
3
+ size 26271757
adapters/saved-alpaca-13b/checkpoint-800/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8abd48d76a5f0f966df413c2c82c2c9e03343968546fb321f0e4dad0af2c5688
3
+ size 14583
adapters/saved-alpaca-13b/checkpoint-800/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f05d613698fda587a8e1836e4405f3a8a8b51e0d2f1657924ccf69f123b43daa
3
+ size 14583
adapters/saved-alpaca-13b/checkpoint-800/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:001a14f29826dd590373d0e9c1051359997d64ec822fb50b1cabbd25868ef872
3
+ size 14583
adapters/saved-alpaca-13b/checkpoint-800/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25ca8f826bd679c698d40e7c7b6421876a004d7ee0c7c1f4627142b8470543a3
3
+ size 14583
adapters/saved-alpaca-13b/checkpoint-800/rng_state_4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cec3482573f68be8a59c79516d457f452e08e29a2dcb57de4b901670d361d37f
3
+ size 14583
adapters/saved-alpaca-13b/checkpoint-800/rng_state_5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:059585222586276099098bf816da3a60abf4e41c1131c0bb6b9dc17ce74ff48d
3
+ size 14583
adapters/saved-alpaca-13b/checkpoint-800/rng_state_6.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:726e2dd1d9070a86366cb043486560fe6cd90fad3fb89344beffec018587ddce
3
+ size 14583
adapters/saved-alpaca-13b/checkpoint-800/rng_state_7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af8d9d6dc0510fc91cbb94adfe6c2322f7fc807b649b3bb74ea2a584ca9b9cdb
3
+ size 14583
adapters/saved-alpaca-13b/checkpoint-800/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27ec07a12731ae6f9765d05fe7c8495505f1d0f90b4cc6255a0853fec3970808
3
+ size 557
adapters/saved-alpaca-13b/checkpoint-800/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98c4b9b4bff768da63a57f06e31324664e9c442c12b99f05f7bf2cd746d192e9
3
+ size 627
adapters/saved-alpaca-13b/checkpoint-800/trainer_state.json ADDED
@@ -0,0 +1,288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8164276480674744,
3
+ "best_model_checkpoint": "/mnt/bn/qingyi-bn-lq/llama/saved-alpaca-13b/checkpoint-800",
4
+ "epoch": 2.0499679692504804,
5
+ "global_step": 800,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.05,
12
+ "learning_rate": 5.9999999999999995e-05,
13
+ "loss": 1.9024,
14
+ "step": 20
15
+ },
16
+ {
17
+ "epoch": 0.1,
18
+ "learning_rate": 0.00011999999999999999,
19
+ "loss": 1.4401,
20
+ "step": 40
21
+ },
22
+ {
23
+ "epoch": 0.15,
24
+ "learning_rate": 0.00017999999999999998,
25
+ "loss": 0.9439,
26
+ "step": 60
27
+ },
28
+ {
29
+ "epoch": 0.2,
30
+ "learning_rate": 0.00023999999999999998,
31
+ "loss": 0.8693,
32
+ "step": 80
33
+ },
34
+ {
35
+ "epoch": 0.26,
36
+ "learning_rate": 0.0003,
37
+ "loss": 0.8598,
38
+ "step": 100
39
+ },
40
+ {
41
+ "epoch": 0.31,
42
+ "learning_rate": 0.00029439252336448596,
43
+ "loss": 0.8485,
44
+ "step": 120
45
+ },
46
+ {
47
+ "epoch": 0.36,
48
+ "learning_rate": 0.00028878504672897194,
49
+ "loss": 0.8323,
50
+ "step": 140
51
+ },
52
+ {
53
+ "epoch": 0.41,
54
+ "learning_rate": 0.0002831775700934579,
55
+ "loss": 0.8364,
56
+ "step": 160
57
+ },
58
+ {
59
+ "epoch": 0.46,
60
+ "learning_rate": 0.0002775700934579439,
61
+ "loss": 0.8364,
62
+ "step": 180
63
+ },
64
+ {
65
+ "epoch": 0.51,
66
+ "learning_rate": 0.0002719626168224299,
67
+ "loss": 0.842,
68
+ "step": 200
69
+ },
70
+ {
71
+ "epoch": 0.51,
72
+ "eval_loss": 0.8336600661277771,
73
+ "eval_runtime": 14.4551,
74
+ "eval_samples_per_second": 138.359,
75
+ "eval_steps_per_second": 2.214,
76
+ "step": 200
77
+ },
78
+ {
79
+ "epoch": 0.56,
80
+ "learning_rate": 0.00026635514018691586,
81
+ "loss": 0.8289,
82
+ "step": 220
83
+ },
84
+ {
85
+ "epoch": 0.61,
86
+ "learning_rate": 0.00026074766355140184,
87
+ "loss": 0.8383,
88
+ "step": 240
89
+ },
90
+ {
91
+ "epoch": 0.67,
92
+ "learning_rate": 0.0002551401869158878,
93
+ "loss": 0.822,
94
+ "step": 260
95
+ },
96
+ {
97
+ "epoch": 0.72,
98
+ "learning_rate": 0.0002495327102803738,
99
+ "loss": 0.8378,
100
+ "step": 280
101
+ },
102
+ {
103
+ "epoch": 0.77,
104
+ "learning_rate": 0.0002439252336448598,
105
+ "loss": 0.8275,
106
+ "step": 300
107
+ },
108
+ {
109
+ "epoch": 0.82,
110
+ "learning_rate": 0.00023831775700934577,
111
+ "loss": 0.8225,
112
+ "step": 320
113
+ },
114
+ {
115
+ "epoch": 0.87,
116
+ "learning_rate": 0.00023271028037383175,
117
+ "loss": 0.8188,
118
+ "step": 340
119
+ },
120
+ {
121
+ "epoch": 0.92,
122
+ "learning_rate": 0.00022710280373831773,
123
+ "loss": 0.8251,
124
+ "step": 360
125
+ },
126
+ {
127
+ "epoch": 0.97,
128
+ "learning_rate": 0.0002214953271028037,
129
+ "loss": 0.8107,
130
+ "step": 380
131
+ },
132
+ {
133
+ "epoch": 1.02,
134
+ "learning_rate": 0.0002158878504672897,
135
+ "loss": 0.806,
136
+ "step": 400
137
+ },
138
+ {
139
+ "epoch": 1.02,
140
+ "eval_loss": 0.8227179050445557,
141
+ "eval_runtime": 14.4306,
142
+ "eval_samples_per_second": 138.594,
143
+ "eval_steps_per_second": 2.218,
144
+ "step": 400
145
+ },
146
+ {
147
+ "epoch": 1.08,
148
+ "learning_rate": 0.00021028037383177567,
149
+ "loss": 0.8157,
150
+ "step": 420
151
+ },
152
+ {
153
+ "epoch": 1.13,
154
+ "learning_rate": 0.00020467289719626166,
155
+ "loss": 0.8139,
156
+ "step": 440
157
+ },
158
+ {
159
+ "epoch": 1.18,
160
+ "learning_rate": 0.00019906542056074764,
161
+ "loss": 0.8203,
162
+ "step": 460
163
+ },
164
+ {
165
+ "epoch": 1.23,
166
+ "learning_rate": 0.00019345794392523362,
167
+ "loss": 0.8183,
168
+ "step": 480
169
+ },
170
+ {
171
+ "epoch": 1.28,
172
+ "learning_rate": 0.0001878504672897196,
173
+ "loss": 0.8046,
174
+ "step": 500
175
+ },
176
+ {
177
+ "epoch": 1.33,
178
+ "learning_rate": 0.00018224299065420558,
179
+ "loss": 0.8053,
180
+ "step": 520
181
+ },
182
+ {
183
+ "epoch": 1.38,
184
+ "learning_rate": 0.00017663551401869156,
185
+ "loss": 0.8037,
186
+ "step": 540
187
+ },
188
+ {
189
+ "epoch": 1.43,
190
+ "learning_rate": 0.00017102803738317754,
191
+ "loss": 0.8036,
192
+ "step": 560
193
+ },
194
+ {
195
+ "epoch": 1.49,
196
+ "learning_rate": 0.00016542056074766352,
197
+ "loss": 0.7971,
198
+ "step": 580
199
+ },
200
+ {
201
+ "epoch": 1.54,
202
+ "learning_rate": 0.0001598130841121495,
203
+ "loss": 0.8024,
204
+ "step": 600
205
+ },
206
+ {
207
+ "epoch": 1.54,
208
+ "eval_loss": 0.8188450932502747,
209
+ "eval_runtime": 14.5635,
210
+ "eval_samples_per_second": 137.329,
211
+ "eval_steps_per_second": 2.197,
212
+ "step": 600
213
+ },
214
+ {
215
+ "epoch": 1.59,
216
+ "learning_rate": 0.0001542056074766355,
217
+ "loss": 0.8042,
218
+ "step": 620
219
+ },
220
+ {
221
+ "epoch": 1.64,
222
+ "learning_rate": 0.00014859813084112147,
223
+ "loss": 0.8148,
224
+ "step": 640
225
+ },
226
+ {
227
+ "epoch": 1.69,
228
+ "learning_rate": 0.00014299065420560745,
229
+ "loss": 0.7976,
230
+ "step": 660
231
+ },
232
+ {
233
+ "epoch": 1.74,
234
+ "learning_rate": 0.00013738317757009343,
235
+ "loss": 0.8112,
236
+ "step": 680
237
+ },
238
+ {
239
+ "epoch": 1.79,
240
+ "learning_rate": 0.0001317757009345794,
241
+ "loss": 0.805,
242
+ "step": 700
243
+ },
244
+ {
245
+ "epoch": 1.84,
246
+ "learning_rate": 0.0001261682242990654,
247
+ "loss": 0.797,
248
+ "step": 720
249
+ },
250
+ {
251
+ "epoch": 1.9,
252
+ "learning_rate": 0.00012056074766355139,
253
+ "loss": 0.7883,
254
+ "step": 740
255
+ },
256
+ {
257
+ "epoch": 1.95,
258
+ "learning_rate": 0.00011495327102803737,
259
+ "loss": 0.8026,
260
+ "step": 760
261
+ },
262
+ {
263
+ "epoch": 2.0,
264
+ "learning_rate": 0.00010934579439252335,
265
+ "loss": 0.8098,
266
+ "step": 780
267
+ },
268
+ {
269
+ "epoch": 2.05,
270
+ "learning_rate": 0.00010373831775700933,
271
+ "loss": 0.8021,
272
+ "step": 800
273
+ },
274
+ {
275
+ "epoch": 2.05,
276
+ "eval_loss": 0.8164276480674744,
277
+ "eval_runtime": 14.5011,
278
+ "eval_samples_per_second": 137.921,
279
+ "eval_steps_per_second": 2.207,
280
+ "step": 800
281
+ }
282
+ ],
283
+ "max_steps": 1170,
284
+ "num_train_epochs": 3,
285
+ "total_flos": 2.022163468739674e+18,
286
+ "trial_name": null,
287
+ "trial_params": null
288
+ }
adapters/saved-alpaca-13b/checkpoint-800/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:569cffde1178ce7cb3b47607b0b6b1b562a8816ca51d5d79948ec2815c618c99
3
+ size 3579
adapters/saved-alpaca-30b/adapter_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "/mnt/bn/qingyi-bn-lq/llama/llama-30b-hf",
3
+ "bias": "none",
4
+ "enable_lora": null,
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "lora_alpha": 16,
8
+ "lora_dropout": 0.05,
9
+ "merge_weights": false,
10
+ "modules_to_save": null,
11
+ "peft_type": "LORA",
12
+ "r": 8,
13
+ "target_modules": [
14
+ "q_proj",
15
+ "v_proj"
16
+ ],
17
+ "task_type": "CAUSAL_LM"
18
+ }
adapters/saved-alpaca-30b/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ce3914959790fb662ef96782a3dbf51d0389d5e6dc788ba1b4c3168000ba8e1
3
+ size 51204365
adapters/saved-alpaca-30b/checkpoint-1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1003515bf84f62eb769e2ef0bb929c6db3457b3c0810df2976efbeaa82dc1c7a
3
+ size 102377669
adapters/saved-alpaca-30b/checkpoint-1000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06013f5d13fcd882f339d95b43b55ca7ec455466da5a5ec2721ca81617174771
3
+ size 51204365
adapters/saved-alpaca-30b/checkpoint-1000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b324516799aef68bd0b329d375bc92c3d156fb72d07e9939b8d80339153d370
3
+ size 14583
adapters/saved-alpaca-30b/checkpoint-1000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36b8d42f588c9287e71758c87a240aef55cc53f75d82f8ec0bea88228494b72d
3
+ size 14583