daochf committed on
Commit
9c8bf1e
1 Parent(s): 7e340ed

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +567 -1
README.md CHANGED
@@ -7,7 +7,573 @@ base_model: meta-llama/Llama-2-7b-hf
7
 
8
  <!-- Provide a quick summary of what the model is/does. -->
9
 
10
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  ## Model Details
13
 
 
7
 
8
  <!-- Provide a quick summary of what the model is/does. -->
9
 
10
+ ```json
11
+ {
12
+ "best_metric": 0.0680607408285141,
13
+ "best_model_checkpoint": "./Lora-Meta-Llama2-7b-hf-QandA_2g_v01-r2-v01\\checkpoint-19607",
14
+ "epoch": 38.99950273495773,
15
+ "eval_steps": 500,
16
+ "global_step": 19607,
17
+ "is_hyper_param_search": false,
18
+ "is_local_process_zero": true,
19
+ "is_world_process_zero": true,
20
+ "log_history": [
21
+ {
22
+ "epoch": 1.0,
23
+ "learning_rate": 9.75e-05,
24
+ "loss": 0.8234,
25
+ "step": 502
26
+ },
27
+ {
28
+ "epoch": 1.0,
29
+ "eval_loss": 0.6993071436882019,
30
+ "eval_runtime": 196.6154,
31
+ "eval_samples_per_second": 2.05,
32
+ "eval_steps_per_second": 0.259,
33
+ "step": 502
34
+ },
35
+ {
36
+ "epoch": 2.0,
37
+ "learning_rate": 9.499501992031873e-05,
38
+ "loss": 0.6127,
39
+ "step": 1005
40
+ },
41
+ {
42
+ "epoch": 2.0,
43
+ "eval_loss": 0.599130392074585,
44
+ "eval_runtime": 196.9551,
45
+ "eval_samples_per_second": 2.046,
46
+ "eval_steps_per_second": 0.259,
47
+ "step": 1005
48
+ },
49
+ {
50
+ "epoch": 3.0,
51
+ "learning_rate": 9.249003984063745e-05,
52
+ "loss": 0.5345,
53
+ "step": 1508
54
+ },
55
+ {
56
+ "epoch": 3.0,
57
+ "eval_loss": 0.5176905989646912,
58
+ "eval_runtime": 198.2897,
59
+ "eval_samples_per_second": 2.032,
60
+ "eval_steps_per_second": 0.257,
61
+ "step": 1508
62
+ },
63
+ {
64
+ "epoch": 4.0,
65
+ "learning_rate": 8.998505976095618e-05,
66
+ "loss": 0.4679,
67
+ "step": 2011
68
+ },
69
+ {
70
+ "epoch": 4.0,
71
+ "eval_loss": 0.4569143056869507,
72
+ "eval_runtime": 196.7986,
73
+ "eval_samples_per_second": 2.048,
74
+ "eval_steps_per_second": 0.259,
75
+ "step": 2011
76
+ },
77
+ {
78
+ "epoch": 5.0,
79
+ "learning_rate": 8.748505976095617e-05,
80
+ "loss": 0.4086,
81
+ "step": 2513
82
+ },
83
+ {
84
+ "epoch": 5.0,
85
+ "eval_loss": 0.4007655382156372,
86
+ "eval_runtime": 197.0527,
87
+ "eval_samples_per_second": 2.045,
88
+ "eval_steps_per_second": 0.259,
89
+ "step": 2513
90
+ },
91
+ {
92
+ "epoch": 6.0,
93
+ "learning_rate": 8.49800796812749e-05,
94
+ "loss": 0.3562,
95
+ "step": 3016
96
+ },
97
+ {
98
+ "epoch": 6.0,
99
+ "eval_loss": 0.3483542799949646,
100
+ "eval_runtime": 198.1137,
101
+ "eval_samples_per_second": 2.034,
102
+ "eval_steps_per_second": 0.257,
103
+ "step": 3016
104
+ },
105
+ {
106
+ "epoch": 7.0,
107
+ "learning_rate": 8.247509960159363e-05,
108
+ "loss": 0.3143,
109
+ "step": 3519
110
+ },
111
+ {
112
+ "epoch": 7.0,
113
+ "eval_loss": 0.3145788311958313,
114
+ "eval_runtime": 196.7599,
115
+ "eval_samples_per_second": 2.048,
116
+ "eval_steps_per_second": 0.259,
117
+ "step": 3519
118
+ },
119
+ {
120
+ "epoch": 8.0,
121
+ "learning_rate": 7.997011952191235e-05,
122
+ "loss": 0.2789,
123
+ "step": 4022
124
+ },
125
+ {
126
+ "epoch": 8.0,
127
+ "eval_loss": 0.2774547338485718,
128
+ "eval_runtime": 195.9949,
129
+ "eval_samples_per_second": 2.056,
130
+ "eval_steps_per_second": 0.26,
131
+ "step": 4022
132
+ },
133
+ {
134
+ "epoch": 9.0,
135
+ "learning_rate": 7.747011952191235e-05,
136
+ "loss": 0.2506,
137
+ "step": 4524
138
+ },
139
+ {
140
+ "epoch": 9.0,
141
+ "eval_loss": 0.25074610114097595,
142
+ "eval_runtime": 195.9777,
143
+ "eval_samples_per_second": 2.056,
144
+ "eval_steps_per_second": 0.26,
145
+ "step": 4524
146
+ },
147
+ {
148
+ "epoch": 10.0,
149
+ "learning_rate": 7.496513944223108e-05,
150
+ "loss": 0.2257,
151
+ "step": 5027
152
+ },
153
+ {
154
+ "epoch": 10.0,
155
+ "eval_loss": 0.22645100951194763,
156
+ "eval_runtime": 195.4398,
157
+ "eval_samples_per_second": 2.062,
158
+ "eval_steps_per_second": 0.261,
159
+ "step": 5027
160
+ },
161
+ {
162
+ "epoch": 11.0,
163
+ "learning_rate": 7.24601593625498e-05,
164
+ "loss": 0.2031,
165
+ "step": 5530
166
+ },
167
+ {
168
+ "epoch": 11.0,
169
+ "eval_loss": 0.20663012564182281,
170
+ "eval_runtime": 196.0347,
171
+ "eval_samples_per_second": 2.056,
172
+ "eval_steps_per_second": 0.26,
173
+ "step": 5530
174
+ },
175
+ {
176
+ "epoch": 12.0,
177
+ "learning_rate": 6.995517928286853e-05,
178
+ "loss": 0.1845,
179
+ "step": 6033
180
+ },
181
+ {
182
+ "epoch": 12.0,
183
+ "eval_loss": 0.1891084611415863,
184
+ "eval_runtime": 195.5982,
185
+ "eval_samples_per_second": 2.06,
186
+ "eval_steps_per_second": 0.261,
187
+ "step": 6033
188
+ },
189
+ {
190
+ "epoch": 13.0,
191
+ "learning_rate": 6.745517928286854e-05,
192
+ "loss": 0.1691,
193
+ "step": 6535
194
+ },
195
+ {
196
+ "epoch": 13.0,
197
+ "eval_loss": 0.17209213972091675,
198
+ "eval_runtime": 196.259,
199
+ "eval_samples_per_second": 2.053,
200
+ "eval_steps_per_second": 0.26,
201
+ "step": 6535
202
+ },
203
+ {
204
+ "epoch": 14.0,
205
+ "learning_rate": 6.495019920318725e-05,
206
+ "loss": 0.1542,
207
+ "step": 7038
208
+ },
209
+ {
210
+ "epoch": 14.0,
211
+ "eval_loss": 0.15993133187294006,
212
+ "eval_runtime": 196.0195,
213
+ "eval_samples_per_second": 2.056,
214
+ "eval_steps_per_second": 0.26,
215
+ "step": 7038
216
+ },
217
+ {
218
+ "epoch": 15.0,
219
+ "learning_rate": 6.244521912350598e-05,
220
+ "loss": 0.1414,
221
+ "step": 7541
222
+ },
223
+ {
224
+ "epoch": 15.0,
225
+ "eval_loss": 0.14806699752807617,
226
+ "eval_runtime": 195.4909,
227
+ "eval_samples_per_second": 2.061,
228
+ "eval_steps_per_second": 0.261,
229
+ "step": 7541
230
+ },
231
+ {
232
+ "epoch": 16.0,
233
+ "learning_rate": 5.994023904382471e-05,
234
+ "loss": 0.1309,
235
+ "step": 8044
236
+ },
237
+ {
238
+ "epoch": 16.0,
239
+ "eval_loss": 0.13711141049861908,
240
+ "eval_runtime": 195.7592,
241
+ "eval_samples_per_second": 2.059,
242
+ "eval_steps_per_second": 0.261,
243
+ "step": 8044
244
+ },
245
+ {
246
+ "epoch": 17.0,
247
+ "learning_rate": 5.7440239043824705e-05,
248
+ "loss": 0.1222,
249
+ "step": 8546
250
+ },
251
+ {
252
+ "epoch": 17.0,
253
+ "eval_loss": 0.13089050352573395,
254
+ "eval_runtime": 195.6972,
255
+ "eval_samples_per_second": 2.059,
256
+ "eval_steps_per_second": 0.261,
257
+ "step": 8546
258
+ },
259
+ {
260
+ "epoch": 18.0,
261
+ "learning_rate": 5.493525896414343e-05,
262
+ "loss": 0.1134,
263
+ "step": 9049
264
+ },
265
+ {
266
+ "epoch": 18.0,
267
+ "eval_loss": 0.12404956668615341,
268
+ "eval_runtime": 195.8078,
269
+ "eval_samples_per_second": 2.058,
270
+ "eval_steps_per_second": 0.26,
271
+ "step": 9049
272
+ },
273
+ {
274
+ "epoch": 19.0,
275
+ "learning_rate": 5.243027888446216e-05,
276
+ "loss": 0.107,
277
+ "step": 9552
278
+ },
279
+ {
280
+ "epoch": 19.0,
281
+ "eval_loss": 0.11492674797773361,
282
+ "eval_runtime": 196.1937,
283
+ "eval_samples_per_second": 2.054,
284
+ "eval_steps_per_second": 0.26,
285
+ "step": 9552
286
+ },
287
+ {
288
+ "epoch": 20.0,
289
+ "learning_rate": 4.992529880478088e-05,
290
+ "loss": 0.1014,
291
+ "step": 10055
292
+ },
293
+ {
294
+ "epoch": 20.0,
295
+ "eval_loss": 0.1099749356508255,
296
+ "eval_runtime": 196.1333,
297
+ "eval_samples_per_second": 2.055,
298
+ "eval_steps_per_second": 0.26,
299
+ "step": 10055
300
+ },
301
+ {
302
+ "epoch": 21.0,
303
+ "learning_rate": 4.742529880478088e-05,
304
+ "loss": 0.0966,
305
+ "step": 10557
306
+ },
307
+ {
308
+ "epoch": 21.0,
309
+ "eval_loss": 0.1054670661687851,
310
+ "eval_runtime": 196.2688,
311
+ "eval_samples_per_second": 2.053,
312
+ "eval_steps_per_second": 0.26,
313
+ "step": 10557
314
+ },
315
+ {
316
+ "epoch": 22.0,
317
+ "learning_rate": 4.49203187250996e-05,
318
+ "loss": 0.0925,
319
+ "step": 11060
320
+ },
321
+ {
322
+ "epoch": 22.0,
323
+ "eval_loss": 0.1019241139292717,
324
+ "eval_runtime": 195.6941,
325
+ "eval_samples_per_second": 2.059,
326
+ "eval_steps_per_second": 0.261,
327
+ "step": 11060
328
+ },
329
+ {
330
+ "epoch": 23.0,
331
+ "learning_rate": 4.241533864541833e-05,
332
+ "loss": 0.0883,
333
+ "step": 11563
334
+ },
335
+ {
336
+ "epoch": 23.0,
337
+ "eval_loss": 0.09764768928289413,
338
+ "eval_runtime": 196.6317,
339
+ "eval_samples_per_second": 2.05,
340
+ "eval_steps_per_second": 0.259,
341
+ "step": 11563
342
+ },
343
+ {
344
+ "epoch": 24.0,
345
+ "learning_rate": 3.9910358565737054e-05,
346
+ "loss": 0.085,
347
+ "step": 12066
348
+ },
349
+ {
350
+ "epoch": 24.0,
351
+ "eval_loss": 0.09518074989318848,
352
+ "eval_runtime": 196.0702,
353
+ "eval_samples_per_second": 2.055,
354
+ "eval_steps_per_second": 0.26,
355
+ "step": 12066
356
+ },
357
+ {
358
+ "epoch": 25.0,
359
+ "learning_rate": 3.7410358565737055e-05,
360
+ "loss": 0.0832,
361
+ "step": 12568
362
+ },
363
+ {
364
+ "epoch": 25.0,
365
+ "eval_loss": 0.09052950888872147,
366
+ "eval_runtime": 196.4373,
367
+ "eval_samples_per_second": 2.052,
368
+ "eval_steps_per_second": 0.26,
369
+ "step": 12568
370
+ },
371
+ {
372
+ "epoch": 26.0,
373
+ "learning_rate": 3.490537848605578e-05,
374
+ "loss": 0.0795,
375
+ "step": 13071
376
+ },
377
+ {
378
+ "epoch": 26.0,
379
+ "eval_loss": 0.0874876081943512,
380
+ "eval_runtime": 196.0257,
381
+ "eval_samples_per_second": 2.056,
382
+ "eval_steps_per_second": 0.26,
383
+ "step": 13071
384
+ },
385
+ {
386
+ "epoch": 27.0,
387
+ "learning_rate": 3.24003984063745e-05,
388
+ "loss": 0.078,
389
+ "step": 13574
390
+ },
391
+ {
392
+ "epoch": 27.0,
393
+ "eval_loss": 0.08521231263875961,
394
+ "eval_runtime": 195.7721,
395
+ "eval_samples_per_second": 2.059,
396
+ "eval_steps_per_second": 0.261,
397
+ "step": 13574
398
+ },
399
+ {
400
+ "epoch": 28.0,
401
+ "learning_rate": 2.989541832669323e-05,
402
+ "loss": 0.0761,
403
+ "step": 14077
404
+ },
405
+ {
406
+ "epoch": 28.0,
407
+ "eval_loss": 0.08261983096599579,
408
+ "eval_runtime": 195.9135,
409
+ "eval_samples_per_second": 2.057,
410
+ "eval_steps_per_second": 0.26,
411
+ "step": 14077
412
+ },
413
+ {
414
+ "epoch": 29.0,
415
+ "learning_rate": 2.739541832669323e-05,
416
+ "loss": 0.0748,
417
+ "step": 14579
418
+ },
419
+ {
420
+ "epoch": 29.0,
421
+ "eval_loss": 0.08173193037509918,
422
+ "eval_runtime": 196.0573,
423
+ "eval_samples_per_second": 2.056,
424
+ "eval_steps_per_second": 0.26,
425
+ "step": 14579
426
+ },
427
+ {
428
+ "epoch": 30.0,
429
+ "learning_rate": 2.4890438247011953e-05,
430
+ "loss": 0.0727,
431
+ "step": 15082
432
+ },
433
+ {
434
+ "epoch": 30.0,
435
+ "eval_loss": 0.07807794213294983,
436
+ "eval_runtime": 196.0191,
437
+ "eval_samples_per_second": 2.056,
438
+ "eval_steps_per_second": 0.26,
439
+ "step": 15082
440
+ },
441
+ {
442
+ "epoch": 31.0,
443
+ "learning_rate": 2.2385458167330677e-05,
444
+ "loss": 0.0712,
445
+ "step": 15585
446
+ },
447
+ {
448
+ "epoch": 31.0,
449
+ "eval_loss": 0.07727421820163727,
450
+ "eval_runtime": 196.043,
451
+ "eval_samples_per_second": 2.056,
452
+ "eval_steps_per_second": 0.26,
453
+ "step": 15585
454
+ },
455
+ {
456
+ "epoch": 32.0,
457
+ "learning_rate": 1.9880478087649404e-05,
458
+ "loss": 0.07,
459
+ "step": 16088
460
+ },
461
+ {
462
+ "epoch": 32.0,
463
+ "eval_loss": 0.07521162927150726,
464
+ "eval_runtime": 196.5081,
465
+ "eval_samples_per_second": 2.051,
466
+ "eval_steps_per_second": 0.26,
467
+ "step": 16088
468
+ },
469
+ {
470
+ "epoch": 33.0,
471
+ "learning_rate": 1.7380478087649405e-05,
472
+ "loss": 0.069,
473
+ "step": 16590
474
+ },
475
+ {
476
+ "epoch": 33.0,
477
+ "eval_loss": 0.07360897213220596,
478
+ "eval_runtime": 196.0187,
479
+ "eval_samples_per_second": 2.056,
480
+ "eval_steps_per_second": 0.26,
481
+ "step": 16590
482
+ },
483
+ {
484
+ "epoch": 34.0,
485
+ "learning_rate": 1.4875498007968128e-05,
486
+ "loss": 0.0682,
487
+ "step": 17093
488
+ },
489
+ {
490
+ "epoch": 34.0,
491
+ "eval_loss": 0.07235261797904968,
492
+ "eval_runtime": 196.2571,
493
+ "eval_samples_per_second": 2.053,
494
+ "eval_steps_per_second": 0.26,
495
+ "step": 17093
496
+ },
497
+ {
498
+ "epoch": 35.0,
499
+ "learning_rate": 1.2370517928286854e-05,
500
+ "loss": 0.0671,
501
+ "step": 17596
502
+ },
503
+ {
504
+ "epoch": 35.0,
505
+ "eval_loss": 0.07117172330617905,
506
+ "eval_runtime": 197.0583,
507
+ "eval_samples_per_second": 2.045,
508
+ "eval_steps_per_second": 0.259,
509
+ "step": 17596
510
+ },
511
+ {
512
+ "epoch": 36.0,
513
+ "learning_rate": 9.86553784860558e-06,
514
+ "loss": 0.0663,
515
+ "step": 18099
516
+ },
517
+ {
518
+ "epoch": 36.0,
519
+ "eval_loss": 0.07005689293146133,
520
+ "eval_runtime": 196.2214,
521
+ "eval_samples_per_second": 2.054,
522
+ "eval_steps_per_second": 0.26,
523
+ "step": 18099
524
+ },
525
+ {
526
+ "epoch": 37.0,
527
+ "learning_rate": 7.365537848605578e-06,
528
+ "loss": 0.0656,
529
+ "step": 18601
530
+ },
531
+ {
532
+ "epoch": 37.0,
533
+ "eval_loss": 0.06923193484544754,
534
+ "eval_runtime": 195.9767,
535
+ "eval_samples_per_second": 2.056,
536
+ "eval_steps_per_second": 0.26,
537
+ "step": 18601
538
+ },
539
+ {
540
+ "epoch": 38.0,
541
+ "learning_rate": 4.860557768924303e-06,
542
+ "loss": 0.0648,
543
+ "step": 19104
544
+ },
545
+ {
546
+ "epoch": 38.0,
547
+ "eval_loss": 0.06851127743721008,
548
+ "eval_runtime": 196.8249,
549
+ "eval_samples_per_second": 2.048,
550
+ "eval_steps_per_second": 0.259,
551
+ "step": 19104
552
+ },
553
+ {
554
+ "epoch": 39.0,
555
+ "learning_rate": 2.355577689243028e-06,
556
+ "loss": 0.064,
557
+ "step": 19607
558
+ },
559
+ {
560
+ "epoch": 39.0,
561
+ "eval_loss": 0.0680607408285141,
562
+ "eval_runtime": 195.9876,
563
+ "eval_samples_per_second": 2.056,
564
+ "eval_steps_per_second": 0.26,
565
+ "step": 19607
566
+ }
567
+ ],
568
+ "logging_steps": 500,
569
+ "max_steps": 20080,
570
+ "num_train_epochs": 40,
571
+ "save_steps": 500,
572
+ "total_flos": 4.6336747921934746e+17,
573
+ "trial_name": null,
574
+ "trial_params": null
575
+ }
576
+ ```
577
 
578
  ## Model Details
579