Training in progress, step 500
Browse files- adapter_config.json +5 -5
- trainer_log.jsonl +101 -101
- training_args.bin +1 -1
adapter_config.json
CHANGED
@@ -20,13 +20,13 @@
|
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
-
"k_proj",
|
24 |
-
"down_proj",
|
25 |
-
"q_proj",
|
26 |
-
"up_proj",
|
27 |
"v_proj",
|
|
|
28 |
"o_proj",
|
29 |
-
"
|
|
|
|
|
|
|
30 |
],
|
31 |
"task_type": "CAUSAL_LM",
|
32 |
"use_dora": false,
|
|
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
|
|
|
|
|
|
|
|
23 |
"v_proj",
|
24 |
+
"q_proj",
|
25 |
"o_proj",
|
26 |
+
"k_proj",
|
27 |
+
"up_proj",
|
28 |
+
"gate_proj",
|
29 |
+
"down_proj"
|
30 |
],
|
31 |
"task_type": "CAUSAL_LM",
|
32 |
"use_dora": false,
|
trainer_log.jsonl
CHANGED
@@ -1,101 +1,101 @@
|
|
1 |
-
{"current_steps": 10, "total_steps": 500, "loss": 2.1156, "lr": 4e-05, "epoch": 0.08888888888888889, "percentage": 2.0, "elapsed_time": "0:00:
|
2 |
-
{"current_steps": 10, "total_steps": 500, "eval_loss": 1.5894473791122437, "epoch": 0.08888888888888889, "percentage": 2.0, "elapsed_time": "0:00:
|
3 |
-
{"current_steps": 20, "total_steps": 500, "loss": 1.1893, "lr": 8e-05, "epoch": 0.17777777777777778, "percentage": 4.0, "elapsed_time": "0:
|
4 |
-
{"current_steps": 20, "total_steps": 500, "eval_loss": 0.6867849826812744, "epoch": 0.17777777777777778, "percentage": 4.0, "elapsed_time": "0:01:
|
5 |
-
{"current_steps": 30, "total_steps": 500, "loss": 0.5218, "lr": 0.00012, "epoch": 0.26666666666666666, "percentage": 6.0, "elapsed_time": "0:01:
|
6 |
-
{"current_steps": 30, "total_steps": 500, "eval_loss": 0.45551854372024536, "epoch": 0.26666666666666666, "percentage": 6.0, "elapsed_time": "0:01
|
7 |
-
{"current_steps": 40, "total_steps": 500, "loss": 0.5292, "lr": 0.00016, "epoch": 0.35555555555555557, "percentage": 8.0, "elapsed_time": "0:02:
|
8 |
-
{"current_steps": 40, "total_steps": 500, "eval_loss": 0.3795148730278015, "epoch": 0.35555555555555557, "percentage": 8.0, "elapsed_time": "0:02:
|
9 |
-
{"current_steps": 50, "total_steps": 500, "loss": 0.3866, "lr": 0.0002, "epoch": 0.4444444444444444, "percentage": 10.0, "elapsed_time": "0:
|
10 |
-
{"current_steps": 50, "total_steps": 500, "eval_loss": 0.30648669600486755, "epoch": 0.4444444444444444, "percentage": 10.0, "elapsed_time": "0:
|
11 |
-
{"current_steps": 60, "total_steps": 500, "loss": 0.3232, "lr": 0.00019975640502598244, "epoch": 0.5333333333333333, "percentage": 12.0, "elapsed_time": "0:03:
|
12 |
-
{"current_steps": 60, "total_steps": 500, "eval_loss": 0.20737296342849731, "epoch": 0.5333333333333333, "percentage": 12.0, "elapsed_time": "0:03:
|
13 |
-
{"current_steps": 70, "total_steps": 500, "loss": 0.1802, "lr": 0.00019902680687415705, "epoch": 0.6222222222222222, "percentage": 14.0, "elapsed_time": "0:
|
14 |
-
{"current_steps": 70, "total_steps": 500, "eval_loss": 0.15315091609954834, "epoch": 0.6222222222222222, "percentage": 14.0, "elapsed_time": "0:
|
15 |
-
{"current_steps": 80, "total_steps": 500, "loss": 0.21, "lr": 0.00019781476007338058, "epoch": 0.7111111111111111, "percentage": 16.0, "elapsed_time": "0:04
|
16 |
-
{"current_steps": 80, "total_steps": 500, "eval_loss": 0.13480396568775177, "epoch": 0.7111111111111111, "percentage": 16.0, "elapsed_time": "0:
|
17 |
-
{"current_steps": 90, "total_steps": 500, "loss": 0.158, "lr": 0.0001961261695938319, "epoch": 0.8, "percentage": 18.0, "elapsed_time": "0:
|
18 |
-
{"current_steps": 90, "total_steps": 500, "eval_loss": 0.13721750676631927, "epoch": 0.8, "percentage": 18.0, "elapsed_time": "0:
|
19 |
-
{"current_steps": 100, "total_steps": 500, "loss": 0.1629, "lr": 0.00019396926207859084, "epoch": 0.8888888888888888, "percentage": 20.0, "elapsed_time": "0:
|
20 |
-
{"current_steps": 100, "total_steps": 500, "eval_loss": 0.12762245535850525, "epoch": 0.8888888888888888, "percentage": 20.0, "elapsed_time": "0:
|
21 |
-
{"current_steps": 110, "total_steps": 500, "loss": 0.0966, "lr": 0.0001913545457642601, "epoch": 0.9777777777777777, "percentage": 22.0, "elapsed_time": "0:
|
22 |
-
{"current_steps": 110, "total_steps": 500, "eval_loss": 0.10031093657016754, "epoch": 0.9777777777777777, "percentage": 22.0, "elapsed_time": "0:
|
23 |
-
{"current_steps": 120, "total_steps": 500, "loss": 0.0643, "lr": 0.00018829475928589271, "epoch": 1.0666666666666667, "percentage": 24.0, "elapsed_time": "0:
|
24 |
-
{"current_steps": 120, "total_steps": 500, "eval_loss": 0.08794313669204712, "epoch": 1.0666666666666667, "percentage": 24.0, "elapsed_time": "0:
|
25 |
-
{"current_steps": 130, "total_steps": 500, "loss": 0.0726, "lr": 0.0001848048096156426, "epoch": 1.1555555555555554, "percentage": 26.0, "elapsed_time": "0:
|
26 |
-
{"current_steps": 130, "total_steps": 500, "eval_loss": 0.08720792084932327, "epoch": 1.1555555555555554, "percentage": 26.0, "elapsed_time": "0:
|
27 |
-
{"current_steps": 140, "total_steps": 500, "loss": 0.0493, "lr": 0.00018090169943749476, "epoch": 1.2444444444444445, "percentage": 28.0, "elapsed_time": "0:
|
28 |
-
{"current_steps": 140, "total_steps": 500, "eval_loss": 0.09057007730007172, "epoch": 1.2444444444444445, "percentage": 28.0, "elapsed_time": "0:
|
29 |
-
{"current_steps": 150, "total_steps": 500, "loss": 0.0746, "lr": 0.0001766044443118978, "epoch": 1.3333333333333333, "percentage": 30.0, "elapsed_time": "0:
|
30 |
-
{"current_steps": 150, "total_steps": 500, "eval_loss": 0.058685798197984695, "epoch": 1.3333333333333333, "percentage": 30.0, "elapsed_time": "0:
|
31 |
-
{"current_steps": 160, "total_steps": 500, "loss": 0.0473, "lr": 0.0001719339800338651, "epoch": 1.4222222222222223, "percentage": 32.0, "elapsed_time": "0:
|
32 |
-
{"current_steps": 160, "total_steps": 500, "eval_loss": 0.0560651533305645, "epoch": 1.4222222222222223, "percentage": 32.0, "elapsed_time": "0:
|
33 |
-
{"current_steps": 170, "total_steps": 500, "loss": 0.0644, "lr": 0.00016691306063588583, "epoch": 1.511111111111111, "percentage": 34.0, "elapsed_time": "0:
|
34 |
-
{"current_steps": 170, "total_steps": 500, "eval_loss": 0.05025744438171387, "epoch": 1.511111111111111, "percentage": 34.0, "elapsed_time": "0:
|
35 |
-
{"current_steps": 180, "total_steps": 500, "loss": 0.0366, "lr": 0.0001615661475325658, "epoch": 1.6, "percentage": 36.0, "elapsed_time": "0:
|
36 |
-
{"current_steps": 180, "total_steps": 500, "eval_loss": 0.030684156343340874, "epoch": 1.6, "percentage": 36.0, "elapsed_time": "0:
|
37 |
-
{"current_steps": 190, "total_steps": 500, "loss": 0.0247, "lr": 0.0001559192903470747, "epoch": 1.6888888888888889, "percentage": 38.0, "elapsed_time": "0:
|
38 |
-
{"current_steps": 190, "total_steps": 500, "eval_loss": 0.023328043520450592, "epoch": 1.6888888888888889, "percentage": 38.0, "elapsed_time": "0:
|
39 |
-
{"current_steps": 200, "total_steps": 500, "loss": 0.01, "lr": 0.00015000000000000001, "epoch": 1.7777777777777777, "percentage": 40.0, "elapsed_time": "0:
|
40 |
-
{"current_steps": 200, "total_steps": 500, "eval_loss": 0.021545417606830597, "epoch": 1.7777777777777777, "percentage": 40.0, "elapsed_time": "0:
|
41 |
-
{"current_steps": 210, "total_steps": 500, "loss": 0.0393, "lr": 0.00014383711467890774, "epoch": 1.8666666666666667, "percentage": 42.0, "elapsed_time": "0:
|
42 |
-
{"current_steps": 210, "total_steps": 500, "eval_loss": 0.012232878245413303, "epoch": 1.8666666666666667, "percentage": 42.0, "elapsed_time": "0:
|
43 |
-
{"current_steps": 220, "total_steps": 500, "loss": 0.0299, "lr": 0.00013746065934159123, "epoch": 1.9555555555555557, "percentage": 44.0, "elapsed_time": "0:
|
44 |
-
{"current_steps": 220, "total_steps": 500, "eval_loss": 0.01798514649271965, "epoch": 1.9555555555555557, "percentage": 44.0, "elapsed_time": "0:
|
45 |
-
{"current_steps": 230, "total_steps": 500, "loss": 0.0166, "lr": 0.00013090169943749476, "epoch": 2.0444444444444443, "percentage": 46.0, "elapsed_time": "0:
|
46 |
-
{"current_steps": 230, "total_steps": 500, "eval_loss": 0.008207106962800026, "epoch": 2.0444444444444443, "percentage": 46.0, "elapsed_time": "0:
|
47 |
-
{"current_steps": 240, "total_steps": 500, "loss": 0.0319, "lr": 0.00012419218955996676, "epoch": 2.1333333333333333, "percentage": 48.0, "elapsed_time": "0:
|
48 |
-
{"current_steps": 240, "total_steps": 500, "eval_loss": 0.008276881650090218, "epoch": 2.1333333333333333, "percentage": 48.0, "elapsed_time": "0:
|
49 |
-
{"current_steps": 250, "total_steps": 500, "loss": 0.0077, "lr": 0.00011736481776669306, "epoch": 2.2222222222222223, "percentage": 50.0, "elapsed_time": "0:
|
50 |
-
{"current_steps": 250, "total_steps": 500, "eval_loss": 0.007150276098400354, "epoch": 2.2222222222222223, "percentage": 50.0, "elapsed_time": "0:
|
51 |
-
{"current_steps": 260, "total_steps": 500, "loss": 0.0141, "lr": 0.00011045284632676536, "epoch": 2.311111111111111, "percentage": 52.0, "elapsed_time": "0:
|
52 |
-
{"current_steps": 260, "total_steps": 500, "eval_loss": 0.003109171986579895, "epoch": 2.311111111111111, "percentage": 52.0, "elapsed_time": "0:
|
53 |
-
{"current_steps": 270, "total_steps": 500, "loss": 0.0017, "lr": 0.00010348994967025012, "epoch": 2.4, "percentage": 54.0, "elapsed_time": "0:
|
54 |
-
{"current_steps": 270, "total_steps": 500, "eval_loss": 0.012033730745315552, "epoch": 2.4, "percentage": 54.0, "elapsed_time": "0:
|
55 |
-
{"current_steps": 280, "total_steps": 500, "loss": 0.0015, "lr": 9.651005032974994e-05, "epoch": 2.488888888888889, "percentage": 56.0, "elapsed_time": "0:
|
56 |
-
{"current_steps": 280, "total_steps": 500, "eval_loss": 0.015280201099812984, "epoch": 2.488888888888889, "percentage": 56.0, "elapsed_time": "0:
|
57 |
-
{"current_steps": 290, "total_steps": 500, "loss": 0.0126, "lr": 8.954715367323468e-05, "epoch": 2.5777777777777775, "percentage": 58.0, "elapsed_time": "0:
|
58 |
-
{"current_steps": 290, "total_steps": 500, "eval_loss": 0.01406156551092863, "epoch": 2.5777777777777775, "percentage": 58.0, "elapsed_time": "0:
|
59 |
-
{"current_steps": 300, "total_steps": 500, "loss": 0.0043, "lr": 8.263518223330697e-05, "epoch": 2.6666666666666665, "percentage": 60.0, "elapsed_time": "0:
|
60 |
-
{"current_steps": 300, "total_steps": 500, "eval_loss": 0.0021963752806186676, "epoch": 2.6666666666666665, "percentage": 60.0, "elapsed_time": "0:
|
61 |
-
{"current_steps": 310, "total_steps": 500, "loss": 0.0068, "lr": 7.580781044003324e-05, "epoch": 2.7555555555555555, "percentage": 62.0, "elapsed_time": "0:
|
62 |
-
{"current_steps": 310, "total_steps": 500, "eval_loss": 0.001871286309324205, "epoch": 2.7555555555555555, "percentage": 62.0, "elapsed_time": "0:
|
63 |
-
{"current_steps": 320, "total_steps": 500, "loss": 0.0018, "lr": 6.909830056250527e-05, "epoch": 2.8444444444444446, "percentage": 64.0, "elapsed_time": "0:
|
64 |
-
{"current_steps": 320, "total_steps": 500, "eval_loss": 0.002184124430641532, "epoch": 2.8444444444444446, "percentage": 64.0, "elapsed_time": "0:
|
65 |
-
{"current_steps": 330, "total_steps": 500, "loss": 0.0026, "lr": 6.25393406584088e-05, "epoch": 2.9333333333333336, "percentage": 66.0, "elapsed_time": "0:
|
66 |
-
{"current_steps": 330, "total_steps": 500, "eval_loss": 0.0034216546919196844, "epoch": 2.9333333333333336, "percentage": 66.0, "elapsed_time": "0:
|
67 |
-
{"current_steps": 340, "total_steps": 500, "loss": 0.0017, "lr": 5.616288532109225e-05, "epoch": 3.022222222222222, "percentage": 68.0, "elapsed_time": "0:
|
68 |
-
{"current_steps": 340, "total_steps": 500, "eval_loss": 0.007565508596599102, "epoch": 3.022222222222222, "percentage": 68.0, "elapsed_time": "0:
|
69 |
-
{"current_steps": 350, "total_steps": 500, "loss": 0.0002, "lr": 5.000000000000002e-05, "epoch": 3.111111111111111, "percentage": 70.0, "elapsed_time": "0:
|
70 |
-
{"current_steps": 350, "total_steps": 500, "eval_loss": 0.010203778743743896, "epoch": 3.111111111111111, "percentage": 70.0, "elapsed_time": "0:
|
71 |
-
{"current_steps": 360, "total_steps": 500, "loss": 0.0004, "lr": 4.4080709652925336e-05, "epoch": 3.2, "percentage": 72.0, "elapsed_time": "0:
|
72 |
-
{"current_steps": 360, "total_steps": 500, "eval_loss": 0.011154056526720524, "epoch": 3.2, "percentage": 72.0, "elapsed_time": "0:
|
73 |
-
{"current_steps": 370, "total_steps": 500, "loss": 0.006, "lr": 3.843385246743417e-05, "epoch": 3.2888888888888888, "percentage": 74.0, "elapsed_time": "0:
|
74 |
-
{"current_steps": 370, "total_steps": 500, "eval_loss": 0.00937813799828291, "epoch": 3.2888888888888888, "percentage": 74.0, "elapsed_time": "0:
|
75 |
-
{"current_steps": 380, "total_steps": 500, "loss": 0.0003, "lr": 3.308693936411421e-05, "epoch": 3.3777777777777778, "percentage": 76.0, "elapsed_time": "0:
|
76 |
-
{"current_steps": 380, "total_steps": 500, "eval_loss": 0.007455301936715841, "epoch": 3.3777777777777778, "percentage": 76.0, "elapsed_time": "0:
|
77 |
-
{"current_steps": 390, "total_steps": 500, "loss": 0.0003, "lr": 2.8066019966134904e-05, "epoch": 3.466666666666667, "percentage": 78.0, "elapsed_time": "0:
|
78 |
-
{"current_steps": 390, "total_steps": 500, "eval_loss": 0.006887929514050484, "epoch": 3.466666666666667, "percentage": 78.0, "elapsed_time": "0:
|
79 |
-
{"current_steps": 400, "total_steps": 500, "loss": 0.0002, "lr": 2.339555568810221e-05, "epoch": 3.5555555555555554, "percentage": 80.0, "elapsed_time": "0:
|
80 |
-
{"current_steps": 400, "total_steps": 500, "eval_loss": 0.006745634134858847, "epoch": 3.5555555555555554, "percentage": 80.0, "elapsed_time": "0:
|
81 |
-
{"current_steps": 410, "total_steps": 500, "loss": 0.0005, "lr": 1.9098300562505266e-05, "epoch": 3.6444444444444444, "percentage": 82.0, "elapsed_time": "0:
|
82 |
-
{"current_steps": 410, "total_steps": 500, "eval_loss": 0.006609635427594185, "epoch": 3.6444444444444444, "percentage": 82.0, "elapsed_time": "0:
|
83 |
-
{"current_steps": 420, "total_steps": 500, "loss": 0.0003, "lr": 1.5195190384357404e-05, "epoch": 3.7333333333333334, "percentage": 84.0, "elapsed_time": "0:
|
84 |
-
{"current_steps": 420, "total_steps": 500, "eval_loss": 0.007157918065786362, "epoch": 3.7333333333333334, "percentage": 84.0, "elapsed_time": "0:
|
85 |
-
{"current_steps": 430, "total_steps": 500, "loss": 0.0037, "lr": 1.1705240714107302e-05, "epoch": 3.822222222222222, "percentage": 86.0, "elapsed_time": "0:
|
86 |
-
{"current_steps": 430, "total_steps": 500, "eval_loss": 0.006302958354353905, "epoch": 3.822222222222222, "percentage": 86.0, "elapsed_time": "0:
|
87 |
-
{"current_steps": 440, "total_steps": 500, "loss": 0.004, "lr": 8.645454235739903e-06, "epoch": 3.911111111111111, "percentage": 88.0, "elapsed_time": "0:
|
88 |
-
{"current_steps": 440, "total_steps": 500, "eval_loss": 0.005341523326933384, "epoch": 3.911111111111111, "percentage": 88.0, "elapsed_time": "0:
|
89 |
-
{"current_steps": 450, "total_steps": 500, "loss": 0.0003, "lr": 6.030737921409169e-06, "epoch": 4.0, "percentage": 90.0, "elapsed_time": "0:24
|
90 |
-
{"current_steps": 450, "total_steps": 500, "eval_loss": 0.005209792871028185, "epoch": 4.0, "percentage": 90.0, "elapsed_time": "0:
|
91 |
-
{"current_steps": 460, "total_steps": 500, "loss": 0.0002, "lr": 3.873830406168111e-06, "epoch": 4.088888888888889, "percentage": 92.0, "elapsed_time": "0:
|
92 |
-
{"current_steps": 460, "total_steps": 500, "eval_loss": 0.005074501037597656, "epoch": 4.088888888888889, "percentage": 92.0, "elapsed_time": "0:
|
93 |
-
{"current_steps": 470, "total_steps": 500, "loss": 0.0002, "lr": 2.1852399266194314e-06, "epoch": 4.177777777777778, "percentage": 94.0, "elapsed_time": "0:
|
94 |
-
{"current_steps": 470, "total_steps": 500, "eval_loss": 0.004982742480933666, "epoch": 4.177777777777778, "percentage": 94.0, "elapsed_time": "0:
|
95 |
-
{"current_steps": 480, "total_steps": 500, "loss": 0.0006, "lr": 9.731931258429638e-07, "epoch": 4.266666666666667, "percentage": 96.0, "elapsed_time": "0:
|
96 |
-
{"current_steps": 480, "total_steps": 500, "eval_loss": 0.004874282516539097, "epoch": 4.266666666666667, "percentage": 96.0, "elapsed_time": "0:
|
97 |
-
{"current_steps": 490, "total_steps": 500, "loss": 0.0005, "lr": 2.4359497401758024e-07, "epoch": 4.355555555555555, "percentage": 98.0, "elapsed_time": "0:
|
98 |
-
{"current_steps": 490, "total_steps": 500, "eval_loss": 0.004786411300301552, "epoch": 4.355555555555555, "percentage": 98.0, "elapsed_time": "0:
|
99 |
-
{"current_steps": 500, "total_steps": 500, "loss": 0.0002, "lr": 0.0, "epoch": 4.444444444444445, "percentage": 100.0, "elapsed_time": "0:
|
100 |
-
{"current_steps": 500, "total_steps": 500, "eval_loss": 0.005105508491396904, "epoch": 4.444444444444445, "percentage": 100.0, "elapsed_time": "0:
|
101 |
-
{"current_steps": 500, "total_steps": 500, "epoch": 4.444444444444445, "percentage": 100.0, "elapsed_time": "0:
|
|
|
1 |
+
{"current_steps": 10, "total_steps": 500, "loss": 2.1156, "lr": 4e-05, "epoch": 0.08888888888888889, "percentage": 2.0, "elapsed_time": "0:00:32", "remaining_time": "0:26:56"}
|
2 |
+
{"current_steps": 10, "total_steps": 500, "eval_loss": 1.5894473791122437, "epoch": 0.08888888888888889, "percentage": 2.0, "elapsed_time": "0:00:42", "remaining_time": "0:34:48"}
|
3 |
+
{"current_steps": 20, "total_steps": 500, "loss": 1.1893, "lr": 8e-05, "epoch": 0.17777777777777778, "percentage": 4.0, "elapsed_time": "0:01:12", "remaining_time": "0:29:02"}
|
4 |
+
{"current_steps": 20, "total_steps": 500, "eval_loss": 0.6867849826812744, "epoch": 0.17777777777777778, "percentage": 4.0, "elapsed_time": "0:01:22", "remaining_time": "0:32:50"}
|
5 |
+
{"current_steps": 30, "total_steps": 500, "loss": 0.5218, "lr": 0.00012, "epoch": 0.26666666666666666, "percentage": 6.0, "elapsed_time": "0:01:51", "remaining_time": "0:29:14"}
|
6 |
+
{"current_steps": 30, "total_steps": 500, "eval_loss": 0.45551854372024536, "epoch": 0.26666666666666666, "percentage": 6.0, "elapsed_time": "0:02:01", "remaining_time": "0:31:43"}
|
7 |
+
{"current_steps": 40, "total_steps": 500, "loss": 0.5292, "lr": 0.00016, "epoch": 0.35555555555555557, "percentage": 8.0, "elapsed_time": "0:02:31", "remaining_time": "0:29:02"}
|
8 |
+
{"current_steps": 40, "total_steps": 500, "eval_loss": 0.3795148730278015, "epoch": 0.35555555555555557, "percentage": 8.0, "elapsed_time": "0:02:40", "remaining_time": "0:30:50"}
|
9 |
+
{"current_steps": 50, "total_steps": 500, "loss": 0.3866, "lr": 0.0002, "epoch": 0.4444444444444444, "percentage": 10.0, "elapsed_time": "0:03:09", "remaining_time": "0:28:28"}
|
10 |
+
{"current_steps": 50, "total_steps": 500, "eval_loss": 0.30648669600486755, "epoch": 0.4444444444444444, "percentage": 10.0, "elapsed_time": "0:03:19", "remaining_time": "0:29:51"}
|
11 |
+
{"current_steps": 60, "total_steps": 500, "loss": 0.3232, "lr": 0.00019975640502598244, "epoch": 0.5333333333333333, "percentage": 12.0, "elapsed_time": "0:03:48", "remaining_time": "0:27:53"}
|
12 |
+
{"current_steps": 60, "total_steps": 500, "eval_loss": 0.20737296342849731, "epoch": 0.5333333333333333, "percentage": 12.0, "elapsed_time": "0:03:57", "remaining_time": "0:29:01"}
|
13 |
+
{"current_steps": 70, "total_steps": 500, "loss": 0.1802, "lr": 0.00019902680687415705, "epoch": 0.6222222222222222, "percentage": 14.0, "elapsed_time": "0:04:26", "remaining_time": "0:27:16"}
|
14 |
+
{"current_steps": 70, "total_steps": 500, "eval_loss": 0.15315091609954834, "epoch": 0.6222222222222222, "percentage": 14.0, "elapsed_time": "0:04:35", "remaining_time": "0:28:13"}
|
15 |
+
{"current_steps": 80, "total_steps": 500, "loss": 0.21, "lr": 0.00019781476007338058, "epoch": 0.7111111111111111, "percentage": 16.0, "elapsed_time": "0:05:04", "remaining_time": "0:26:38"}
|
16 |
+
{"current_steps": 80, "total_steps": 500, "eval_loss": 0.13480396568775177, "epoch": 0.7111111111111111, "percentage": 16.0, "elapsed_time": "0:05:13", "remaining_time": "0:27:26"}
|
17 |
+
{"current_steps": 90, "total_steps": 500, "loss": 0.158, "lr": 0.0001961261695938319, "epoch": 0.8, "percentage": 18.0, "elapsed_time": "0:05:42", "remaining_time": "0:26:00"}
|
18 |
+
{"current_steps": 90, "total_steps": 500, "eval_loss": 0.13721750676631927, "epoch": 0.8, "percentage": 18.0, "elapsed_time": "0:05:51", "remaining_time": "0:26:42"}
|
19 |
+
{"current_steps": 100, "total_steps": 500, "loss": 0.1629, "lr": 0.00019396926207859084, "epoch": 0.8888888888888888, "percentage": 20.0, "elapsed_time": "0:06:20", "remaining_time": "0:25:20"}
|
20 |
+
{"current_steps": 100, "total_steps": 500, "eval_loss": 0.12762245535850525, "epoch": 0.8888888888888888, "percentage": 20.0, "elapsed_time": "0:06:29", "remaining_time": "0:25:57"}
|
21 |
+
{"current_steps": 110, "total_steps": 500, "loss": 0.0966, "lr": 0.0001913545457642601, "epoch": 0.9777777777777777, "percentage": 22.0, "elapsed_time": "0:06:57", "remaining_time": "0:24:40"}
|
22 |
+
{"current_steps": 110, "total_steps": 500, "eval_loss": 0.10031093657016754, "epoch": 0.9777777777777777, "percentage": 22.0, "elapsed_time": "0:07:07", "remaining_time": "0:25:14"}
|
23 |
+
{"current_steps": 120, "total_steps": 500, "loss": 0.0643, "lr": 0.00018829475928589271, "epoch": 1.0666666666666667, "percentage": 24.0, "elapsed_time": "0:07:35", "remaining_time": "0:24:02"}
|
24 |
+
{"current_steps": 120, "total_steps": 500, "eval_loss": 0.08794313669204712, "epoch": 1.0666666666666667, "percentage": 24.0, "elapsed_time": "0:07:44", "remaining_time": "0:24:32"}
|
25 |
+
{"current_steps": 130, "total_steps": 500, "loss": 0.0726, "lr": 0.0001848048096156426, "epoch": 1.1555555555555554, "percentage": 26.0, "elapsed_time": "0:08:13", "remaining_time": "0:23:23"}
|
26 |
+
{"current_steps": 130, "total_steps": 500, "eval_loss": 0.08720792084932327, "epoch": 1.1555555555555554, "percentage": 26.0, "elapsed_time": "0:08:22", "remaining_time": "0:23:49"}
|
27 |
+
{"current_steps": 140, "total_steps": 500, "loss": 0.0493, "lr": 0.00018090169943749476, "epoch": 1.2444444444444445, "percentage": 28.0, "elapsed_time": "0:08:50", "remaining_time": "0:22:44"}
|
28 |
+
{"current_steps": 140, "total_steps": 500, "eval_loss": 0.09057007730007172, "epoch": 1.2444444444444445, "percentage": 28.0, "elapsed_time": "0:09:00", "remaining_time": "0:23:08"}
|
29 |
+
{"current_steps": 150, "total_steps": 500, "loss": 0.0746, "lr": 0.0001766044443118978, "epoch": 1.3333333333333333, "percentage": 30.0, "elapsed_time": "0:09:28", "remaining_time": "0:22:05"}
|
30 |
+
{"current_steps": 150, "total_steps": 500, "eval_loss": 0.058685798197984695, "epoch": 1.3333333333333333, "percentage": 30.0, "elapsed_time": "0:09:37", "remaining_time": "0:22:27"}
|
31 |
+
{"current_steps": 160, "total_steps": 500, "loss": 0.0473, "lr": 0.0001719339800338651, "epoch": 1.4222222222222223, "percentage": 32.0, "elapsed_time": "0:10:05", "remaining_time": "0:21:27"}
|
32 |
+
{"current_steps": 160, "total_steps": 500, "eval_loss": 0.0560651533305645, "epoch": 1.4222222222222223, "percentage": 32.0, "elapsed_time": "0:10:15", "remaining_time": "0:21:47"}
|
33 |
+
{"current_steps": 170, "total_steps": 500, "loss": 0.0644, "lr": 0.00016691306063588583, "epoch": 1.511111111111111, "percentage": 34.0, "elapsed_time": "0:10:43", "remaining_time": "0:20:49"}
|
34 |
+
{"current_steps": 170, "total_steps": 500, "eval_loss": 0.05025744438171387, "epoch": 1.511111111111111, "percentage": 34.0, "elapsed_time": "0:10:52", "remaining_time": "0:21:07"}
|
35 |
+
{"current_steps": 180, "total_steps": 500, "loss": 0.0366, "lr": 0.0001615661475325658, "epoch": 1.6, "percentage": 36.0, "elapsed_time": "0:11:21", "remaining_time": "0:20:11"}
|
36 |
+
{"current_steps": 180, "total_steps": 500, "eval_loss": 0.030684156343340874, "epoch": 1.6, "percentage": 36.0, "elapsed_time": "0:11:30", "remaining_time": "0:20:27"}
|
37 |
+
{"current_steps": 190, "total_steps": 500, "loss": 0.0247, "lr": 0.0001559192903470747, "epoch": 1.6888888888888889, "percentage": 38.0, "elapsed_time": "0:11:58", "remaining_time": "0:19:33"}
|
38 |
+
{"current_steps": 190, "total_steps": 500, "eval_loss": 0.023328043520450592, "epoch": 1.6888888888888889, "percentage": 38.0, "elapsed_time": "0:12:08", "remaining_time": "0:19:48"}
|
39 |
+
{"current_steps": 200, "total_steps": 500, "loss": 0.01, "lr": 0.00015000000000000001, "epoch": 1.7777777777777777, "percentage": 40.0, "elapsed_time": "0:12:37", "remaining_time": "0:18:55"}
|
40 |
+
{"current_steps": 200, "total_steps": 500, "eval_loss": 0.021545417606830597, "epoch": 1.7777777777777777, "percentage": 40.0, "elapsed_time": "0:12:46", "remaining_time": "0:19:10"}
|
41 |
+
{"current_steps": 210, "total_steps": 500, "loss": 0.0393, "lr": 0.00014383711467890774, "epoch": 1.8666666666666667, "percentage": 42.0, "elapsed_time": "0:13:15", "remaining_time": "0:18:17"}
|
42 |
+
{"current_steps": 210, "total_steps": 500, "eval_loss": 0.012232878245413303, "epoch": 1.8666666666666667, "percentage": 42.0, "elapsed_time": "0:13:24", "remaining_time": "0:18:30"}
|
43 |
+
{"current_steps": 220, "total_steps": 500, "loss": 0.0299, "lr": 0.00013746065934159123, "epoch": 1.9555555555555557, "percentage": 44.0, "elapsed_time": "0:13:52", "remaining_time": "0:17:40"}
|
44 |
+
{"current_steps": 220, "total_steps": 500, "eval_loss": 0.01798514649271965, "epoch": 1.9555555555555557, "percentage": 44.0, "elapsed_time": "0:14:02", "remaining_time": "0:17:52"}
|
45 |
+
{"current_steps": 230, "total_steps": 500, "loss": 0.0166, "lr": 0.00013090169943749476, "epoch": 2.0444444444444443, "percentage": 46.0, "elapsed_time": "0:14:30", "remaining_time": "0:17:02"}
|
46 |
+
{"current_steps": 230, "total_steps": 500, "eval_loss": 0.008207106962800026, "epoch": 2.0444444444444443, "percentage": 46.0, "elapsed_time": "0:14:40", "remaining_time": "0:17:13"}
|
47 |
+
{"current_steps": 240, "total_steps": 500, "loss": 0.0319, "lr": 0.00012419218955996676, "epoch": 2.1333333333333333, "percentage": 48.0, "elapsed_time": "0:15:08", "remaining_time": "0:16:24"}
|
48 |
+
{"current_steps": 240, "total_steps": 500, "eval_loss": 0.008276881650090218, "epoch": 2.1333333333333333, "percentage": 48.0, "elapsed_time": "0:15:17", "remaining_time": "0:16:34"}
|
49 |
+
{"current_steps": 250, "total_steps": 500, "loss": 0.0077, "lr": 0.00011736481776669306, "epoch": 2.2222222222222223, "percentage": 50.0, "elapsed_time": "0:15:46", "remaining_time": "0:15:46"}
|
50 |
+
{"current_steps": 250, "total_steps": 500, "eval_loss": 0.007150276098400354, "epoch": 2.2222222222222223, "percentage": 50.0, "elapsed_time": "0:15:55", "remaining_time": "0:15:55"}
|
51 |
+
{"current_steps": 260, "total_steps": 500, "loss": 0.0141, "lr": 0.00011045284632676536, "epoch": 2.311111111111111, "percentage": 52.0, "elapsed_time": "0:16:23", "remaining_time": "0:15:08"}
|
52 |
+
{"current_steps": 260, "total_steps": 500, "eval_loss": 0.003109171986579895, "epoch": 2.311111111111111, "percentage": 52.0, "elapsed_time": "0:16:33", "remaining_time": "0:15:16"}
|
53 |
+
{"current_steps": 270, "total_steps": 500, "loss": 0.0017, "lr": 0.00010348994967025012, "epoch": 2.4, "percentage": 54.0, "elapsed_time": "0:17:01", "remaining_time": "0:14:30"}
|
54 |
+
{"current_steps": 270, "total_steps": 500, "eval_loss": 0.012033730745315552, "epoch": 2.4, "percentage": 54.0, "elapsed_time": "0:17:10", "remaining_time": "0:14:38"}
|
55 |
+
{"current_steps": 280, "total_steps": 500, "loss": 0.0015, "lr": 9.651005032974994e-05, "epoch": 2.488888888888889, "percentage": 56.0, "elapsed_time": "0:17:39", "remaining_time": "0:13:52"}
|
56 |
+
{"current_steps": 280, "total_steps": 500, "eval_loss": 0.015280201099812984, "epoch": 2.488888888888889, "percentage": 56.0, "elapsed_time": "0:17:48", "remaining_time": "0:13:59"}
|
57 |
+
{"current_steps": 290, "total_steps": 500, "loss": 0.0126, "lr": 8.954715367323468e-05, "epoch": 2.5777777777777775, "percentage": 58.0, "elapsed_time": "0:18:16", "remaining_time": "0:13:14"}
|
58 |
+
{"current_steps": 290, "total_steps": 500, "eval_loss": 0.01406156551092863, "epoch": 2.5777777777777775, "percentage": 58.0, "elapsed_time": "0:18:26", "remaining_time": "0:13:21"}
|
59 |
+
{"current_steps": 300, "total_steps": 500, "loss": 0.0043, "lr": 8.263518223330697e-05, "epoch": 2.6666666666666665, "percentage": 60.0, "elapsed_time": "0:18:54", "remaining_time": "0:12:36"}
|
60 |
+
{"current_steps": 300, "total_steps": 500, "eval_loss": 0.0021963752806186676, "epoch": 2.6666666666666665, "percentage": 60.0, "elapsed_time": "0:19:03", "remaining_time": "0:12:42"}
|
61 |
+
{"current_steps": 310, "total_steps": 500, "loss": 0.0068, "lr": 7.580781044003324e-05, "epoch": 2.7555555555555555, "percentage": 62.0, "elapsed_time": "0:19:32", "remaining_time": "0:11:58"}
|
62 |
+
{"current_steps": 310, "total_steps": 500, "eval_loss": 0.001871286309324205, "epoch": 2.7555555555555555, "percentage": 62.0, "elapsed_time": "0:19:41", "remaining_time": "0:12:04"}
|
63 |
+
{"current_steps": 320, "total_steps": 500, "loss": 0.0018, "lr": 6.909830056250527e-05, "epoch": 2.8444444444444446, "percentage": 64.0, "elapsed_time": "0:20:10", "remaining_time": "0:11:20"}
|
64 |
+
{"current_steps": 320, "total_steps": 500, "eval_loss": 0.002184124430641532, "epoch": 2.8444444444444446, "percentage": 64.0, "elapsed_time": "0:20:19", "remaining_time": "0:11:26"}
|
65 |
+
{"current_steps": 330, "total_steps": 500, "loss": 0.0026, "lr": 6.25393406584088e-05, "epoch": 2.9333333333333336, "percentage": 66.0, "elapsed_time": "0:20:48", "remaining_time": "0:10:43"}
|
66 |
+
{"current_steps": 330, "total_steps": 500, "eval_loss": 0.0034216546919196844, "epoch": 2.9333333333333336, "percentage": 66.0, "elapsed_time": "0:20:58", "remaining_time": "0:10:48"}
|
67 |
+
{"current_steps": 340, "total_steps": 500, "loss": 0.0017, "lr": 5.616288532109225e-05, "epoch": 3.022222222222222, "percentage": 68.0, "elapsed_time": "0:21:26", "remaining_time": "0:10:05"}
|
68 |
+
{"current_steps": 340, "total_steps": 500, "eval_loss": 0.007565508596599102, "epoch": 3.022222222222222, "percentage": 68.0, "elapsed_time": "0:21:36", "remaining_time": "0:10:09"}
|
69 |
+
{"current_steps": 350, "total_steps": 500, "loss": 0.0002, "lr": 5.000000000000002e-05, "epoch": 3.111111111111111, "percentage": 70.0, "elapsed_time": "0:22:04", "remaining_time": "0:09:27"}
|
70 |
+
{"current_steps": 350, "total_steps": 500, "eval_loss": 0.010203778743743896, "epoch": 3.111111111111111, "percentage": 70.0, "elapsed_time": "0:22:14", "remaining_time": "0:09:31"}
|
71 |
+
{"current_steps": 360, "total_steps": 500, "loss": 0.0004, "lr": 4.4080709652925336e-05, "epoch": 3.2, "percentage": 72.0, "elapsed_time": "0:22:42", "remaining_time": "0:08:49"}
|
72 |
+
{"current_steps": 360, "total_steps": 500, "eval_loss": 0.011154056526720524, "epoch": 3.2, "percentage": 72.0, "elapsed_time": "0:22:51", "remaining_time": "0:08:53"}
|
73 |
+
{"current_steps": 370, "total_steps": 500, "loss": 0.006, "lr": 3.843385246743417e-05, "epoch": 3.2888888888888888, "percentage": 74.0, "elapsed_time": "0:23:20", "remaining_time": "0:08:11"}
|
74 |
+
{"current_steps": 370, "total_steps": 500, "eval_loss": 0.00937813799828291, "epoch": 3.2888888888888888, "percentage": 74.0, "elapsed_time": "0:23:29", "remaining_time": "0:08:15"}
|
75 |
+
{"current_steps": 380, "total_steps": 500, "loss": 0.0003, "lr": 3.308693936411421e-05, "epoch": 3.3777777777777778, "percentage": 76.0, "elapsed_time": "0:23:58", "remaining_time": "0:07:34"}
|
76 |
+
{"current_steps": 380, "total_steps": 500, "eval_loss": 0.007455301936715841, "epoch": 3.3777777777777778, "percentage": 76.0, "elapsed_time": "0:24:07", "remaining_time": "0:07:37"}
|
77 |
+
{"current_steps": 390, "total_steps": 500, "loss": 0.0003, "lr": 2.8066019966134904e-05, "epoch": 3.466666666666667, "percentage": 78.0, "elapsed_time": "0:24:36", "remaining_time": "0:06:56"}
|
78 |
+
{"current_steps": 390, "total_steps": 500, "eval_loss": 0.006887929514050484, "epoch": 3.466666666666667, "percentage": 78.0, "elapsed_time": "0:24:45", "remaining_time": "0:06:59"}
|
79 |
+
{"current_steps": 400, "total_steps": 500, "loss": 0.0002, "lr": 2.339555568810221e-05, "epoch": 3.5555555555555554, "percentage": 80.0, "elapsed_time": "0:25:14", "remaining_time": "0:06:18"}
|
80 |
+
{"current_steps": 400, "total_steps": 500, "eval_loss": 0.006745634134858847, "epoch": 3.5555555555555554, "percentage": 80.0, "elapsed_time": "0:25:23", "remaining_time": "0:06:20"}
|
81 |
+
{"current_steps": 410, "total_steps": 500, "loss": 0.0005, "lr": 1.9098300562505266e-05, "epoch": 3.6444444444444444, "percentage": 82.0, "elapsed_time": "0:25:52", "remaining_time": "0:05:40"}
|
82 |
+
{"current_steps": 410, "total_steps": 500, "eval_loss": 0.006609635427594185, "epoch": 3.6444444444444444, "percentage": 82.0, "elapsed_time": "0:26:02", "remaining_time": "0:05:42"}
|
83 |
+
{"current_steps": 420, "total_steps": 500, "loss": 0.0003, "lr": 1.5195190384357404e-05, "epoch": 3.7333333333333334, "percentage": 84.0, "elapsed_time": "0:26:30", "remaining_time": "0:05:02"}
|
84 |
+
{"current_steps": 420, "total_steps": 500, "eval_loss": 0.007157918065786362, "epoch": 3.7333333333333334, "percentage": 84.0, "elapsed_time": "0:26:40", "remaining_time": "0:05:04"}
|
85 |
+
{"current_steps": 430, "total_steps": 500, "loss": 0.0037, "lr": 1.1705240714107302e-05, "epoch": 3.822222222222222, "percentage": 86.0, "elapsed_time": "0:27:08", "remaining_time": "0:04:25"}
|
86 |
+
{"current_steps": 430, "total_steps": 500, "eval_loss": 0.006302958354353905, "epoch": 3.822222222222222, "percentage": 86.0, "elapsed_time": "0:27:17", "remaining_time": "0:04:26"}
|
87 |
+
{"current_steps": 440, "total_steps": 500, "loss": 0.004, "lr": 8.645454235739903e-06, "epoch": 3.911111111111111, "percentage": 88.0, "elapsed_time": "0:27:46", "remaining_time": "0:03:47"}
|
88 |
+
{"current_steps": 440, "total_steps": 500, "eval_loss": 0.005341523326933384, "epoch": 3.911111111111111, "percentage": 88.0, "elapsed_time": "0:27:56", "remaining_time": "0:03:48"}
|
89 |
+
{"current_steps": 450, "total_steps": 500, "loss": 0.0003, "lr": 6.030737921409169e-06, "epoch": 4.0, "percentage": 90.0, "elapsed_time": "0:28:24", "remaining_time": "0:03:09"}
|
90 |
+
{"current_steps": 450, "total_steps": 500, "eval_loss": 0.005209792871028185, "epoch": 4.0, "percentage": 90.0, "elapsed_time": "0:28:33", "remaining_time": "0:03:10"}
|
91 |
+
{"current_steps": 460, "total_steps": 500, "loss": 0.0002, "lr": 3.873830406168111e-06, "epoch": 4.088888888888889, "percentage": 92.0, "elapsed_time": "0:29:02", "remaining_time": "0:02:31"}
|
92 |
+
{"current_steps": 460, "total_steps": 500, "eval_loss": 0.005074501037597656, "epoch": 4.088888888888889, "percentage": 92.0, "elapsed_time": "0:29:11", "remaining_time": "0:02:32"}
|
93 |
+
{"current_steps": 470, "total_steps": 500, "loss": 0.0002, "lr": 2.1852399266194314e-06, "epoch": 4.177777777777778, "percentage": 94.0, "elapsed_time": "0:29:39", "remaining_time": "0:01:53"}
|
94 |
+
{"current_steps": 470, "total_steps": 500, "eval_loss": 0.004982742480933666, "epoch": 4.177777777777778, "percentage": 94.0, "elapsed_time": "0:29:49", "remaining_time": "0:01:54"}
|
95 |
+
{"current_steps": 480, "total_steps": 500, "loss": 0.0006, "lr": 9.731931258429638e-07, "epoch": 4.266666666666667, "percentage": 96.0, "elapsed_time": "0:30:18", "remaining_time": "0:01:15"}
|
96 |
+
{"current_steps": 480, "total_steps": 500, "eval_loss": 0.004874282516539097, "epoch": 4.266666666666667, "percentage": 96.0, "elapsed_time": "0:30:27", "remaining_time": "0:01:16"}
|
97 |
+
{"current_steps": 490, "total_steps": 500, "loss": 0.0005, "lr": 2.4359497401758024e-07, "epoch": 4.355555555555555, "percentage": 98.0, "elapsed_time": "0:30:56", "remaining_time": "0:00:37"}
|
98 |
+
{"current_steps": 490, "total_steps": 500, "eval_loss": 0.004786411300301552, "epoch": 4.355555555555555, "percentage": 98.0, "elapsed_time": "0:31:05", "remaining_time": "0:00:38"}
|
99 |
+
{"current_steps": 500, "total_steps": 500, "loss": 0.0002, "lr": 0.0, "epoch": 4.444444444444445, "percentage": 100.0, "elapsed_time": "0:31:34", "remaining_time": "0:00:00"}
|
100 |
+
{"current_steps": 500, "total_steps": 500, "eval_loss": 0.005105508491396904, "epoch": 4.444444444444445, "percentage": 100.0, "elapsed_time": "0:31:43", "remaining_time": "0:00:00"}
|
101 |
+
{"current_steps": 500, "total_steps": 500, "epoch": 4.444444444444445, "percentage": 100.0, "elapsed_time": "0:31:44", "remaining_time": "0:00:00"}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5496
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d88c4586c8679b42dadab6f28d2b3db982278bdca6b97346f489f05c19d11cea
|
3 |
size 5496
|