trenkert committed on
Commit
e1502ee
1 Parent(s): f04bf38

Upload folder using huggingface_hub

Browse files
adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "gate_proj",
24
- "k_proj",
25
- "up_proj",
26
  "q_proj",
 
27
  "down_proj",
28
  "v_proj",
29
- "o_proj"
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
 
 
23
  "q_proj",
24
+ "up_proj",
25
  "down_proj",
26
  "v_proj",
27
+ "k_proj",
28
+ "o_proj",
29
+ "gate_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e25acd6a18f2aa9df4b20b43a636d55ad48cdae66cf3747e61e3dc6cb563261
3
  size 83945296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9f1d98b6bd6627d637012c48eb3fd7b4b3d119855dc5037821599bc3b7fd82d
3
  size 83945296
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 5.935483870967742,
3
  "total_flos": 3871080172486656.0,
4
- "train_loss": 0.5972379109133845,
5
- "train_runtime": 866.5062,
6
- "train_samples_per_second": 1.288,
7
- "train_steps_per_second": 0.159
8
  }
 
1
  {
2
  "epoch": 5.935483870967742,
3
  "total_flos": 3871080172486656.0,
4
+ "train_loss": 0.597086531744487,
5
+ "train_runtime": 849.7455,
6
+ "train_samples_per_second": 1.313,
7
+ "train_steps_per_second": 0.162
8
  }
checkpoint-138/adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "gate_proj",
24
- "k_proj",
25
- "up_proj",
26
  "q_proj",
 
27
  "down_proj",
28
  "v_proj",
29
- "o_proj"
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
 
 
23
  "q_proj",
24
+ "up_proj",
25
  "down_proj",
26
  "v_proj",
27
+ "k_proj",
28
+ "o_proj",
29
+ "gate_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
checkpoint-138/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e25acd6a18f2aa9df4b20b43a636d55ad48cdae66cf3747e61e3dc6cb563261
3
  size 83945296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9f1d98b6bd6627d637012c48eb3fd7b4b3d119855dc5037821599bc3b7fd82d
3
  size 83945296
checkpoint-138/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:191f8cca402f31422fb4a85ac833d9d62b98c5a925578c3809836b2e73903492
3
  size 168149394
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7837df7a48210c4fe030cfa59bb94edf4febe94cdbf20b9fda64ef8f61b3a98f
3
  size 168149394
checkpoint-138/trainer_state.json CHANGED
@@ -10,93 +10,93 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.43010752688172044,
13
- "grad_norm": 2.766061782836914,
14
  "learning_rate": 1.785714285714286e-05,
15
- "loss": 3.3579,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.8602150537634409,
20
- "grad_norm": 1.7068594694137573,
21
  "learning_rate": 3.571428571428572e-05,
22
- "loss": 1.5449,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 1.2903225806451613,
27
- "grad_norm": 1.2398000955581665,
28
  "learning_rate": 4.995922759815339e-05,
29
- "loss": 0.896,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 1.7204301075268817,
34
- "grad_norm": 0.8611450791358948,
35
  "learning_rate": 4.854610909098812e-05,
36
- "loss": 0.7327,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 2.150537634408602,
41
- "grad_norm": 0.7871934771537781,
42
  "learning_rate": 4.522542485937369e-05,
43
- "loss": 0.5126,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 2.5806451612903225,
48
- "grad_norm": 1.3395262956619263,
49
  "learning_rate": 4.0266196990885955e-05,
50
- "loss": 0.3244,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 3.010752688172043,
55
- "grad_norm": 0.8698714375495911,
56
  "learning_rate": 3.4070192633766025e-05,
57
- "loss": 0.305,
58
  "step": 70
59
  },
60
  {
61
  "epoch": 3.4408602150537635,
62
- "grad_norm": 0.6530447006225586,
63
  "learning_rate": 2.7139375211970996e-05,
64
- "loss": 0.1564,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 3.870967741935484,
69
- "grad_norm": 0.8609829545021057,
70
  "learning_rate": 2.003523833385637e-05,
71
- "loss": 0.1377,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 4.301075268817204,
76
- "grad_norm": 0.3412129282951355,
77
  "learning_rate": 1.3333316919358157e-05,
78
- "loss": 0.0779,
79
  "step": 100
80
  },
81
  {
82
  "epoch": 4.731182795698925,
83
- "grad_norm": 0.557332456111908,
84
  "learning_rate": 7.576560783617668e-06,
85
- "loss": 0.0658,
86
  "step": 110
87
  },
88
  {
89
  "epoch": 5.161290322580645,
90
- "grad_norm": 1.0119709968566895,
91
  "learning_rate": 3.2313480720055745e-06,
92
- "loss": 0.0629,
93
  "step": 120
94
  },
95
  {
96
  "epoch": 5.591397849462366,
97
- "grad_norm": 0.2670357823371887,
98
  "learning_rate": 6.497020764416633e-07,
99
- "loss": 0.0347,
100
  "step": 130
101
  }
102
  ],
 
10
  "log_history": [
11
  {
12
  "epoch": 0.43010752688172044,
13
+ "grad_norm": 2.765007972717285,
14
  "learning_rate": 1.785714285714286e-05,
15
+ "loss": 3.3581,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.8602150537634409,
20
+ "grad_norm": 1.702070713043213,
21
  "learning_rate": 3.571428571428572e-05,
22
+ "loss": 1.5448,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 1.2903225806451613,
27
+ "grad_norm": 1.2462737560272217,
28
  "learning_rate": 4.995922759815339e-05,
29
+ "loss": 0.8959,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 1.7204301075268817,
34
+ "grad_norm": 0.8611997365951538,
35
  "learning_rate": 4.854610909098812e-05,
36
+ "loss": 0.7328,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 2.150537634408602,
41
+ "grad_norm": 0.799420177936554,
42
  "learning_rate": 4.522542485937369e-05,
43
+ "loss": 0.5115,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 2.5806451612903225,
48
+ "grad_norm": 1.2618942260742188,
49
  "learning_rate": 4.0266196990885955e-05,
50
+ "loss": 0.326,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 3.010752688172043,
55
+ "grad_norm": 0.8298829197883606,
56
  "learning_rate": 3.4070192633766025e-05,
57
+ "loss": 0.3035,
58
  "step": 70
59
  },
60
  {
61
  "epoch": 3.4408602150537635,
62
+ "grad_norm": 1.2379834651947021,
63
  "learning_rate": 2.7139375211970996e-05,
64
+ "loss": 0.1585,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 3.870967741935484,
69
+ "grad_norm": 0.8900007009506226,
70
  "learning_rate": 2.003523833385637e-05,
71
+ "loss": 0.1343,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 4.301075268817204,
76
+ "grad_norm": 0.3209846019744873,
77
  "learning_rate": 1.3333316919358157e-05,
78
+ "loss": 0.0785,
79
  "step": 100
80
  },
81
  {
82
  "epoch": 4.731182795698925,
83
+ "grad_norm": 0.6245219111442566,
84
  "learning_rate": 7.576560783617668e-06,
85
+ "loss": 0.0657,
86
  "step": 110
87
  },
88
  {
89
  "epoch": 5.161290322580645,
90
+ "grad_norm": 0.9348724484443665,
91
  "learning_rate": 3.2313480720055745e-06,
92
+ "loss": 0.0631,
93
  "step": 120
94
  },
95
  {
96
  "epoch": 5.591397849462366,
97
+ "grad_norm": 0.2755952775478363,
98
  "learning_rate": 6.497020764416633e-07,
99
+ "loss": 0.0341,
100
  "step": 130
101
  }
102
  ],
checkpoint-138/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa102a25ed93818f0320b36531776c930373b97ab939677b5a8c91cda048d262
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8646d4a3d6bbf798daa136fe65071e072f6bfc40e35d3bacc930a5e4cc212740
3
  size 5368
runs/Sep27_13-22-25_018922eca950/events.out.tfevents.1727443416.018922eca950.4782.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79116f66d4db7413be82f4bb8a1fface37d5c77e3a0c66e020bd794d7c54857e
3
+ size 8737
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 5.935483870967742,
3
  "total_flos": 3871080172486656.0,
4
- "train_loss": 0.5972379109133845,
5
- "train_runtime": 866.5062,
6
- "train_samples_per_second": 1.288,
7
- "train_steps_per_second": 0.159
8
  }
 
1
  {
2
  "epoch": 5.935483870967742,
3
  "total_flos": 3871080172486656.0,
4
+ "train_loss": 0.597086531744487,
5
+ "train_runtime": 849.7455,
6
+ "train_samples_per_second": 1.313,
7
+ "train_steps_per_second": 0.162
8
  }
trainer_log.jsonl CHANGED
@@ -1,14 +1,14 @@
1
- {"current_steps": 10, "total_steps": 138, "loss": 3.3579, "learning_rate": 1.785714285714286e-05, "epoch": 0.43010752688172044, "percentage": 7.25, "elapsed_time": "0:01:02", "remaining_time": "0:13:21"}
2
- {"current_steps": 20, "total_steps": 138, "loss": 1.5449, "learning_rate": 3.571428571428572e-05, "epoch": 0.8602150537634409, "percentage": 14.49, "elapsed_time": "0:02:05", "remaining_time": "0:12:23"}
3
- {"current_steps": 30, "total_steps": 138, "loss": 0.896, "learning_rate": 4.995922759815339e-05, "epoch": 1.2903225806451613, "percentage": 21.74, "elapsed_time": "0:03:08", "remaining_time": "0:11:20"}
4
- {"current_steps": 40, "total_steps": 138, "loss": 0.7327, "learning_rate": 4.854610909098812e-05, "epoch": 1.7204301075268817, "percentage": 28.99, "elapsed_time": "0:04:10", "remaining_time": "0:10:14"}
5
- {"current_steps": 50, "total_steps": 138, "loss": 0.5126, "learning_rate": 4.522542485937369e-05, "epoch": 2.150537634408602, "percentage": 36.23, "elapsed_time": "0:05:13", "remaining_time": "0:09:11"}
6
- {"current_steps": 60, "total_steps": 138, "loss": 0.3244, "learning_rate": 4.0266196990885955e-05, "epoch": 2.5806451612903225, "percentage": 43.48, "elapsed_time": "0:06:16", "remaining_time": "0:08:09"}
7
- {"current_steps": 70, "total_steps": 138, "loss": 0.305, "learning_rate": 3.4070192633766025e-05, "epoch": 3.010752688172043, "percentage": 50.72, "elapsed_time": "0:07:19", "remaining_time": "0:07:06"}
8
- {"current_steps": 80, "total_steps": 138, "loss": 0.1564, "learning_rate": 2.7139375211970996e-05, "epoch": 3.4408602150537635, "percentage": 57.97, "elapsed_time": "0:08:21", "remaining_time": "0:06:03"}
9
- {"current_steps": 90, "total_steps": 138, "loss": 0.1377, "learning_rate": 2.003523833385637e-05, "epoch": 3.870967741935484, "percentage": 65.22, "elapsed_time": "0:09:24", "remaining_time": "0:05:01"}
10
- {"current_steps": 100, "total_steps": 138, "loss": 0.0779, "learning_rate": 1.3333316919358157e-05, "epoch": 4.301075268817204, "percentage": 72.46, "elapsed_time": "0:10:26", "remaining_time": "0:03:58"}
11
- {"current_steps": 110, "total_steps": 138, "loss": 0.0658, "learning_rate": 7.576560783617668e-06, "epoch": 4.731182795698925, "percentage": 79.71, "elapsed_time": "0:11:29", "remaining_time": "0:02:55"}
12
- {"current_steps": 120, "total_steps": 138, "loss": 0.0629, "learning_rate": 3.2313480720055745e-06, "epoch": 5.161290322580645, "percentage": 86.96, "elapsed_time": "0:12:32", "remaining_time": "0:01:52"}
13
- {"current_steps": 130, "total_steps": 138, "loss": 0.0347, "learning_rate": 6.497020764416633e-07, "epoch": 5.591397849462366, "percentage": 94.2, "elapsed_time": "0:13:34", "remaining_time": "0:00:50"}
14
- {"current_steps": 138, "total_steps": 138, "epoch": 5.935483870967742, "percentage": 100.0, "elapsed_time": "0:14:26", "remaining_time": "0:00:00"}
 
1
+ {"current_steps": 10, "total_steps": 138, "loss": 3.3581, "learning_rate": 1.785714285714286e-05, "epoch": 0.43010752688172044, "percentage": 7.25, "elapsed_time": "0:00:59", "remaining_time": "0:12:45"}
2
+ {"current_steps": 20, "total_steps": 138, "loss": 1.5448, "learning_rate": 3.571428571428572e-05, "epoch": 0.8602150537634409, "percentage": 14.49, "elapsed_time": "0:02:00", "remaining_time": "0:11:48"}
3
+ {"current_steps": 30, "total_steps": 138, "loss": 0.8959, "learning_rate": 4.995922759815339e-05, "epoch": 1.2903225806451613, "percentage": 21.74, "elapsed_time": "0:03:01", "remaining_time": "0:10:55"}
4
+ {"current_steps": 40, "total_steps": 138, "loss": 0.7328, "learning_rate": 4.854610909098812e-05, "epoch": 1.7204301075268817, "percentage": 28.99, "elapsed_time": "0:04:02", "remaining_time": "0:09:54"}
5
+ {"current_steps": 50, "total_steps": 138, "loss": 0.5115, "learning_rate": 4.522542485937369e-05, "epoch": 2.150537634408602, "percentage": 36.23, "elapsed_time": "0:05:03", "remaining_time": "0:08:54"}
6
+ {"current_steps": 60, "total_steps": 138, "loss": 0.326, "learning_rate": 4.0266196990885955e-05, "epoch": 2.5806451612903225, "percentage": 43.48, "elapsed_time": "0:06:05", "remaining_time": "0:07:54"}
7
+ {"current_steps": 70, "total_steps": 138, "loss": 0.3035, "learning_rate": 3.4070192633766025e-05, "epoch": 3.010752688172043, "percentage": 50.72, "elapsed_time": "0:07:06", "remaining_time": "0:06:54"}
8
+ {"current_steps": 80, "total_steps": 138, "loss": 0.1585, "learning_rate": 2.7139375211970996e-05, "epoch": 3.4408602150537635, "percentage": 57.97, "elapsed_time": "0:08:07", "remaining_time": "0:05:53"}
9
+ {"current_steps": 90, "total_steps": 138, "loss": 0.1343, "learning_rate": 2.003523833385637e-05, "epoch": 3.870967741935484, "percentage": 65.22, "elapsed_time": "0:09:09", "remaining_time": "0:04:53"}
10
+ {"current_steps": 100, "total_steps": 138, "loss": 0.0785, "learning_rate": 1.3333316919358157e-05, "epoch": 4.301075268817204, "percentage": 72.46, "elapsed_time": "0:10:10", "remaining_time": "0:03:52"}
11
+ {"current_steps": 110, "total_steps": 138, "loss": 0.0657, "learning_rate": 7.576560783617668e-06, "epoch": 4.731182795698925, "percentage": 79.71, "elapsed_time": "0:11:11", "remaining_time": "0:02:50"}
12
+ {"current_steps": 120, "total_steps": 138, "loss": 0.0631, "learning_rate": 3.2313480720055745e-06, "epoch": 5.161290322580645, "percentage": 86.96, "elapsed_time": "0:12:13", "remaining_time": "0:01:49"}
13
+ {"current_steps": 130, "total_steps": 138, "loss": 0.0341, "learning_rate": 6.497020764416633e-07, "epoch": 5.591397849462366, "percentage": 94.2, "elapsed_time": "0:13:14", "remaining_time": "0:00:48"}
14
+ {"current_steps": 138, "total_steps": 138, "epoch": 5.935483870967742, "percentage": 100.0, "elapsed_time": "0:14:09", "remaining_time": "0:00:00"}
trainer_state.json CHANGED
@@ -10,103 +10,103 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.43010752688172044,
13
- "grad_norm": 2.766061782836914,
14
  "learning_rate": 1.785714285714286e-05,
15
- "loss": 3.3579,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.8602150537634409,
20
- "grad_norm": 1.7068594694137573,
21
  "learning_rate": 3.571428571428572e-05,
22
- "loss": 1.5449,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 1.2903225806451613,
27
- "grad_norm": 1.2398000955581665,
28
  "learning_rate": 4.995922759815339e-05,
29
- "loss": 0.896,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 1.7204301075268817,
34
- "grad_norm": 0.8611450791358948,
35
  "learning_rate": 4.854610909098812e-05,
36
- "loss": 0.7327,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 2.150537634408602,
41
- "grad_norm": 0.7871934771537781,
42
  "learning_rate": 4.522542485937369e-05,
43
- "loss": 0.5126,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 2.5806451612903225,
48
- "grad_norm": 1.3395262956619263,
49
  "learning_rate": 4.0266196990885955e-05,
50
- "loss": 0.3244,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 3.010752688172043,
55
- "grad_norm": 0.8698714375495911,
56
  "learning_rate": 3.4070192633766025e-05,
57
- "loss": 0.305,
58
  "step": 70
59
  },
60
  {
61
  "epoch": 3.4408602150537635,
62
- "grad_norm": 0.6530447006225586,
63
  "learning_rate": 2.7139375211970996e-05,
64
- "loss": 0.1564,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 3.870967741935484,
69
- "grad_norm": 0.8609829545021057,
70
  "learning_rate": 2.003523833385637e-05,
71
- "loss": 0.1377,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 4.301075268817204,
76
- "grad_norm": 0.3412129282951355,
77
  "learning_rate": 1.3333316919358157e-05,
78
- "loss": 0.0779,
79
  "step": 100
80
  },
81
  {
82
  "epoch": 4.731182795698925,
83
- "grad_norm": 0.557332456111908,
84
  "learning_rate": 7.576560783617668e-06,
85
- "loss": 0.0658,
86
  "step": 110
87
  },
88
  {
89
  "epoch": 5.161290322580645,
90
- "grad_norm": 1.0119709968566895,
91
  "learning_rate": 3.2313480720055745e-06,
92
- "loss": 0.0629,
93
  "step": 120
94
  },
95
  {
96
  "epoch": 5.591397849462366,
97
- "grad_norm": 0.2670357823371887,
98
  "learning_rate": 6.497020764416633e-07,
99
- "loss": 0.0347,
100
  "step": 130
101
  },
102
  {
103
  "epoch": 5.935483870967742,
104
  "step": 138,
105
  "total_flos": 3871080172486656.0,
106
- "train_loss": 0.5972379109133845,
107
- "train_runtime": 866.5062,
108
- "train_samples_per_second": 1.288,
109
- "train_steps_per_second": 0.159
110
  }
111
  ],
112
  "logging_steps": 10,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.43010752688172044,
13
+ "grad_norm": 2.765007972717285,
14
  "learning_rate": 1.785714285714286e-05,
15
+ "loss": 3.3581,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.8602150537634409,
20
+ "grad_norm": 1.702070713043213,
21
  "learning_rate": 3.571428571428572e-05,
22
+ "loss": 1.5448,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 1.2903225806451613,
27
+ "grad_norm": 1.2462737560272217,
28
  "learning_rate": 4.995922759815339e-05,
29
+ "loss": 0.8959,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 1.7204301075268817,
34
+ "grad_norm": 0.8611997365951538,
35
  "learning_rate": 4.854610909098812e-05,
36
+ "loss": 0.7328,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 2.150537634408602,
41
+ "grad_norm": 0.799420177936554,
42
  "learning_rate": 4.522542485937369e-05,
43
+ "loss": 0.5115,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 2.5806451612903225,
48
+ "grad_norm": 1.2618942260742188,
49
  "learning_rate": 4.0266196990885955e-05,
50
+ "loss": 0.326,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 3.010752688172043,
55
+ "grad_norm": 0.8298829197883606,
56
  "learning_rate": 3.4070192633766025e-05,
57
+ "loss": 0.3035,
58
  "step": 70
59
  },
60
  {
61
  "epoch": 3.4408602150537635,
62
+ "grad_norm": 1.2379834651947021,
63
  "learning_rate": 2.7139375211970996e-05,
64
+ "loss": 0.1585,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 3.870967741935484,
69
+ "grad_norm": 0.8900007009506226,
70
  "learning_rate": 2.003523833385637e-05,
71
+ "loss": 0.1343,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 4.301075268817204,
76
+ "grad_norm": 0.3209846019744873,
77
  "learning_rate": 1.3333316919358157e-05,
78
+ "loss": 0.0785,
79
  "step": 100
80
  },
81
  {
82
  "epoch": 4.731182795698925,
83
+ "grad_norm": 0.6245219111442566,
84
  "learning_rate": 7.576560783617668e-06,
85
+ "loss": 0.0657,
86
  "step": 110
87
  },
88
  {
89
  "epoch": 5.161290322580645,
90
+ "grad_norm": 0.9348724484443665,
91
  "learning_rate": 3.2313480720055745e-06,
92
+ "loss": 0.0631,
93
  "step": 120
94
  },
95
  {
96
  "epoch": 5.591397849462366,
97
+ "grad_norm": 0.2755952775478363,
98
  "learning_rate": 6.497020764416633e-07,
99
+ "loss": 0.0341,
100
  "step": 130
101
  },
102
  {
103
  "epoch": 5.935483870967742,
104
  "step": 138,
105
  "total_flos": 3871080172486656.0,
106
+ "train_loss": 0.597086531744487,
107
+ "train_runtime": 849.7455,
108
+ "train_samples_per_second": 1.313,
109
+ "train_steps_per_second": 0.162
110
  }
111
  ],
112
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa102a25ed93818f0320b36531776c930373b97ab939677b5a8c91cda048d262
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8646d4a3d6bbf798daa136fe65071e072f6bfc40e35d3bacc930a5e4cc212740
3
  size 5368