lectura committed on
Commit fe51da7
1 Parent(s): fbe05ff

Training in progress, epoch 2, checkpoint

checkpoint-24/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d7f78bc5ea7f4252d449721502cb917964df910514248d9c38bbd319537af913
+ oid sha256:e629d9af878b91a50f76066a24d4341b70142f260fe704ea5b38cc90d28479f4
  size 482171848
checkpoint-24/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2923b0e54a5c211188c14ba7405fa9d24db1b76d8edd86531e13abdf3457efac
+ oid sha256:090c04cb70716eee6c26b1fafc5f345bb9d12db84727f777cae58a27c1e41b1d
  size 964412154
checkpoint-24/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:03aeece81560affa84dae04d2900877c560b5f1ad4d9ae0078fb00fadcbd9684
+ oid sha256:17c2c7ff34ed3fd287d125d27abf4fdde9643db0dadf9873052e2f40d2ceb990
  size 1064
checkpoint-24/trainer_state.json CHANGED
@@ -10,152 +10,152 @@
  "log_history": [
  {
  "epoch": 0.12,
- "learning_rate": 9.937500000000001e-06,
+ "learning_rate": 9.975000000000002e-06,
  "loss": 12.7837,
  "step": 1
  },
  {
  "epoch": 0.25,
- "learning_rate": 9.875000000000001e-06,
+ "learning_rate": 9.950000000000001e-06,
  "loss": 12.4595,
  "step": 2
  },
  {
  "epoch": 0.37,
- "learning_rate": 9.8125e-06,
- "loss": 11.9121,
+ "learning_rate": 9.925e-06,
+ "loss": 11.9117,
  "step": 3
  },
  {
  "epoch": 0.49,
- "learning_rate": 9.75e-06,
- "loss": 11.6628,
+ "learning_rate": 9.9e-06,
+ "loss": 11.662,
  "step": 4
  },
  {
  "epoch": 0.62,
- "learning_rate": 9.6875e-06,
- "loss": 11.4327,
+ "learning_rate": 9.875000000000001e-06,
+ "loss": 11.4278,
  "step": 5
  },
  {
  "epoch": 0.74,
- "learning_rate": 9.625e-06,
- "loss": 11.1488,
+ "learning_rate": 9.85e-06,
+ "loss": 11.1398,
  "step": 6
  },
  {
  "epoch": 0.86,
- "learning_rate": 9.562500000000002e-06,
- "loss": 10.9209,
+ "learning_rate": 9.825000000000002e-06,
+ "loss": 10.9135,
  "step": 7
  },
  {
  "epoch": 0.98,
- "learning_rate": 9.5e-06,
- "loss": 10.8448,
+ "learning_rate": 9.800000000000001e-06,
+ "loss": 10.8274,
  "step": 8
  },
  {
  "epoch": 1.11,
- "learning_rate": 9.4375e-06,
- "loss": 10.531,
+ "learning_rate": 9.775e-06,
+ "loss": 10.5068,
  "step": 9
  },
  {
  "epoch": 1.23,
- "learning_rate": 9.375000000000001e-06,
- "loss": 10.3767,
+ "learning_rate": 9.75e-06,
+ "loss": 10.3551,
  "step": 10
  },
  {
  "epoch": 1.35,
- "learning_rate": 9.312500000000001e-06,
- "loss": 10.201,
+ "learning_rate": 9.725000000000001e-06,
+ "loss": 10.1776,
  "step": 11
  },
  {
  "epoch": 1.48,
- "learning_rate": 9.250000000000001e-06,
- "loss": 10.1096,
+ "learning_rate": 9.7e-06,
+ "loss": 10.0708,
  "step": 12
  },
  {
  "epoch": 1.6,
- "learning_rate": 9.1875e-06,
- "loss": 9.9072,
+ "learning_rate": 9.675000000000001e-06,
+ "loss": 9.8703,
  "step": 13
  },
  {
  "epoch": 1.72,
- "learning_rate": 9.125e-06,
- "loss": 9.7853,
+ "learning_rate": 9.65e-06,
+ "loss": 9.7534,
  "step": 14
  },
  {
  "epoch": 1.85,
- "learning_rate": 9.0625e-06,
- "loss": 9.6567,
+ "learning_rate": 9.625e-06,
+ "loss": 9.6094,
  "step": 15
  },
  {
  "epoch": 1.97,
- "learning_rate": 9e-06,
- "loss": 9.4543,
+ "learning_rate": 9.600000000000001e-06,
+ "loss": 9.3989,
  "step": 16
  },
  {
  "epoch": 2.09,
- "learning_rate": 8.9375e-06,
- "loss": 9.3335,
+ "learning_rate": 9.575e-06,
+ "loss": 9.2753,
  "step": 17
  },
  {
  "epoch": 2.22,
- "learning_rate": 8.875e-06,
- "loss": 9.2063,
+ "learning_rate": 9.55e-06,
+ "loss": 9.138,
  "step": 18
  },
  {
  "epoch": 2.34,
- "learning_rate": 8.8125e-06,
- "loss": 9.1388,
+ "learning_rate": 9.525000000000001e-06,
+ "loss": 9.0598,
  "step": 19
  },
  {
  "epoch": 2.46,
- "learning_rate": 8.750000000000001e-06,
- "loss": 8.9317,
+ "learning_rate": 9.5e-06,
+ "loss": 8.8535,
  "step": 20
  },
  {
  "epoch": 2.58,
- "learning_rate": 8.687500000000001e-06,
- "loss": 8.8246,
+ "learning_rate": 9.475000000000002e-06,
+ "loss": 8.7378,
  "step": 21
  },
  {
  "epoch": 2.71,
- "learning_rate": 8.625000000000001e-06,
- "loss": 8.7073,
+ "learning_rate": 9.450000000000001e-06,
+ "loss": 8.6082,
  "step": 22
  },
  {
  "epoch": 2.83,
- "learning_rate": 8.5625e-06,
- "loss": 8.5972,
+ "learning_rate": 9.425e-06,
+ "loss": 8.4904,
  "step": 23
  },
  {
  "epoch": 2.95,
- "learning_rate": 8.5e-06,
- "loss": 8.408,
+ "learning_rate": 9.4e-06,
+ "loss": 8.2918,
  "step": 24
  }
  ],
  "logging_steps": 1,
- "max_steps": 160,
- "num_train_epochs": 20,
+ "max_steps": 400,
+ "num_train_epochs": 50,
  "save_steps": 500,
  "total_flos": 1619109035900928.0,
  "trial_name": null,
checkpoint-24/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3a6fef67a6d9ec40dd5de4f4428a33ef7c86345cb77990f465d9680f0009f594
+ oid sha256:60568d4dd0f6572abe8a02d69c2d1f0c25627e4687509d3e2f27d2eb514e2587
  size 4600
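
To continue training from this checkpoint, the Trainer can be pointed back at the saved directory. A hedged sketch assuming the standard transformers Trainer API; the model, dataset, and argument values below are placeholders, not read from this repository:

# Hypothetical resume sketch; `model` and `train_dataset` are assumed to be defined elsewhere.
from transformers import Trainer, TrainingArguments

args = TrainingArguments(output_dir="out", num_train_epochs=50, save_steps=500, logging_steps=1)
trainer = Trainer(model=model, args=args, train_dataset=train_dataset)
trainer.train(resume_from_checkpoint="checkpoint-24")  # restores model, optimizer, scheduler, and trainer_state
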