Andrija committed on
Commit c996931
1 parent: c05db31

Commit from Andrija

Files changed (6)
  1. config.json +23 -0
  2. log_history.json +282 -0
  3. merges.txt +0 -0
  4. pytorch_model.bin +3 -0
  5. training_args.bin +3 -0
  6. vocab.json +0 -0
config.json ADDED
@@ -0,0 +1,23 @@
+ {
+ "architectures": [
+ "RobertaForMaskedLM"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "bos_token_id": 0,
+ "eos_token_id": 2,
+ "gradient_checkpointing": false,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 514,
+ "model_type": "roberta",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 6,
+ "pad_token_id": 1,
+ "total_flos": 86931019038720000,
+ "type_vocab_size": 1,
+ "vocab_size": 100000
+ }
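
The config describes a compact RoBERTa encoder set up for masked language modelling: 6 hidden layers, 768-dim hidden states, 12 attention heads, and a 100,000-token vocabulary. As a minimal sketch (assuming the transformers library is installed and config.json has been downloaded locally; the path is an assumption, not part of this commit), the architecture can be instantiated like this:

```python
# Minimal sketch: build the model that config.json describes.
# Assumes config.json is in the working directory.
from transformers import RobertaConfig, RobertaForMaskedLM

config = RobertaConfig.from_json_file("config.json")
model = RobertaForMaskedLM(config)  # fresh, randomly initialised weights

print(config.num_hidden_layers, config.hidden_size, config.vocab_size)
# -> 6 768 100000
```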
log_history.json ADDED
@@ -0,0 +1,282 @@
+ [
+ {
+ "loss": 8.7351650390625,
+ "learning_rate": 4.875491807360925e-05,
+ "epoch": 0.02490163852781513,
+ "total_flos": 2173275475968000,
+ "step": 500
+ },
+ {
+ "loss": 8.1599990234375,
+ "learning_rate": 4.750983614721849e-05,
+ "epoch": 0.04980327705563026,
+ "total_flos": 4346550951936000,
+ "step": 1000
+ },
+ {
+ "loss": 7.94335546875,
+ "learning_rate": 4.6264754220827736e-05,
+ "epoch": 0.07470491558344539,
+ "total_flos": 6519826427904000,
+ "step": 1500
+ },
+ {
+ "loss": 7.817630859375,
+ "learning_rate": 4.5019672294436976e-05,
+ "epoch": 0.09960655411126051,
+ "total_flos": 8693101903872000,
+ "step": 2000
+ },
+ {
+ "loss": 7.721162109375,
+ "learning_rate": 4.377459036804622e-05,
+ "epoch": 0.12450819263907566,
+ "total_flos": 10866377379840000,
+ "step": 2500
+ },
+ {
+ "loss": 7.63598046875,
+ "learning_rate": 4.252950844165546e-05,
+ "epoch": 0.14940983116689077,
+ "total_flos": 13039652855808000,
+ "step": 3000
+ },
+ {
+ "loss": 7.557828125,
+ "learning_rate": 4.128442651526471e-05,
+ "epoch": 0.1743114696947059,
+ "total_flos": 15212928331776000,
+ "step": 3500
+ },
+ {
+ "loss": 7.47056640625,
+ "learning_rate": 4.003934458887395e-05,
+ "epoch": 0.19921310822252103,
+ "total_flos": 17386203807744000,
+ "step": 4000
+ },
+ {
+ "loss": 7.40528125,
+ "learning_rate": 3.8794262662483196e-05,
+ "epoch": 0.22411474675033619,
+ "total_flos": 19559479283712000,
+ "step": 4500
+ },
+ {
+ "loss": 7.32109375,
+ "learning_rate": 3.7549180736092436e-05,
+ "epoch": 0.24901638527815131,
+ "total_flos": 21732754759680000,
+ "step": 5000
+ },
+ {
+ "loss": 7.1830703125,
+ "learning_rate": 3.630409880970168e-05,
+ "epoch": 0.2739180238059664,
+ "total_flos": 23906030235648000,
+ "step": 5500
+ },
+ {
+ "loss": 7.075640625,
+ "learning_rate": 3.505901688331092e-05,
+ "epoch": 0.29881966233378154,
+ "total_flos": 26079305711616000,
+ "step": 6000
+ },
+ {
+ "loss": 6.9607734375,
+ "learning_rate": 3.381393495692017e-05,
+ "epoch": 0.3237213008615967,
+ "total_flos": 28252581187584000,
+ "step": 6500
+ },
+ {
+ "loss": 6.8397734375,
+ "learning_rate": 3.256885303052941e-05,
+ "epoch": 0.3486229393894118,
+ "total_flos": 30425856663552000,
+ "step": 7000
+ },
+ {
+ "loss": 6.7368984375,
+ "learning_rate": 3.1323771104138656e-05,
+ "epoch": 0.37352457791722693,
+ "total_flos": 32599132139520000,
+ "step": 7500
+ },
+ {
+ "loss": 6.6454296875,
+ "learning_rate": 3.00786891777479e-05,
+ "epoch": 0.39842621644504206,
+ "total_flos": 34772407615488000,
+ "step": 8000
+ },
+ {
+ "loss": 6.573265625,
+ "learning_rate": 2.8833607251357143e-05,
+ "epoch": 0.4233278549728572,
+ "total_flos": 36945683091456000,
+ "step": 8500
+ },
+ {
+ "loss": 6.4836328125,
+ "learning_rate": 2.7588525324966386e-05,
+ "epoch": 0.44822949350067237,
+ "total_flos": 39118958567424000,
+ "step": 9000
+ },
+ {
+ "loss": 6.418890625,
+ "learning_rate": 2.634344339857563e-05,
+ "epoch": 0.4731311320284875,
+ "total_flos": 41292234043392000,
+ "step": 9500
+ },
+ {
+ "loss": 6.3506875,
+ "learning_rate": 2.5098361472184873e-05,
+ "epoch": 0.49803277055630263,
+ "total_flos": 43465509519360000,
+ "step": 10000
+ },
+ {
+ "loss": 6.298296875,
+ "learning_rate": 2.3853279545794113e-05,
+ "epoch": 0.5229344090841177,
+ "total_flos": 45638784995328000,
+ "step": 10500
+ },
+ {
+ "loss": 6.242953125,
+ "learning_rate": 2.2608197619403356e-05,
+ "epoch": 0.5478360476119328,
+ "total_flos": 47812060471296000,
+ "step": 11000
+ },
+ {
+ "loss": 6.193828125,
+ "learning_rate": 2.13631156930126e-05,
+ "epoch": 0.572737686139748,
+ "total_flos": 49985335947264000,
+ "step": 11500
+ },
+ {
+ "loss": 6.149265625,
+ "learning_rate": 2.0118033766621843e-05,
+ "epoch": 0.5976393246675631,
+ "total_flos": 52158611423232000,
+ "step": 12000
+ },
+ {
+ "loss": 6.112984375,
+ "learning_rate": 1.8872951840231086e-05,
+ "epoch": 0.6225409631953782,
+ "total_flos": 54331886899200000,
+ "step": 12500
+ },
+ {
+ "loss": 6.054078125,
+ "learning_rate": 1.762786991384033e-05,
+ "epoch": 0.6474426017231933,
+ "total_flos": 56505162375168000,
+ "step": 13000
+ },
+ {
+ "loss": 6.0306875,
+ "learning_rate": 1.6382787987449573e-05,
+ "epoch": 0.6723442402510085,
+ "total_flos": 58678437851136000,
+ "step": 13500
+ },
+ {
+ "loss": 5.998078125,
+ "learning_rate": 1.5137706061058818e-05,
+ "epoch": 0.6972458787788236,
+ "total_flos": 60851713327104000,
+ "step": 14000
+ },
+ {
+ "loss": 5.964859375,
+ "learning_rate": 1.3892624134668061e-05,
+ "epoch": 0.7221475173066387,
+ "total_flos": 63024988803072000,
+ "step": 14500
+ },
+ {
+ "loss": 5.938515625,
+ "learning_rate": 1.2647542208277304e-05,
+ "epoch": 0.7470491558344539,
+ "total_flos": 65198264279040000,
+ "step": 15000
+ },
+ {
+ "loss": 5.9313125,
+ "learning_rate": 1.1402460281886548e-05,
+ "epoch": 0.771950794362269,
+ "total_flos": 67371539755008000,
+ "step": 15500
+ },
+ {
+ "loss": 5.897203125,
+ "learning_rate": 1.0157378355495791e-05,
+ "epoch": 0.7968524328900841,
+ "total_flos": 69544815230976000,
+ "step": 16000
+ },
+ {
+ "loss": 5.880890625,
+ "learning_rate": 8.912296429105036e-06,
+ "epoch": 0.8217540714178992,
+ "total_flos": 71718090706944000,
+ "step": 16500
+ },
+ {
+ "loss": 5.86578125,
+ "learning_rate": 7.66721450271428e-06,
+ "epoch": 0.8466557099457144,
+ "total_flos": 73891366182912000,
+ "step": 17000
+ },
+ {
+ "loss": 5.839921875,
+ "learning_rate": 6.422132576323522e-06,
+ "epoch": 0.8715573484735295,
+ "total_flos": 76064641658880000,
+ "step": 17500
+ },
+ {
+ "loss": 5.851390625,
+ "learning_rate": 5.177050649932766e-06,
+ "epoch": 0.8964589870013447,
+ "total_flos": 78237917134848000,
+ "step": 18000
+ },
+ {
+ "loss": 5.8313125,
+ "learning_rate": 3.9319687235420096e-06,
+ "epoch": 0.9213606255291599,
+ "total_flos": 80411192610816000,
+ "step": 18500
+ },
+ {
+ "loss": 5.813203125,
+ "learning_rate": 2.6868867971512525e-06,
+ "epoch": 0.946262264056975,
+ "total_flos": 82584468086784000,
+ "step": 19000
+ },
+ {
+ "loss": 5.82146875,
+ "learning_rate": 1.441804870760496e-06,
+ "epoch": 0.9711639025847901,
+ "total_flos": 84757743562752000,
+ "step": 19500
+ },
+ {
+ "loss": 5.809875,
+ "learning_rate": 1.9672294436973953e-07,
+ "epoch": 0.9960655411126053,
+ "total_flos": 86931019038720000,
+ "step": 20000
+ }
+ ]
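
log_history.json is a flat list of Trainer log entries written every 500 steps: the MLM loss falls from about 8.74 at step 500 to about 5.81 at step 20,000 while the learning rate decays linearly toward zero, and the final total_flos matches the value recorded in config.json. A quick sketch for plotting the curve (assuming the file sits in the working directory and matplotlib is installed):

```python
# Sketch: plot training loss against optimiser step.
# Assumes log_history.json is in the working directory.
import json

import matplotlib.pyplot as plt

with open("log_history.json") as f:
    history = json.load(f)

steps = [entry["step"] for entry in history]
losses = [entry["loss"] for entry in history]

plt.plot(steps, losses)
plt.xlabel("step")
plt.ylabel("training loss")
plt.savefig("loss_curve.png")
```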
merges.txt ADDED
The diff for this file is too large to render. See raw diff
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e49cdca272f55169d00329bcf0954449b134b1e4fcc5d98b96068f1363a3b840
+ size 481714453
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:71367e5afabecf4d5bd74a4656cae6608c712860509d15976441da4b74f41a85
+ size 1775
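
Both .bin files are stored with Git LFS, so the diff records only the three-line pointer (spec version, SHA-256 of the blob, and size in bytes: ~481 MB of weights, 1,775 bytes of serialized training arguments) rather than the binaries themselves. One way to resolve a pointer to the real file is the huggingface_hub client; the repo id below is a placeholder, not taken from this page:

```python
# Sketch: download the LFS-backed weights via the Hub client.
# "user/model" is a hypothetical repo id; substitute the real one.
from huggingface_hub import hf_hub_download

weights_path = hf_hub_download(repo_id="user/model", filename="pytorch_model.bin")
print(weights_path)  # local cache path; the file itself is 481,714,453 bytes
```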
vocab.json ADDED
The diff for this file is too large to render. See raw diff
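
vocab.json and merges.txt together define the byte-level BPE tokenizer; the 100,000-entry vocabulary matches vocab_size in config.json. A sketch of loading the pair directly (assuming both files are local and compatible with the standard RoBERTa tokenizer class):

```python
# Sketch: build the tokenizer from the raw vocab/merges pair.
# Assumes vocab.json and merges.txt are in the working directory.
from transformers import RobertaTokenizer

tokenizer = RobertaTokenizer(vocab_file="vocab.json", merges_file="merges.txt")
print(tokenizer.tokenize("masked language modelling"))
```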