JulienRPA commited on
Commit
adcf6be
1 Parent(s): 6192378

Model save

Browse files
generation_config.json CHANGED
@@ -1,5 +1,10 @@
1
  {
2
- "_from_model_config": true,
 
 
 
 
 
3
  "pad_token_id": 0,
4
- "transformers_version": "4.29.2"
5
  }
 
1
  {
2
+ "decoder_start_token_id": 101,
3
+ "early_stopping": true,
4
+ "eos_token_id": 102,
5
+ "max_length": 256,
6
+ "min_length": 16,
7
+ "num_beams": 10,
8
  "pad_token_id": 0,
9
+ "transformers_version": "4.30.0.dev0"
10
  }
last-checkpoint/added_tokens.json DELETED
@@ -1,446 +0,0 @@
1
- {
2
- "ACV": 31381,
3
- "Absolute": 31398,
4
- "Acidic": 31331,
5
- "Additive": 31301,
6
- "Admittance": 31228,
7
- "Affinity": 31407,
8
- "Alkaline": 31410,
9
- "Alternating": 31428,
10
- "Aluminum": 31446,
11
- "Amount": 31283,
12
- "Amperostat": 31359,
13
- "Analyzer": 31324,
14
- "Angle": 31420,
15
- "Angluar": 31356,
16
- "Anglular": 31348,
17
- "Angstrom": 31193,
18
- "Angular": 31472,
19
- "Anodic": 31334,
20
- "Approximation": 31440,
21
- "Aqueous": 31346,
22
- "Areal": 31425,
23
- "Areic": 31462,
24
- "Artifact": 31456,
25
- "Atom": 31352,
26
- "Auxiliary": 31387,
27
- "BAE": 31368,
28
- "Bandgap": 31421,
29
- "Battery": 31328,
30
- "Binary": 31307,
31
- "Binder": 31297,
32
- "Bode": 31191,
33
- "Breadth": 31338,
34
- "Bulk": 31196,
35
- "Butler": 31310,
36
- "Button": 31467,
37
- "CCCV": 31312,
38
- "CMC": 31315,
39
- "CPE": 31202,
40
- "CSV": 31336,
41
- "Capacitance": 31238,
42
- "Capacity": 31295,
43
- "Carnot": 31363,
44
- "Cathodic": 31357,
45
- "Characterisation": 31214,
46
- "Charge": 31460,
47
- "Charging": 31265,
48
- "Closed": 31285,
49
- "Cobalt": 31350,
50
- "Coefficient": 31289,
51
- "Coin": 31270,
52
- "Collector": 31445,
53
- "Conductance": 31263,
54
- "Conductive": 31454,
55
- "Conductivity": 31395,
56
- "Conductor": 31284,
57
- "Continuant": 31287,
58
- "Continuum": 31430,
59
- "Conversion": 31333,
60
- "Coulombic": 31218,
61
- "Coupling": 31192,
62
- "Coverage": 31200,
63
- "Crystalization": 31400,
64
- "Curve": 31215,
65
- "Cutoff": 31256,
66
- "Cutting": 31216,
67
- "Cycling": 31261,
68
- "Cylindrical": 31325,
69
- "DFN": 31293,
70
- "DMC": 31302,
71
- "DME": 31457,
72
- "DPV": 31183,
73
- "Debye": 31286,
74
- "Dedomena": 31222,
75
- "Discharge": 31274,
76
- "Discharging": 31452,
77
- "Disintegration": 31469,
78
- "Displacement": 31451,
79
- "Dissociation": 31354,
80
- "Donnan": 31210,
81
- "Doyle": 31366,
82
- "Drag": 31306,
83
- "EASA": 31320,
84
- "ECSA": 31418,
85
- "EIS": 31187,
86
- "EMC": 31409,
87
- "Elaboration": 31358,
88
- "Electrochemical": 31384,
89
- "Electrochemically": 31424,
90
- "Electrode": 31322,
91
- "Electrolyte": 31379,
92
- "Electrolytic": 31185,
93
- "Electromagnetic": 31203,
94
- "Electronically": 31471,
95
- "Electrons": 31271,
96
- "Electrooxidation": 31374,
97
- "Electroreduction": 31235,
98
- "Elemental": 31288,
99
- "Elementary": 31405,
100
- "Embarassingly": 31329,
101
- "Encoded": 31246,
102
- "Endurant": 31355,
103
- "Engineered": 31390,
104
- "Entity": 31294,
105
- "Equilibrium": 31209,
106
- "Equipment": 31197,
107
- "Erdey": 31237,
108
- "Executable": 31205,
109
- "Expansion": 31275,
110
- "Extinction": 31199,
111
- "FEC": 31389,
112
- "FRA": 31221,
113
- "Faraday": 31367,
114
- "Fermionic": 31364,
115
- "Float": 31231,
116
- "Forming": 31220,
117
- "Friction": 31386,
118
- "Fuller": 31305,
119
- "GDE": 31251,
120
- "Galvanizing": 31442,
121
- "Galvanostatic": 31415,
122
- "Generative": 31253,
123
- "Gravimetric": 31435,
124
- "Gruz": 31280,
125
- "Gyromagnetic": 31378,
126
- "Helmholtz": 31239,
127
- "Hold": 31250,
128
- "Holistic": 31244,
129
- "Icon": 31369,
130
- "Impedance": 31273,
131
- "Indifferent": 31184,
132
- "Inductance": 31223,
133
- "Inductive": 31383,
134
- "Inert": 31186,
135
- "Insertion": 31433,
136
- "Insulance": 31402,
137
- "Intangible": 31373,
138
- "Integral": 31427,
139
- "Intentional": 31317,
140
- "Intercalation": 31225,
141
- "Intermittent": 31245,
142
- "Interruption": 31240,
143
- "Invariant": 31278,
144
- "Inverse": 31453,
145
- "Ionic": 31434,
146
- "Ionization": 31319,
147
- "Jar": 31464,
148
- "Kynar": 31343,
149
- "LCO": 31401,
150
- "LFP": 31316,
151
- "LMO": 31303,
152
- "LNO": 31224,
153
- "LSV": 31365,
154
- "LTO": 31188,
155
- "Larmor": 31392,
156
- "Lifetime": 31236,
157
- "Limit": 31298,
158
- "Limiting": 31432,
159
- "Lineic": 31466,
160
- "Liquid": 31441,
161
- "Lithium": 31423,
162
- "Loading": 31266,
163
- "Loose": 31397,
164
- "Lorenz": 31309,
165
- "Macroscopic": 31361,
166
- "Magnetizing": 31399,
167
- "Magnetogyric": 31388,
168
- "Manganese": 31443,
169
- "Massic": 31272,
170
- "Maximal": 31360,
171
- "Measurand": 31347,
172
- "Measured": 31217,
173
- "Mereological": 31242,
174
- "Micron": 31438,
175
- "Minute": 31465,
176
- "Mixture": 31431,
177
- "Modulus": 31308,
178
- "Molar": 31201,
179
- "Molarity": 31257,
180
- "Mole": 31351,
181
- "Molecule": 31447,
182
- "Moment": 31254,
183
- "Multiplicity": 31323,
184
- "NCA": 31439,
185
- "NCM": 31448,
186
- "NHE": 31342,
187
- "NMC": 31414,
188
- "NPV": 31195,
189
- "Nernst": 31417,
190
- "Neutral": 31335,
191
- "Nickel": 31436,
192
- "Nucleon": 31264,
193
- "Numeral": 31458,
194
- "OCV": 31416,
195
- "OSWV": 31207,
196
- "Occurrent": 31337,
197
- "Ohmic": 31403,
198
- "Ordinary": 31455,
199
- "Osmotic": 31313,
200
- "Osteryoung": 31194,
201
- "Overpotential": 31429,
202
- "Overvoltage": 31259,
203
- "Oxidation": 31226,
204
- "Oxide": 31394,
205
- "PTFE": 31376,
206
- "PVDF": 31468,
207
- "Partion": 31426,
208
- "Perdurant": 31233,
209
- "Permeability": 31304,
210
- "Permittivity": 31345,
211
- "Phosphate": 31385,
212
- "Plane": 31437,
213
- "Plot": 31375,
214
- "Polarization": 31396,
215
- "Polarography": 31372,
216
- "Potentiostatic": 31269,
217
- "Pouch": 31268,
218
- "Pressureless": 31327,
219
- "Primitive": 31419,
220
- "Processed": 31299,
221
- "Qt": 31249,
222
- "Quantity": 31291,
223
- "Radioactive": 31408,
224
- "Radius": 31241,
225
- "Reachable": 31211,
226
- "Reactance": 31296,
227
- "Reactive": 31281,
228
- "Reciprocal": 31230,
229
- "Reluctance": 31413,
230
- "Repetency": 31255,
231
- "Resistivity": 31341,
232
- "Retention": 31382,
233
- "Reversal": 31258,
234
- "Reversible": 31473,
235
- "Rolling": 31461,
236
- "SAE": 31189,
237
- "SBR": 31349,
238
- "SEI": 31318,
239
- "SHE": 31326,
240
- "SWV": 31277,
241
- "Scan": 31267,
242
- "Selectivity": 31362,
243
- "Semiosis": 31292,
244
- "Semiotic": 31377,
245
- "Separator": 31260,
246
- "Shear": 31262,
247
- "Shelf": 31444,
248
- "Shuttlecock": 31321,
249
- "Simulacrum": 31449,
250
- "Sintering": 31463,
251
- "Solubility": 31290,
252
- "Solute": 31332,
253
- "Solvent": 31243,
254
- "Spatially": 31353,
255
- "Spatiialnteraction": 31314,
256
- "Spatio": 31340,
257
- "Stopping": 31370,
258
- "Strength": 31198,
259
- "Substance": 31190,
260
- "Supercapacitor": 31412,
261
- "Swagelok": 31248,
262
- "Sweep": 31344,
263
- "Swing": 31232,
264
- "TAST": 31404,
265
- "TFSI": 31252,
266
- "Tangible": 31422,
267
- "Teflon": 31219,
268
- "Tension": 31276,
269
- "Theorization": 31406,
270
- "Thermodynamic": 31208,
271
- "Thermogalvanic": 31247,
272
- "Tiling": 31470,
273
- "Titanium": 31380,
274
- "Titration": 31229,
275
- "Transferred": 31300,
276
- "Transferrence": 31339,
277
- "Transmittance": 31459,
278
- "Vacuum": 31282,
279
- "Vapour": 31212,
280
- "Viscosity": 31311,
281
- "Volmer": 31393,
282
- "Voltameter": 31279,
283
- "Voltammetry": 31213,
284
- "Volumetric": 31450,
285
- "Volumic": 31411,
286
- "Wholistic": 31227,
287
- "Window": 31206,
288
- "Workflow": 31391,
289
- "Youngs": 31204,
290
- "Zinc": 31234,
291
- "and_logical": 31534,
292
- "attr_close": 31519,
293
- "attr_open": 31531,
294
- "avg": 31485,
295
- "battery__": 31182,
296
- "bool_not": 31518,
297
- "brack_close": 31530,
298
- "brack_open": 31538,
299
- "brick__": 31163,
300
- "ceil": 31516,
301
- "choose_by_max": 31477,
302
- "choose_by_min": 31515,
303
- "coalesce": 31491,
304
- "concat": 31497,
305
- "constant__": 31171,
306
- "csvw__": 31127,
307
- "datatype": 31480,
308
- "dbo__": 31156,
309
- "dbp__": 31167,
310
- "dbr__": 31165,
311
- "dc__": 31151,
312
- "dcam__": 31132,
313
- "dcat__": 31120,
314
- "dcmitype__": 31142,
315
- "dcterms__": 31168,
316
- "doap__": 31118,
317
- "dur_to_usecs": 31508,
318
- "electrochemistry__": 31181,
319
- "electrochemistrys__": 31148,
320
- "electromotive": 31371,
321
- "emmo__": 31116,
322
- "encode_for_uri": 31489,
323
- "foaf__": 31162,
324
- "geo__": 31158,
325
- "group_concat": 31484,
326
- "hamming_dist": 31475,
327
- "haversine_dist": 31503,
328
- "isblank": 31482,
329
- "isiri": 31513,
330
- "isliteral": 31494,
331
- "isnumeric": 31496,
332
- "isuri": 31481,
333
- "lang_at": 31528,
334
- "langmatches": 31486,
335
- "lcase": 31509,
336
- "levenshtein_dist": 31488,
337
- "log2": 31474,
338
- "manufacturing__": 31125,
339
- "map__": 31170,
340
- "math_eql": 31539,
341
- "math_geq": 31535,
342
- "math_gt": 31536,
343
- "math_leq": 31521,
344
- "math_lt": 31537,
345
- "math_neq": 31523,
346
- "md5": 31504,
347
- "odrl__": 31136,
348
- "or_logical": 31524,
349
- "org__": 31175,
350
- "owl__": 31131,
351
- "p__": 31176,
352
- "par_close": 31526,
353
- "par_open": 31520,
354
- "periodictable__": 31145,
355
- "pmanufacturing__": 31149,
356
- "pq__": 31122,
357
- "pqn__": 31174,
358
- "pqv__": 31128,
359
- "pr__": 31166,
360
- "prefix__": 31119,
361
- "prn__": 31146,
362
- "prof__": 31124,
363
- "prov__": 31159,
364
- "prv__": 31173,
365
- "ps__": 31141,
366
- "psn__": 31140,
367
- "psv__": 31129,
368
- "qb__": 31137,
369
- "qkdv__": 31157,
370
- "quantitykind__": 31154,
371
- "qudt__": 31178,
372
- "quote_str": 31517,
373
- "radians": 31512,
374
- "rand": 31495,
375
- "rdf__": 31134,
376
- "rdfs__": 31138,
377
- "regex": 31476,
378
- "rounddown": 31511,
379
- "roundup": 31499,
380
- "sameterm": 31493,
381
- "schema__": 31160,
382
- "seconds_dbl": 31483,
383
- "sep_com": 31525,
384
- "sep_dot": 31529,
385
- "sep_semi": 31532,
386
- "sh__": 31153,
387
- "sha1": 31506,
388
- "sha256": 31487,
389
- "sha384": 31507,
390
- "sha512": 31502,
391
- "skos__": 31130,
392
- "soqk__": 31164,
393
- "sosa__": 31150,
394
- "sou__": 31155,
395
- "ssn__": 31152,
396
- "str_type": 31533,
397
- "strafter": 31500,
398
- "strbefore": 31498,
399
- "strends": 31479,
400
- "strlen": 31510,
401
- "strstarts": 31514,
402
- "swrl__": 31144,
403
- "time__": 31161,
404
- "timezone": 31478,
405
- "tz": 31490,
406
- "ucase": 31505,
407
- "unit__": 31177,
408
- "usecs_to_dur": 31501,
409
- "value0": 31550,
410
- "value1": 31551,
411
- "value2": 31552,
412
- "value3": 31553,
413
- "value4": 31554,
414
- "value5": 31555,
415
- "value6": 31556,
416
- "value7": 31557,
417
- "value8": 31558,
418
- "value9": 31559,
419
- "vann__": 31143,
420
- "var0": 31540,
421
- "var1": 31541,
422
- "var2": 31542,
423
- "var3": 31543,
424
- "var4": 31544,
425
- "var5": 31545,
426
- "var6": 31546,
427
- "var7": 31547,
428
- "var8": 31548,
429
- "var9": 31549,
430
- "var_": 31522,
431
- "varp": 31492,
432
- "void__": 31123,
433
- "voltammetry": 31330,
434
- "wd__": 31139,
435
- "wdata__": 31135,
436
- "wdno__": 31169,
437
- "wdref__": 31179,
438
- "wds__": 31117,
439
- "wdt__": 31147,
440
- "wdtn__": 31126,
441
- "wdv__": 31133,
442
- "wgs__": 31180,
443
- "wildcard": 31527,
444
- "xml__": 31121,
445
- "xsd__": 31172
446
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/config.json DELETED
@@ -1,182 +0,0 @@
1
- {
2
- "_commit_hash": null,
3
- "_name_or_path": "bert2bert_added_vocab",
4
- "architectures": [
5
- "EncoderDecoderModel"
6
- ],
7
- "decoder": {
8
- "_name_or_path": "decoder_added_vocab",
9
- "add_cross_attention": true,
10
- "architectures": [
11
- "BertModel"
12
- ],
13
- "attention_probs_dropout_prob": 0.1,
14
- "bad_words_ids": null,
15
- "begin_suppress_tokens": null,
16
- "bos_token_id": null,
17
- "chunk_size_feed_forward": 0,
18
- "classifier_dropout": null,
19
- "cross_attention_hidden_size": null,
20
- "decoder_start_token_id": null,
21
- "diversity_penalty": 0.0,
22
- "do_sample": false,
23
- "early_stopping": false,
24
- "encoder_no_repeat_ngram_size": 0,
25
- "eos_token_id": null,
26
- "exponential_decay_length_penalty": null,
27
- "finetuning_task": null,
28
- "forced_bos_token_id": null,
29
- "forced_eos_token_id": null,
30
- "gradient_checkpointing": false,
31
- "hidden_act": "gelu",
32
- "hidden_dropout_prob": 0.1,
33
- "hidden_size": 768,
34
- "id2label": {
35
- "0": "LABEL_0",
36
- "1": "LABEL_1"
37
- },
38
- "initializer_range": 0.02,
39
- "intermediate_size": 3072,
40
- "is_decoder": true,
41
- "is_encoder_decoder": false,
42
- "label2id": {
43
- "LABEL_0": 0,
44
- "LABEL_1": 1
45
- },
46
- "layer_norm_eps": 1e-12,
47
- "length_penalty": 1.0,
48
- "max_length": 20,
49
- "max_position_embeddings": 512,
50
- "min_length": 0,
51
- "model_type": "bert",
52
- "no_repeat_ngram_size": 0,
53
- "num_attention_heads": 12,
54
- "num_beam_groups": 1,
55
- "num_beams": 1,
56
- "num_hidden_layers": 12,
57
- "num_return_sequences": 1,
58
- "output_attentions": false,
59
- "output_hidden_states": false,
60
- "output_scores": false,
61
- "pad_token_id": 0,
62
- "position_embedding_type": "absolute",
63
- "prefix": null,
64
- "problem_type": null,
65
- "pruned_heads": {},
66
- "remove_invalid_values": false,
67
- "repetition_penalty": 1.0,
68
- "return_dict": true,
69
- "return_dict_in_generate": false,
70
- "sep_token_id": null,
71
- "suppress_tokens": null,
72
- "task_specific_params": null,
73
- "temperature": 1.0,
74
- "tf_legacy_loss": false,
75
- "tie_encoder_decoder": false,
76
- "tie_word_embeddings": true,
77
- "tokenizer_class": null,
78
- "top_k": 50,
79
- "top_p": 1.0,
80
- "torch_dtype": "float32",
81
- "torchscript": false,
82
- "transformers_version": "4.30.0.dev0",
83
- "type_vocab_size": 2,
84
- "typical_p": 1.0,
85
- "use_bfloat16": false,
86
- "use_cache": true,
87
- "vocab_size": 31560
88
- },
89
- "decoder_start_token_id": 101,
90
- "early_stopping": true,
91
- "encoder": {
92
- "_name_or_path": "encoder_added_vocab",
93
- "add_cross_attention": false,
94
- "architectures": [
95
- "BertModel"
96
- ],
97
- "attention_probs_dropout_prob": 0.1,
98
- "bad_words_ids": null,
99
- "begin_suppress_tokens": null,
100
- "bos_token_id": null,
101
- "chunk_size_feed_forward": 0,
102
- "classifier_dropout": null,
103
- "cross_attention_hidden_size": null,
104
- "decoder_start_token_id": null,
105
- "diversity_penalty": 0.0,
106
- "do_sample": false,
107
- "early_stopping": false,
108
- "encoder_no_repeat_ngram_size": 0,
109
- "eos_token_id": null,
110
- "exponential_decay_length_penalty": null,
111
- "finetuning_task": null,
112
- "forced_bos_token_id": null,
113
- "forced_eos_token_id": null,
114
- "hidden_act": "gelu",
115
- "hidden_dropout_prob": 0.1,
116
- "hidden_size": 768,
117
- "id2label": {
118
- "0": "LABEL_0",
119
- "1": "LABEL_1"
120
- },
121
- "initializer_range": 0.02,
122
- "intermediate_size": 3072,
123
- "is_decoder": false,
124
- "is_encoder_decoder": false,
125
- "label2id": {
126
- "LABEL_0": 0,
127
- "LABEL_1": 1
128
- },
129
- "layer_norm_eps": 1e-12,
130
- "length_penalty": 1.0,
131
- "max_length": 20,
132
- "max_position_embeddings": 512,
133
- "min_length": 0,
134
- "model_type": "bert",
135
- "no_repeat_ngram_size": 0,
136
- "num_attention_heads": 12,
137
- "num_beam_groups": 1,
138
- "num_beams": 1,
139
- "num_hidden_layers": 12,
140
- "num_return_sequences": 1,
141
- "output_attentions": false,
142
- "output_hidden_states": false,
143
- "output_scores": false,
144
- "pad_token_id": 0,
145
- "position_embedding_type": "absolute",
146
- "prefix": null,
147
- "problem_type": null,
148
- "pruned_heads": {},
149
- "remove_invalid_values": false,
150
- "repetition_penalty": 1.0,
151
- "return_dict": true,
152
- "return_dict_in_generate": false,
153
- "sep_token_id": null,
154
- "suppress_tokens": null,
155
- "task_specific_params": null,
156
- "temperature": 1.0,
157
- "tf_legacy_loss": false,
158
- "tie_encoder_decoder": false,
159
- "tie_word_embeddings": true,
160
- "tokenizer_class": null,
161
- "top_k": 50,
162
- "top_p": 1.0,
163
- "torch_dtype": "float32",
164
- "torchscript": false,
165
- "transformers_version": "4.30.0.dev0",
166
- "type_vocab_size": 2,
167
- "typical_p": 1.0,
168
- "use_bfloat16": false,
169
- "use_cache": true,
170
- "vocab_size": 31560
171
- },
172
- "eos_token_id": 102,
173
- "is_encoder_decoder": true,
174
- "max_length": 256,
175
- "min_length": 16,
176
- "model_type": "encoder-decoder",
177
- "num_beams": 10,
178
- "pad_token_id": 0,
179
- "torch_dtype": "float32",
180
- "transformers_version": null,
181
- "vocab_size": 31560
182
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/generation_config.json DELETED
@@ -1,10 +0,0 @@
1
- {
2
- "decoder_start_token_id": 101,
3
- "early_stopping": true,
4
- "eos_token_id": 102,
5
- "max_length": 256,
6
- "min_length": 16,
7
- "num_beams": 10,
8
- "pad_token_id": 0,
9
- "transformers_version": "4.30.0.dev0"
10
- }
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:27f4d0eddea1b38fe1b064b634ee31b71591cebd4a1e1ea45cc654a255e6f2a6
3
- size 1987250795
 
 
 
 
last-checkpoint/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a0d0aa4eebf05151380698116acb15efc7e866ed933da42068ae8c22cfa382e
3
- size 996026489
 
 
 
 
last-checkpoint/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:46c405f6c1e391e2b416a111eb36de129465177d85a3f632dda3a1eb030336c8
3
- size 14575
 
 
 
 
last-checkpoint/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e8f70b4a42839623f23a2d7f6f16070a0bb7a8546d17d8052ec84cfde1f2b48
3
- size 627
 
 
 
 
last-checkpoint/special_tokens_map.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "cls_token": "[CLS]",
3
- "mask_token": "[MASK]",
4
- "pad_token": "[PAD]",
5
- "sep_token": "[SEP]",
6
- "unk_token": "[UNK]"
7
- }
 
 
 
 
 
 
 
 
last-checkpoint/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/tokenizer_config.json DELETED
@@ -1,15 +0,0 @@
1
- {
2
- "clean_up_tokenization_spaces": true,
3
- "cls_token": "[CLS]",
4
- "do_basic_tokenize": true,
5
- "do_lower_case": true,
6
- "mask_token": "[MASK]",
7
- "model_max_length": 256,
8
- "never_split": null,
9
- "pad_token": "[PAD]",
10
- "sep_token": "[SEP]",
11
- "strip_accents": null,
12
- "tokenize_chinese_chars": true,
13
- "tokenizer_class": "BertTokenizer",
14
- "unk_token": "[UNK]"
15
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/trainer_state.json DELETED
@@ -1,1336 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 8.298755186721992,
5
- "global_step": 20000,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.04,
12
- "learning_rate": 2.0000000000000003e-06,
13
- "loss": 92.256,
14
- "step": 100
15
- },
16
- {
17
- "epoch": 0.08,
18
- "learning_rate": 4.000000000000001e-06,
19
- "loss": 52.0764,
20
- "step": 200
21
- },
22
- {
23
- "epoch": 0.12,
24
- "learning_rate": 6e-06,
25
- "loss": 41.3627,
26
- "step": 300
27
- },
28
- {
29
- "epoch": 0.17,
30
- "learning_rate": 8.000000000000001e-06,
31
- "loss": 37.237,
32
- "step": 400
33
- },
34
- {
35
- "epoch": 0.21,
36
- "learning_rate": 1e-05,
37
- "loss": 34.3031,
38
- "step": 500
39
- },
40
- {
41
- "epoch": 0.25,
42
- "learning_rate": 1.2e-05,
43
- "loss": 31.7147,
44
- "step": 600
45
- },
46
- {
47
- "epoch": 0.29,
48
- "learning_rate": 1.4000000000000001e-05,
49
- "loss": 29.5475,
50
- "step": 700
51
- },
52
- {
53
- "epoch": 0.33,
54
- "learning_rate": 1.6000000000000003e-05,
55
- "loss": 27.914,
56
- "step": 800
57
- },
58
- {
59
- "epoch": 0.37,
60
- "learning_rate": 1.8e-05,
61
- "loss": 26.1499,
62
- "step": 900
63
- },
64
- {
65
- "epoch": 0.41,
66
- "learning_rate": 2e-05,
67
- "loss": 24.1629,
68
- "step": 1000
69
- },
70
- {
71
- "epoch": 0.46,
72
- "learning_rate": 2.2000000000000003e-05,
73
- "loss": 22.1829,
74
- "step": 1100
75
- },
76
- {
77
- "epoch": 0.5,
78
- "learning_rate": 2.4e-05,
79
- "loss": 20.2111,
80
- "step": 1200
81
- },
82
- {
83
- "epoch": 0.54,
84
- "learning_rate": 2.6000000000000002e-05,
85
- "loss": 18.6642,
86
- "step": 1300
87
- },
88
- {
89
- "epoch": 0.58,
90
- "learning_rate": 2.8000000000000003e-05,
91
- "loss": 16.7946,
92
- "step": 1400
93
- },
94
- {
95
- "epoch": 0.62,
96
- "learning_rate": 3e-05,
97
- "loss": 15.0783,
98
- "step": 1500
99
- },
100
- {
101
- "epoch": 0.66,
102
- "learning_rate": 3.2000000000000005e-05,
103
- "loss": 13.366,
104
- "step": 1600
105
- },
106
- {
107
- "epoch": 0.71,
108
- "learning_rate": 3.4000000000000007e-05,
109
- "loss": 11.706,
110
- "step": 1700
111
- },
112
- {
113
- "epoch": 0.75,
114
- "learning_rate": 3.6e-05,
115
- "loss": 10.2113,
116
- "step": 1800
117
- },
118
- {
119
- "epoch": 0.79,
120
- "learning_rate": 3.8e-05,
121
- "loss": 8.5921,
122
- "step": 1900
123
- },
124
- {
125
- "epoch": 0.83,
126
- "learning_rate": 4e-05,
127
- "loss": 7.2445,
128
- "step": 2000
129
- },
130
- {
131
- "epoch": 0.83,
132
- "eval_bleu": 1.523,
133
- "eval_em": 0.0,
134
- "eval_gen_len": 204.48,
135
- "eval_loss": 7.561939239501953,
136
- "eval_rm": NaN,
137
- "eval_runtime": 100.5427,
138
- "eval_samples_per_second": 0.497,
139
- "eval_steps_per_second": 0.07,
140
- "step": 2000
141
- },
142
- {
143
- "epoch": 0.87,
144
- "learning_rate": 4.2e-05,
145
- "loss": 6.8461,
146
- "step": 2100
147
- },
148
- {
149
- "epoch": 0.91,
150
- "learning_rate": 4.4000000000000006e-05,
151
- "loss": 6.223,
152
- "step": 2200
153
- },
154
- {
155
- "epoch": 0.95,
156
- "learning_rate": 4.600000000000001e-05,
157
- "loss": 6.1167,
158
- "step": 2300
159
- },
160
- {
161
- "epoch": 1.0,
162
- "learning_rate": 4.8e-05,
163
- "loss": 5.7623,
164
- "step": 2400
165
- },
166
- {
167
- "epoch": 1.04,
168
- "learning_rate": 5e-05,
169
- "loss": 5.5056,
170
- "step": 2500
171
- },
172
- {
173
- "epoch": 1.08,
174
- "learning_rate": 4.976851851851852e-05,
175
- "loss": 5.2979,
176
- "step": 2600
177
- },
178
- {
179
- "epoch": 1.12,
180
- "learning_rate": 4.9537037037037035e-05,
181
- "loss": 4.9765,
182
- "step": 2700
183
- },
184
- {
185
- "epoch": 1.16,
186
- "learning_rate": 4.930555555555556e-05,
187
- "loss": 4.7721,
188
- "step": 2800
189
- },
190
- {
191
- "epoch": 1.2,
192
- "learning_rate": 4.9074074074074075e-05,
193
- "loss": 4.6957,
194
- "step": 2900
195
- },
196
- {
197
- "epoch": 1.24,
198
- "learning_rate": 4.8842592592592595e-05,
199
- "loss": 4.7368,
200
- "step": 3000
201
- },
202
- {
203
- "epoch": 1.29,
204
- "learning_rate": 4.8611111111111115e-05,
205
- "loss": 4.6074,
206
- "step": 3100
207
- },
208
- {
209
- "epoch": 1.33,
210
- "learning_rate": 4.837962962962963e-05,
211
- "loss": 4.3678,
212
- "step": 3200
213
- },
214
- {
215
- "epoch": 1.37,
216
- "learning_rate": 4.814814814814815e-05,
217
- "loss": 4.19,
218
- "step": 3300
219
- },
220
- {
221
- "epoch": 1.41,
222
- "learning_rate": 4.791666666666667e-05,
223
- "loss": 4.0959,
224
- "step": 3400
225
- },
226
- {
227
- "epoch": 1.45,
228
- "learning_rate": 4.768518518518519e-05,
229
- "loss": 4.0323,
230
- "step": 3500
231
- },
232
- {
233
- "epoch": 1.49,
234
- "learning_rate": 4.745370370370371e-05,
235
- "loss": 3.7777,
236
- "step": 3600
237
- },
238
- {
239
- "epoch": 1.54,
240
- "learning_rate": 4.722222222222222e-05,
241
- "loss": 3.8014,
242
- "step": 3700
243
- },
244
- {
245
- "epoch": 1.58,
246
- "learning_rate": 4.699074074074074e-05,
247
- "loss": 3.6346,
248
- "step": 3800
249
- },
250
- {
251
- "epoch": 1.62,
252
- "learning_rate": 4.675925925925926e-05,
253
- "loss": 3.4876,
254
- "step": 3900
255
- },
256
- {
257
- "epoch": 1.66,
258
- "learning_rate": 4.652777777777778e-05,
259
- "loss": 3.4458,
260
- "step": 4000
261
- },
262
- {
263
- "epoch": 1.66,
264
- "eval_bleu": 16.0372,
265
- "eval_em": 0.0,
266
- "eval_gen_len": 76.36,
267
- "eval_loss": 3.4201648235321045,
268
- "eval_rm": NaN,
269
- "eval_runtime": 91.7678,
270
- "eval_samples_per_second": 0.545,
271
- "eval_steps_per_second": 0.076,
272
- "step": 4000
273
- },
274
- {
275
- "epoch": 1.7,
276
- "learning_rate": 4.62962962962963e-05,
277
- "loss": 3.4425,
278
- "step": 4100
279
- },
280
- {
281
- "epoch": 1.74,
282
- "learning_rate": 4.6064814814814814e-05,
283
- "loss": 3.3346,
284
- "step": 4200
285
- },
286
- {
287
- "epoch": 1.78,
288
- "learning_rate": 4.5833333333333334e-05,
289
- "loss": 3.2582,
290
- "step": 4300
291
- },
292
- {
293
- "epoch": 1.83,
294
- "learning_rate": 4.5601851851851854e-05,
295
- "loss": 3.1174,
296
- "step": 4400
297
- },
298
- {
299
- "epoch": 1.87,
300
- "learning_rate": 4.5370370370370374e-05,
301
- "loss": 2.9873,
302
- "step": 4500
303
- },
304
- {
305
- "epoch": 1.91,
306
- "learning_rate": 4.5138888888888894e-05,
307
- "loss": 3.1041,
308
- "step": 4600
309
- },
310
- {
311
- "epoch": 1.95,
312
- "learning_rate": 4.490740740740741e-05,
313
- "loss": 2.9365,
314
- "step": 4700
315
- },
316
- {
317
- "epoch": 1.99,
318
- "learning_rate": 4.467592592592593e-05,
319
- "loss": 2.9494,
320
- "step": 4800
321
- },
322
- {
323
- "epoch": 2.03,
324
- "learning_rate": 4.4444444444444447e-05,
325
- "loss": 2.8309,
326
- "step": 4900
327
- },
328
- {
329
- "epoch": 2.07,
330
- "learning_rate": 4.4212962962962966e-05,
331
- "loss": 2.6814,
332
- "step": 5000
333
- },
334
- {
335
- "epoch": 2.12,
336
- "learning_rate": 4.3981481481481486e-05,
337
- "loss": 2.6331,
338
- "step": 5100
339
- },
340
- {
341
- "epoch": 2.16,
342
- "learning_rate": 4.375e-05,
343
- "loss": 2.599,
344
- "step": 5200
345
- },
346
- {
347
- "epoch": 2.2,
348
- "learning_rate": 4.351851851851852e-05,
349
- "loss": 2.5576,
350
- "step": 5300
351
- },
352
- {
353
- "epoch": 2.24,
354
- "learning_rate": 4.328703703703704e-05,
355
- "loss": 2.4838,
356
- "step": 5400
357
- },
358
- {
359
- "epoch": 2.28,
360
- "learning_rate": 4.305555555555556e-05,
361
- "loss": 2.4532,
362
- "step": 5500
363
- },
364
- {
365
- "epoch": 2.32,
366
- "learning_rate": 4.282407407407408e-05,
367
- "loss": 2.3583,
368
- "step": 5600
369
- },
370
- {
371
- "epoch": 2.37,
372
- "learning_rate": 4.259259259259259e-05,
373
- "loss": 2.4245,
374
- "step": 5700
375
- },
376
- {
377
- "epoch": 2.41,
378
- "learning_rate": 4.236111111111111e-05,
379
- "loss": 2.3121,
380
- "step": 5800
381
- },
382
- {
383
- "epoch": 2.45,
384
- "learning_rate": 4.212962962962963e-05,
385
- "loss": 2.3254,
386
- "step": 5900
387
- },
388
- {
389
- "epoch": 2.49,
390
- "learning_rate": 4.1898148148148145e-05,
391
- "loss": 2.2571,
392
- "step": 6000
393
- },
394
- {
395
- "epoch": 2.49,
396
- "eval_bleu": 34.5826,
397
- "eval_em": 0.0,
398
- "eval_gen_len": 48.98,
399
- "eval_loss": 2.197143316268921,
400
- "eval_rm": 0.8125,
401
- "eval_runtime": 79.1278,
402
- "eval_samples_per_second": 0.632,
403
- "eval_steps_per_second": 0.088,
404
- "step": 6000
405
- },
406
- {
407
- "epoch": 2.53,
408
- "learning_rate": 4.166666666666667e-05,
409
- "loss": 2.2655,
410
- "step": 6100
411
- },
412
- {
413
- "epoch": 2.57,
414
- "learning_rate": 4.1435185185185185e-05,
415
- "loss": 2.1366,
416
- "step": 6200
417
- },
418
- {
419
- "epoch": 2.61,
420
- "learning_rate": 4.1203703703703705e-05,
421
- "loss": 2.1777,
422
- "step": 6300
423
- },
424
- {
425
- "epoch": 2.66,
426
- "learning_rate": 4.0972222222222225e-05,
427
- "loss": 2.1166,
428
- "step": 6400
429
- },
430
- {
431
- "epoch": 2.7,
432
- "learning_rate": 4.074074074074074e-05,
433
- "loss": 2.0804,
434
- "step": 6500
435
- },
436
- {
437
- "epoch": 2.74,
438
- "learning_rate": 4.0509259259259265e-05,
439
- "loss": 2.1024,
440
- "step": 6600
441
- },
442
- {
443
- "epoch": 2.78,
444
- "learning_rate": 4.027777777777778e-05,
445
- "loss": 2.0787,
446
- "step": 6700
447
- },
448
- {
449
- "epoch": 2.82,
450
- "learning_rate": 4.00462962962963e-05,
451
- "loss": 1.9875,
452
- "step": 6800
453
- },
454
- {
455
- "epoch": 2.86,
456
- "learning_rate": 3.981481481481482e-05,
457
- "loss": 1.9653,
458
- "step": 6900
459
- },
460
- {
461
- "epoch": 2.9,
462
- "learning_rate": 3.958333333333333e-05,
463
- "loss": 1.9254,
464
- "step": 7000
465
- },
466
- {
467
- "epoch": 2.95,
468
- "learning_rate": 3.935185185185186e-05,
469
- "loss": 1.9232,
470
- "step": 7100
471
- },
472
- {
473
- "epoch": 2.99,
474
- "learning_rate": 3.912037037037037e-05,
475
- "loss": 1.8927,
476
- "step": 7200
477
- },
478
- {
479
- "epoch": 3.03,
480
- "learning_rate": 3.888888888888889e-05,
481
- "loss": 1.7833,
482
- "step": 7300
483
- },
484
- {
485
- "epoch": 3.07,
486
- "learning_rate": 3.865740740740741e-05,
487
- "loss": 1.6375,
488
- "step": 7400
489
- },
490
- {
491
- "epoch": 3.11,
492
- "learning_rate": 3.8425925925925924e-05,
493
- "loss": 1.7233,
494
- "step": 7500
495
- },
496
- {
497
- "epoch": 3.15,
498
- "learning_rate": 3.8194444444444444e-05,
499
- "loss": 1.6997,
500
- "step": 7600
501
- },
502
- {
503
- "epoch": 3.2,
504
- "learning_rate": 3.7962962962962964e-05,
505
- "loss": 1.746,
506
- "step": 7700
507
- },
508
- {
509
- "epoch": 3.24,
510
- "learning_rate": 3.7731481481481484e-05,
511
- "loss": 1.6235,
512
- "step": 7800
513
- },
514
- {
515
- "epoch": 3.28,
516
- "learning_rate": 3.7500000000000003e-05,
517
- "loss": 1.564,
518
- "step": 7900
519
- },
520
- {
521
- "epoch": 3.32,
522
- "learning_rate": 3.726851851851852e-05,
523
- "loss": 1.5696,
524
- "step": 8000
525
- },
526
- {
527
- "epoch": 3.32,
528
- "eval_bleu": 53.8822,
529
- "eval_em": 0.0,
530
- "eval_gen_len": 48.7,
531
- "eval_loss": 1.680190086364746,
532
- "eval_rm": 1.0,
533
- "eval_runtime": 52.3216,
534
- "eval_samples_per_second": 0.956,
535
- "eval_steps_per_second": 0.134,
536
- "step": 8000
537
- },
538
- {
539
- "epoch": 3.36,
540
- "learning_rate": 3.7037037037037037e-05,
541
- "loss": 1.5516,
542
- "step": 8100
543
- },
544
- {
545
- "epoch": 3.4,
546
- "learning_rate": 3.6805555555555556e-05,
547
- "loss": 1.5438,
548
- "step": 8200
549
- },
550
- {
551
- "epoch": 3.44,
552
- "learning_rate": 3.6574074074074076e-05,
553
- "loss": 1.5262,
554
- "step": 8300
555
- },
556
- {
557
- "epoch": 3.49,
558
- "learning_rate": 3.6342592592592596e-05,
559
- "loss": 1.5738,
560
- "step": 8400
561
- },
562
- {
563
- "epoch": 3.53,
564
- "learning_rate": 3.611111111111111e-05,
565
- "loss": 1.5799,
566
- "step": 8500
567
- },
568
- {
569
- "epoch": 3.57,
570
- "learning_rate": 3.587962962962963e-05,
571
- "loss": 1.5116,
572
- "step": 8600
573
- },
574
- {
575
- "epoch": 3.61,
576
- "learning_rate": 3.564814814814815e-05,
577
- "loss": 1.5699,
578
- "step": 8700
579
- },
580
- {
581
- "epoch": 3.65,
582
- "learning_rate": 3.541666666666667e-05,
583
- "loss": 1.4994,
584
- "step": 8800
585
- },
586
- {
587
- "epoch": 3.69,
588
- "learning_rate": 3.518518518518519e-05,
589
- "loss": 1.3772,
590
- "step": 8900
591
- },
592
- {
593
- "epoch": 3.73,
594
- "learning_rate": 3.49537037037037e-05,
595
- "loss": 1.4322,
596
- "step": 9000
597
- },
598
- {
599
- "epoch": 3.78,
600
- "learning_rate": 3.472222222222222e-05,
601
- "loss": 1.4604,
602
- "step": 9100
603
- },
604
- {
605
- "epoch": 3.82,
606
- "learning_rate": 3.449074074074074e-05,
607
- "loss": 1.3924,
608
- "step": 9200
609
- },
610
- {
611
- "epoch": 3.86,
612
- "learning_rate": 3.425925925925926e-05,
613
- "loss": 1.348,
614
- "step": 9300
615
- },
616
- {
617
- "epoch": 3.9,
618
- "learning_rate": 3.402777777777778e-05,
619
- "loss": 1.3014,
620
- "step": 9400
621
- },
622
- {
623
- "epoch": 3.94,
624
- "learning_rate": 3.3796296296296295e-05,
625
- "loss": 1.3731,
626
- "step": 9500
627
- },
628
- {
629
- "epoch": 3.98,
630
- "learning_rate": 3.3564814814814815e-05,
631
- "loss": 1.39,
632
- "step": 9600
633
- },
634
- {
635
- "epoch": 4.02,
636
- "learning_rate": 3.3333333333333335e-05,
637
- "loss": 1.2038,
638
- "step": 9700
639
- },
640
- {
641
- "epoch": 4.07,
642
- "learning_rate": 3.3101851851851855e-05,
643
- "loss": 1.1698,
644
- "step": 9800
645
- },
646
- {
647
- "epoch": 4.11,
648
- "learning_rate": 3.2870370370370375e-05,
649
- "loss": 1.1514,
650
- "step": 9900
651
- },
652
- {
653
- "epoch": 4.15,
654
- "learning_rate": 3.263888888888889e-05,
655
- "loss": 1.1359,
656
- "step": 10000
657
- },
658
- {
659
- "epoch": 4.15,
660
- "eval_bleu": 64.5591,
661
- "eval_em": 0.02,
662
- "eval_gen_len": 45.2,
663
- "eval_loss": 1.346365213394165,
664
- "eval_rm": 0.973,
665
- "eval_runtime": 47.5326,
666
- "eval_samples_per_second": 1.052,
667
- "eval_steps_per_second": 0.147,
668
- "step": 10000
669
- },
670
- {
671
- "epoch": 4.19,
672
- "learning_rate": 3.240740740740741e-05,
673
- "loss": 1.1396,
674
- "step": 10100
675
- },
676
- {
677
- "epoch": 4.23,
678
- "learning_rate": 3.217592592592593e-05,
679
- "loss": 1.1749,
680
- "step": 10200
681
- },
682
- {
683
- "epoch": 4.27,
684
- "learning_rate": 3.194444444444444e-05,
685
- "loss": 1.1098,
686
- "step": 10300
687
- },
688
- {
689
- "epoch": 4.32,
690
- "learning_rate": 3.171296296296297e-05,
691
- "loss": 1.0741,
692
- "step": 10400
693
- },
694
- {
695
- "epoch": 4.36,
696
- "learning_rate": 3.148148148148148e-05,
697
- "loss": 1.1388,
698
- "step": 10500
699
- },
700
- {
701
- "epoch": 4.4,
702
- "learning_rate": 3.125e-05,
703
- "loss": 1.0168,
704
- "step": 10600
705
- },
706
- {
707
- "epoch": 4.44,
708
- "learning_rate": 3.101851851851852e-05,
709
- "loss": 1.0862,
710
- "step": 10700
711
- },
712
- {
713
- "epoch": 4.48,
714
- "learning_rate": 3.0787037037037034e-05,
715
- "loss": 1.0689,
716
- "step": 10800
717
- },
718
- {
719
- "epoch": 4.52,
720
- "learning_rate": 3.055555555555556e-05,
721
- "loss": 1.0804,
722
- "step": 10900
723
- },
724
- {
725
- "epoch": 4.56,
726
- "learning_rate": 3.0324074074074077e-05,
727
- "loss": 1.0312,
728
- "step": 11000
729
- },
730
- {
731
- "epoch": 4.61,
732
- "learning_rate": 3.0092592592592593e-05,
733
- "loss": 1.0184,
734
- "step": 11100
735
- },
736
- {
737
- "epoch": 4.65,
738
- "learning_rate": 2.9861111111111113e-05,
739
- "loss": 0.992,
740
- "step": 11200
741
- },
742
- {
743
- "epoch": 4.69,
744
- "learning_rate": 2.962962962962963e-05,
745
- "loss": 1.0191,
746
- "step": 11300
747
- },
748
- {
749
- "epoch": 4.73,
750
- "learning_rate": 2.9398148148148146e-05,
751
- "loss": 1.0842,
752
- "step": 11400
753
- },
754
- {
755
- "epoch": 4.77,
756
- "learning_rate": 2.916666666666667e-05,
757
- "loss": 1.0508,
758
- "step": 11500
759
- },
760
- {
761
- "epoch": 4.81,
762
- "learning_rate": 2.8935185185185186e-05,
763
- "loss": 0.9815,
764
- "step": 11600
765
- },
766
- {
767
- "epoch": 4.85,
768
- "learning_rate": 2.8703703703703706e-05,
769
- "loss": 0.9645,
770
- "step": 11700
771
- },
772
- {
773
- "epoch": 4.9,
774
- "learning_rate": 2.8472222222222223e-05,
775
- "loss": 0.9826,
776
- "step": 11800
777
- },
778
- {
779
- "epoch": 4.94,
780
- "learning_rate": 2.824074074074074e-05,
781
- "loss": 1.0036,
782
- "step": 11900
783
- },
784
- {
785
- "epoch": 4.98,
786
- "learning_rate": 2.8009259259259263e-05,
787
- "loss": 0.9994,
788
- "step": 12000
789
- },
790
- {
791
- "epoch": 4.98,
792
- "eval_bleu": 68.0869,
793
- "eval_em": 0.02,
794
- "eval_gen_len": 47.76,
795
- "eval_loss": 1.0576136112213135,
796
- "eval_rm": 0.8889,
797
- "eval_runtime": 48.2259,
798
- "eval_samples_per_second": 1.037,
799
- "eval_steps_per_second": 0.145,
800
- "step": 12000
801
- },
802
- {
803
- "epoch": 5.02,
804
- "learning_rate": 2.777777777777778e-05,
805
- "loss": 0.8445,
806
- "step": 12100
807
- },
808
- {
809
- "epoch": 5.06,
810
- "learning_rate": 2.75462962962963e-05,
811
- "loss": 0.833,
812
- "step": 12200
813
- },
814
- {
815
- "epoch": 5.1,
816
- "learning_rate": 2.7314814814814816e-05,
817
- "loss": 0.8034,
818
- "step": 12300
819
- },
820
- {
821
- "epoch": 5.15,
822
- "learning_rate": 2.7083333333333332e-05,
823
- "loss": 0.8154,
824
- "step": 12400
825
- },
826
- {
827
- "epoch": 5.19,
828
- "learning_rate": 2.6851851851851855e-05,
829
- "loss": 0.8026,
830
- "step": 12500
831
- },
832
- {
833
- "epoch": 5.23,
834
- "learning_rate": 2.6620370370370372e-05,
835
- "loss": 0.8147,
836
- "step": 12600
837
- },
838
- {
839
- "epoch": 5.27,
840
- "learning_rate": 2.6388888888888892e-05,
841
- "loss": 0.8466,
842
- "step": 12700
843
- },
844
- {
845
- "epoch": 5.31,
846
- "learning_rate": 2.615740740740741e-05,
847
- "loss": 0.7881,
848
- "step": 12800
849
- },
850
- {
851
- "epoch": 5.35,
852
- "learning_rate": 2.5925925925925925e-05,
853
- "loss": 0.7856,
854
- "step": 12900
855
- },
856
- {
857
- "epoch": 5.39,
858
- "learning_rate": 2.5694444444444445e-05,
859
- "loss": 0.8058,
860
- "step": 13000
861
- },
862
- {
863
- "epoch": 5.44,
864
- "learning_rate": 2.5462962962962965e-05,
865
- "loss": 0.7704,
866
- "step": 13100
867
- },
868
- {
869
- "epoch": 5.48,
870
- "learning_rate": 2.5231481481481485e-05,
871
- "loss": 0.7572,
872
- "step": 13200
873
- },
874
- {
875
- "epoch": 5.52,
876
- "learning_rate": 2.5e-05,
877
- "loss": 0.7757,
878
- "step": 13300
879
- },
880
- {
881
- "epoch": 5.56,
882
- "learning_rate": 2.4768518518518518e-05,
883
- "loss": 0.7598,
884
- "step": 13400
885
- },
886
- {
887
- "epoch": 5.6,
888
- "learning_rate": 2.4537037037037038e-05,
889
- "loss": 0.7268,
890
- "step": 13500
891
- },
892
- {
893
- "epoch": 5.64,
894
- "learning_rate": 2.4305555555555558e-05,
895
- "loss": 0.7455,
896
- "step": 13600
897
- },
898
- {
899
- "epoch": 5.68,
900
- "learning_rate": 2.4074074074074074e-05,
901
- "loss": 0.7525,
902
- "step": 13700
903
- },
904
- {
905
- "epoch": 5.73,
906
- "learning_rate": 2.3842592592592594e-05,
907
- "loss": 0.7205,
908
- "step": 13800
909
- },
910
- {
911
- "epoch": 5.77,
912
- "learning_rate": 2.361111111111111e-05,
913
- "loss": 0.7741,
914
- "step": 13900
915
- },
916
- {
917
- "epoch": 5.81,
918
- "learning_rate": 2.337962962962963e-05,
919
- "loss": 0.7275,
920
- "step": 14000
921
- },
922
- {
923
- "epoch": 5.81,
924
- "eval_bleu": 74.1032,
925
- "eval_em": 0.02,
926
- "eval_gen_len": 46.52,
927
- "eval_loss": 0.952226996421814,
928
- "eval_rm": 0.9556,
929
- "eval_runtime": 45.6891,
930
- "eval_samples_per_second": 1.094,
931
- "eval_steps_per_second": 0.153,
932
- "step": 14000
933
- },
934
- {
935
- "epoch": 5.85,
936
- "learning_rate": 2.314814814814815e-05,
937
- "loss": 0.7429,
938
- "step": 14100
939
- },
940
- {
941
- "epoch": 5.89,
942
- "learning_rate": 2.2916666666666667e-05,
943
- "loss": 0.7032,
944
- "step": 14200
945
- },
946
- {
947
- "epoch": 5.93,
948
- "learning_rate": 2.2685185185185187e-05,
949
- "loss": 0.749,
950
- "step": 14300
951
- },
952
- {
953
- "epoch": 5.98,
954
- "learning_rate": 2.2453703703703703e-05,
955
- "loss": 0.7122,
956
- "step": 14400
957
- },
958
- {
959
- "epoch": 6.02,
960
- "learning_rate": 2.2222222222222223e-05,
961
- "loss": 0.6559,
962
- "step": 14500
963
- },
964
- {
965
- "epoch": 6.06,
966
- "learning_rate": 2.1990740740740743e-05,
967
- "loss": 0.5757,
968
- "step": 14600
969
- },
970
- {
971
- "epoch": 6.1,
972
- "learning_rate": 2.175925925925926e-05,
973
- "loss": 0.5756,
974
- "step": 14700
975
- },
976
- {
977
- "epoch": 6.14,
978
- "learning_rate": 2.152777777777778e-05,
979
- "loss": 0.5777,
980
- "step": 14800
981
- },
982
- {
983
- "epoch": 6.18,
984
- "learning_rate": 2.1296296296296296e-05,
985
- "loss": 0.5904,
986
- "step": 14900
987
- },
988
- {
989
- "epoch": 6.22,
990
- "learning_rate": 2.1064814814814816e-05,
991
- "loss": 0.5798,
992
- "step": 15000
993
- },
994
- {
995
- "epoch": 6.27,
996
- "learning_rate": 2.0833333333333336e-05,
997
- "loss": 0.5939,
998
- "step": 15100
999
- },
1000
- {
1001
- "epoch": 6.31,
1002
- "learning_rate": 2.0601851851851853e-05,
1003
- "loss": 0.6095,
1004
- "step": 15200
1005
- },
1006
- {
1007
- "epoch": 6.35,
1008
- "learning_rate": 2.037037037037037e-05,
1009
- "loss": 0.6163,
1010
- "step": 15300
1011
- },
1012
- {
1013
- "epoch": 6.39,
1014
- "learning_rate": 2.013888888888889e-05,
1015
- "loss": 0.5634,
1016
- "step": 15400
1017
- },
1018
- {
1019
- "epoch": 6.43,
1020
- "learning_rate": 1.990740740740741e-05,
1021
- "loss": 0.5909,
1022
- "step": 15500
1023
- },
1024
- {
1025
- "epoch": 6.47,
1026
- "learning_rate": 1.967592592592593e-05,
1027
- "loss": 0.5408,
1028
- "step": 15600
1029
- },
1030
- {
1031
- "epoch": 6.51,
1032
- "learning_rate": 1.9444444444444445e-05,
1033
- "loss": 0.5435,
1034
- "step": 15700
1035
- },
1036
- {
1037
- "epoch": 6.56,
1038
- "learning_rate": 1.9212962962962962e-05,
1039
- "loss": 0.5753,
1040
- "step": 15800
1041
- },
1042
- {
1043
- "epoch": 6.6,
1044
- "learning_rate": 1.8981481481481482e-05,
1045
- "loss": 0.5491,
1046
- "step": 15900
1047
- },
1048
- {
1049
- "epoch": 6.64,
1050
- "learning_rate": 1.8750000000000002e-05,
1051
- "loss": 0.5868,
1052
- "step": 16000
1053
- },
1054
- {
1055
- "epoch": 6.64,
1056
- "eval_bleu": 71.6124,
1057
- "eval_em": 0.02,
1058
- "eval_gen_len": 47.52,
1059
- "eval_loss": 0.9307076930999756,
1060
- "eval_rm": 0.9556,
1061
- "eval_runtime": 330.299,
1062
- "eval_samples_per_second": 0.151,
1063
- "eval_steps_per_second": 0.021,
1064
- "step": 16000
1065
- },
1066
- {
1067
- "epoch": 6.68,
1068
- "learning_rate": 1.8518518518518518e-05,
1069
- "loss": 0.5088,
1070
- "step": 16100
1071
- },
1072
- {
1073
- "epoch": 6.72,
1074
- "learning_rate": 1.8287037037037038e-05,
1075
- "loss": 0.6068,
1076
- "step": 16200
1077
- },
1078
- {
1079
- "epoch": 6.76,
1080
- "learning_rate": 1.8055555555555555e-05,
1081
- "loss": 0.5327,
1082
- "step": 16300
1083
- },
1084
- {
1085
- "epoch": 6.8,
1086
- "learning_rate": 1.7824074074074075e-05,
1087
- "loss": 0.5759,
1088
- "step": 16400
1089
- },
1090
- {
1091
- "epoch": 6.85,
1092
- "learning_rate": 1.7592592592592595e-05,
1093
- "loss": 0.5849,
1094
- "step": 16500
1095
- },
1096
- {
1097
- "epoch": 6.89,
1098
- "learning_rate": 1.736111111111111e-05,
1099
- "loss": 0.5648,
1100
- "step": 16600
1101
- },
1102
- {
1103
- "epoch": 6.93,
1104
- "learning_rate": 1.712962962962963e-05,
1105
- "loss": 0.5632,
1106
- "step": 16700
1107
- },
1108
- {
1109
- "epoch": 6.97,
1110
- "learning_rate": 1.6898148148148148e-05,
1111
- "loss": 0.5093,
1112
- "step": 16800
1113
- },
1114
- {
1115
- "epoch": 7.01,
1116
- "learning_rate": 1.6666666666666667e-05,
1117
- "loss": 0.5295,
1118
- "step": 16900
1119
- },
1120
- {
1121
- "epoch": 7.05,
1122
- "learning_rate": 1.6435185185185187e-05,
1123
- "loss": 0.433,
1124
- "step": 17000
1125
- },
1126
- {
1127
- "epoch": 7.1,
1128
- "learning_rate": 1.6203703703703704e-05,
1129
- "loss": 0.4289,
1130
- "step": 17100
1131
- },
1132
- {
1133
- "epoch": 7.14,
1134
- "learning_rate": 1.597222222222222e-05,
1135
- "loss": 0.4619,
1136
- "step": 17200
1137
- },
1138
- {
1139
- "epoch": 7.18,
1140
- "learning_rate": 1.574074074074074e-05,
1141
- "loss": 0.458,
1142
- "step": 17300
1143
- },
1144
- {
1145
- "epoch": 7.22,
1146
- "learning_rate": 1.550925925925926e-05,
1147
- "loss": 0.4155,
1148
- "step": 17400
1149
- },
1150
- {
1151
- "epoch": 7.26,
1152
- "learning_rate": 1.527777777777778e-05,
1153
- "loss": 0.4391,
1154
- "step": 17500
1155
- },
1156
- {
1157
- "epoch": 7.3,
1158
- "learning_rate": 1.5046296296296297e-05,
1159
- "loss": 0.4264,
1160
- "step": 17600
1161
- },
1162
- {
1163
- "epoch": 7.34,
1164
- "learning_rate": 1.4814814814814815e-05,
1165
- "loss": 0.418,
1166
- "step": 17700
1167
- },
1168
- {
1169
- "epoch": 7.39,
1170
- "learning_rate": 1.4583333333333335e-05,
1171
- "loss": 0.4285,
1172
- "step": 17800
1173
- },
1174
- {
1175
- "epoch": 7.43,
1176
- "learning_rate": 1.4351851851851853e-05,
1177
- "loss": 0.4466,
1178
- "step": 17900
1179
- },
1180
- {
1181
- "epoch": 7.47,
1182
- "learning_rate": 1.412037037037037e-05,
1183
- "loss": 0.4499,
1184
- "step": 18000
1185
- },
1186
- {
1187
- "epoch": 7.47,
1188
- "eval_bleu": 77.237,
1189
- "eval_em": 0.06,
1190
- "eval_gen_len": 46.0,
1191
- "eval_loss": 0.8866045475006104,
1192
- "eval_rm": 0.9574,
1193
- "eval_runtime": 280.5418,
1194
- "eval_samples_per_second": 0.178,
1195
- "eval_steps_per_second": 0.025,
1196
- "step": 18000
1197
- },
1198
- {
1199
- "epoch": 7.51,
1200
- "learning_rate": 1.388888888888889e-05,
1201
- "loss": 0.4415,
1202
- "step": 18100
1203
- },
1204
- {
1205
- "epoch": 7.55,
1206
- "learning_rate": 1.3657407407407408e-05,
1207
- "loss": 0.4209,
1208
- "step": 18200
1209
- },
1210
- {
1211
- "epoch": 7.59,
1212
- "learning_rate": 1.3425925925925928e-05,
1213
- "loss": 0.4357,
1214
- "step": 18300
1215
- },
1216
- {
1217
- "epoch": 7.63,
1218
- "learning_rate": 1.3194444444444446e-05,
1219
- "loss": 0.437,
1220
- "step": 18400
1221
- },
1222
- {
1223
- "epoch": 7.68,
1224
- "learning_rate": 1.2962962962962962e-05,
1225
- "loss": 0.4319,
1226
- "step": 18500
1227
- },
1228
- {
1229
- "epoch": 7.72,
1230
- "learning_rate": 1.2731481481481482e-05,
1231
- "loss": 0.4578,
1232
- "step": 18600
1233
- },
1234
- {
1235
- "epoch": 7.76,
1236
- "learning_rate": 1.25e-05,
1237
- "loss": 0.4244,
1238
- "step": 18700
1239
- },
1240
- {
1241
- "epoch": 7.8,
1242
- "learning_rate": 1.2268518518518519e-05,
1243
- "loss": 0.4046,
1244
- "step": 18800
1245
- },
1246
- {
1247
- "epoch": 7.84,
1248
- "learning_rate": 1.2037037037037037e-05,
1249
- "loss": 0.411,
1250
- "step": 18900
1251
- },
1252
- {
1253
- "epoch": 7.88,
1254
- "learning_rate": 1.1805555555555555e-05,
1255
- "loss": 0.4219,
1256
- "step": 19000
1257
- },
1258
- {
1259
- "epoch": 7.93,
1260
- "learning_rate": 1.1574074074074075e-05,
1261
- "loss": 0.3956,
1262
- "step": 19100
1263
- },
1264
- {
1265
- "epoch": 7.97,
1266
- "learning_rate": 1.1342592592592593e-05,
1267
- "loss": 0.4333,
1268
- "step": 19200
1269
- },
1270
- {
1271
- "epoch": 8.01,
1272
- "learning_rate": 1.1111111111111112e-05,
1273
- "loss": 0.4141,
1274
- "step": 19300
1275
- },
1276
- {
1277
- "epoch": 8.05,
1278
- "learning_rate": 1.087962962962963e-05,
1279
- "loss": 0.3199,
1280
- "step": 19400
1281
- },
1282
- {
1283
- "epoch": 8.09,
1284
- "learning_rate": 1.0648148148148148e-05,
1285
- "loss": 0.3236,
1286
- "step": 19500
1287
- },
1288
- {
1289
- "epoch": 8.13,
1290
- "learning_rate": 1.0416666666666668e-05,
1291
- "loss": 0.3405,
1292
- "step": 19600
1293
- },
1294
- {
1295
- "epoch": 8.17,
1296
- "learning_rate": 1.0185185185185185e-05,
1297
- "loss": 0.3411,
1298
- "step": 19700
1299
- },
1300
- {
1301
- "epoch": 8.22,
1302
- "learning_rate": 9.953703703703704e-06,
1303
- "loss": 0.3166,
1304
- "step": 19800
1305
- },
1306
- {
1307
- "epoch": 8.26,
1308
- "learning_rate": 9.722222222222223e-06,
1309
- "loss": 0.3463,
1310
- "step": 19900
1311
- },
1312
- {
1313
- "epoch": 8.3,
1314
- "learning_rate": 9.490740740740741e-06,
1315
- "loss": 0.3515,
1316
- "step": 20000
1317
- },
1318
- {
1319
- "epoch": 8.3,
1320
- "eval_bleu": 77.5798,
1321
- "eval_em": 0.08,
1322
- "eval_gen_len": 47.5,
1323
- "eval_loss": 0.9069581627845764,
1324
- "eval_rm": 0.9574,
1325
- "eval_runtime": 312.8421,
1326
- "eval_samples_per_second": 0.16,
1327
- "eval_steps_per_second": 0.022,
1328
- "step": 20000
1329
- }
1330
- ],
1331
- "max_steps": 24100,
1332
- "num_train_epochs": 10,
1333
- "total_flos": 5827422495665664.0,
1334
- "trial_name": null,
1335
- "trial_params": null
1336
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f39883603e0f5d5f423c83fa5b774cffc6fd43f8970ddb1f1665c98b8cf652f9
3
- size 4219
 
 
 
 
last-checkpoint/vocab.txt DELETED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a0d0aa4eebf05151380698116acb15efc7e866ed933da42068ae8c22cfa382e
3
  size 996026489
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b2f7ee2080980e630ae668e1dc510bdf00cfdff869a925b78d26ae1342fb7fb
3
  size 996026489
runs/May23_11-27-41_4b659dec4dc1/events.out.tfevents.1684841942.4b659dec4dc1.6139.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46bed9901348db72ec0ddf87172765591157e7e6f99f5dd9c06ca85bd762ce33
3
- size 17952
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cfbe36ece9f438d2ff746c1d77040ad966b7b1853315d14c80cd69774115aac
3
+ size 25814