JulienRPA committed on
Commit
53a06c0
1 Parent(s): 798c1a6

Model save

last-checkpoint/generation_config.json → generation_config.json RENAMED
File without changes
last-checkpoint/added_tokens.json DELETED
@@ -1,446 +0,0 @@
- {
- "ACV": 31381,
- "Absolute": 31398,
- "Acidic": 31331,
- "Additive": 31301,
- "Admittance": 31228,
- "Affinity": 31407,
- "Alkaline": 31410,
- "Alternating": 31428,
- "Aluminum": 31446,
- "Amount": 31283,
- "Amperostat": 31359,
- "Analyzer": 31324,
- "Angle": 31420,
- "Angluar": 31356,
- "Anglular": 31348,
- "Angstrom": 31193,
- "Angular": 31472,
- "Anodic": 31334,
- "Approximation": 31440,
- "Aqueous": 31346,
- "Areal": 31425,
- "Areic": 31462,
- "Artifact": 31456,
- "Atom": 31352,
- "Auxiliary": 31387,
- "BAE": 31368,
- "Bandgap": 31421,
- "Battery": 31328,
- "Binary": 31307,
- "Binder": 31297,
- "Bode": 31191,
- "Breadth": 31338,
- "Bulk": 31196,
- "Butler": 31310,
- "Button": 31467,
- "CCCV": 31312,
- "CMC": 31315,
- "CPE": 31202,
- "CSV": 31336,
- "Capacitance": 31238,
- "Capacity": 31295,
- "Carnot": 31363,
- "Cathodic": 31357,
- "Characterisation": 31214,
- "Charge": 31460,
- "Charging": 31265,
- "Closed": 31285,
- "Cobalt": 31350,
- "Coefficient": 31289,
- "Coin": 31270,
- "Collector": 31445,
- "Conductance": 31263,
- "Conductive": 31454,
- "Conductivity": 31395,
- "Conductor": 31284,
- "Continuant": 31287,
- "Continuum": 31430,
- "Conversion": 31333,
- "Coulombic": 31218,
- "Coupling": 31192,
- "Coverage": 31200,
- "Crystalization": 31400,
- "Curve": 31215,
- "Cutoff": 31256,
- "Cutting": 31216,
- "Cycling": 31261,
- "Cylindrical": 31325,
- "DFN": 31293,
- "DMC": 31302,
- "DME": 31457,
- "DPV": 31183,
- "Debye": 31286,
- "Dedomena": 31222,
- "Discharge": 31274,
- "Discharging": 31452,
- "Disintegration": 31469,
- "Displacement": 31451,
- "Dissociation": 31354,
- "Donnan": 31210,
- "Doyle": 31366,
- "Drag": 31306,
- "EASA": 31320,
- "ECSA": 31418,
- "EIS": 31187,
- "EMC": 31409,
- "Elaboration": 31358,
- "Electrochemical": 31384,
- "Electrochemically": 31424,
- "Electrode": 31322,
- "Electrolyte": 31379,
- "Electrolytic": 31185,
- "Electromagnetic": 31203,
- "Electronically": 31471,
- "Electrons": 31271,
- "Electrooxidation": 31374,
- "Electroreduction": 31235,
- "Elemental": 31288,
- "Elementary": 31405,
- "Embarassingly": 31329,
- "Encoded": 31246,
- "Endurant": 31355,
- "Engineered": 31390,
- "Entity": 31294,
- "Equilibrium": 31209,
- "Equipment": 31197,
- "Erdey": 31237,
- "Executable": 31205,
- "Expansion": 31275,
- "Extinction": 31199,
- "FEC": 31389,
- "FRA": 31221,
- "Faraday": 31367,
- "Fermionic": 31364,
- "Float": 31231,
- "Forming": 31220,
- "Friction": 31386,
- "Fuller": 31305,
- "GDE": 31251,
- "Galvanizing": 31442,
- "Galvanostatic": 31415,
- "Generative": 31253,
- "Gravimetric": 31435,
- "Gruz": 31280,
- "Gyromagnetic": 31378,
- "Helmholtz": 31239,
- "Hold": 31250,
- "Holistic": 31244,
- "Icon": 31369,
- "Impedance": 31273,
- "Indifferent": 31184,
- "Inductance": 31223,
- "Inductive": 31383,
- "Inert": 31186,
- "Insertion": 31433,
- "Insulance": 31402,
- "Intangible": 31373,
- "Integral": 31427,
- "Intentional": 31317,
- "Intercalation": 31225,
- "Intermittent": 31245,
- "Interruption": 31240,
- "Invariant": 31278,
- "Inverse": 31453,
- "Ionic": 31434,
- "Ionization": 31319,
- "Jar": 31464,
- "Kynar": 31343,
- "LCO": 31401,
- "LFP": 31316,
- "LMO": 31303,
- "LNO": 31224,
- "LSV": 31365,
- "LTO": 31188,
- "Larmor": 31392,
- "Lifetime": 31236,
- "Limit": 31298,
- "Limiting": 31432,
- "Lineic": 31466,
- "Liquid": 31441,
- "Lithium": 31423,
- "Loading": 31266,
- "Loose": 31397,
- "Lorenz": 31309,
- "Macroscopic": 31361,
- "Magnetizing": 31399,
- "Magnetogyric": 31388,
- "Manganese": 31443,
- "Massic": 31272,
- "Maximal": 31360,
- "Measurand": 31347,
- "Measured": 31217,
- "Mereological": 31242,
- "Micron": 31438,
- "Minute": 31465,
- "Mixture": 31431,
- "Modulus": 31308,
- "Molar": 31201,
- "Molarity": 31257,
- "Mole": 31351,
- "Molecule": 31447,
- "Moment": 31254,
- "Multiplicity": 31323,
- "NCA": 31439,
- "NCM": 31448,
- "NHE": 31342,
- "NMC": 31414,
- "NPV": 31195,
- "Nernst": 31417,
- "Neutral": 31335,
- "Nickel": 31436,
- "Nucleon": 31264,
- "Numeral": 31458,
- "OCV": 31416,
- "OSWV": 31207,
- "Occurrent": 31337,
- "Ohmic": 31403,
- "Ordinary": 31455,
- "Osmotic": 31313,
- "Osteryoung": 31194,
- "Overpotential": 31429,
- "Overvoltage": 31259,
- "Oxidation": 31226,
- "Oxide": 31394,
- "PTFE": 31376,
- "PVDF": 31468,
- "Partion": 31426,
- "Perdurant": 31233,
- "Permeability": 31304,
- "Permittivity": 31345,
- "Phosphate": 31385,
- "Plane": 31437,
- "Plot": 31375,
- "Polarization": 31396,
- "Polarography": 31372,
- "Potentiostatic": 31269,
- "Pouch": 31268,
- "Pressureless": 31327,
- "Primitive": 31419,
- "Processed": 31299,
- "Qt": 31249,
- "Quantity": 31291,
- "Radioactive": 31408,
- "Radius": 31241,
- "Reachable": 31211,
- "Reactance": 31296,
- "Reactive": 31281,
- "Reciprocal": 31230,
- "Reluctance": 31413,
- "Repetency": 31255,
- "Resistivity": 31341,
- "Retention": 31382,
- "Reversal": 31258,
- "Reversible": 31473,
- "Rolling": 31461,
- "SAE": 31189,
- "SBR": 31349,
- "SEI": 31318,
- "SHE": 31326,
- "SWV": 31277,
- "Scan": 31267,
- "Selectivity": 31362,
- "Semiosis": 31292,
- "Semiotic": 31377,
- "Separator": 31260,
- "Shear": 31262,
- "Shelf": 31444,
- "Shuttlecock": 31321,
- "Simulacrum": 31449,
- "Sintering": 31463,
- "Solubility": 31290,
- "Solute": 31332,
- "Solvent": 31243,
- "Spatially": 31353,
- "Spatiialnteraction": 31314,
- "Spatio": 31340,
- "Stopping": 31370,
- "Strength": 31198,
- "Substance": 31190,
- "Supercapacitor": 31412,
- "Swagelok": 31248,
- "Sweep": 31344,
- "Swing": 31232,
- "TAST": 31404,
- "TFSI": 31252,
- "Tangible": 31422,
- "Teflon": 31219,
- "Tension": 31276,
- "Theorization": 31406,
- "Thermodynamic": 31208,
- "Thermogalvanic": 31247,
- "Tiling": 31470,
- "Titanium": 31380,
- "Titration": 31229,
- "Transferred": 31300,
- "Transferrence": 31339,
- "Transmittance": 31459,
- "Vacuum": 31282,
- "Vapour": 31212,
- "Viscosity": 31311,
- "Volmer": 31393,
- "Voltameter": 31279,
- "Voltammetry": 31213,
- "Volumetric": 31450,
- "Volumic": 31411,
- "Wholistic": 31227,
- "Window": 31206,
- "Workflow": 31391,
- "Youngs": 31204,
- "Zinc": 31234,
- "and_logical": 31534,
- "attr_close": 31519,
- "attr_open": 31531,
- "avg": 31485,
- "battery__": 31182,
- "bool_not": 31518,
- "brack_close": 31530,
- "brack_open": 31538,
- "brick__": 31163,
- "ceil": 31516,
- "choose_by_max": 31477,
- "choose_by_min": 31515,
- "coalesce": 31491,
- "concat": 31497,
- "constant__": 31171,
- "csvw__": 31127,
- "datatype": 31480,
- "dbo__": 31156,
- "dbp__": 31167,
- "dbr__": 31165,
- "dc__": 31151,
- "dcam__": 31132,
- "dcat__": 31120,
- "dcmitype__": 31142,
- "dcterms__": 31168,
- "doap__": 31118,
- "dur_to_usecs": 31508,
- "electrochemistry__": 31181,
- "electrochemistrys__": 31148,
- "electromotive": 31371,
- "emmo__": 31116,
- "encode_for_uri": 31489,
- "foaf__": 31162,
- "geo__": 31158,
- "group_concat": 31484,
- "hamming_dist": 31475,
- "haversine_dist": 31503,
- "isblank": 31482,
- "isiri": 31513,
- "isliteral": 31494,
- "isnumeric": 31496,
- "isuri": 31481,
- "lang_at": 31528,
- "langmatches": 31486,
- "lcase": 31509,
- "levenshtein_dist": 31488,
- "log2": 31474,
- "manufacturing__": 31125,
- "map__": 31170,
- "math_eql": 31539,
- "math_geq": 31535,
- "math_gt": 31536,
- "math_leq": 31521,
- "math_lt": 31537,
- "math_neq": 31523,
- "md5": 31504,
- "odrl__": 31136,
- "or_logical": 31524,
- "org__": 31175,
- "owl__": 31131,
- "p__": 31176,
- "par_close": 31526,
- "par_open": 31520,
- "periodictable__": 31145,
- "pmanufacturing__": 31149,
- "pq__": 31122,
- "pqn__": 31174,
- "pqv__": 31128,
- "pr__": 31166,
- "prefix__": 31119,
- "prn__": 31146,
- "prof__": 31124,
- "prov__": 31159,
- "prv__": 31173,
- "ps__": 31141,
- "psn__": 31140,
- "psv__": 31129,
- "qb__": 31137,
- "qkdv__": 31157,
- "quantitykind__": 31154,
- "qudt__": 31178,
- "quote_str": 31517,
- "radians": 31512,
- "rand": 31495,
- "rdf__": 31134,
- "rdfs__": 31138,
- "regex": 31476,
- "rounddown": 31511,
- "roundup": 31499,
- "sameterm": 31493,
- "schema__": 31160,
- "seconds_dbl": 31483,
- "sep_com": 31525,
- "sep_dot": 31529,
- "sep_semi": 31532,
- "sh__": 31153,
- "sha1": 31506,
- "sha256": 31487,
- "sha384": 31507,
- "sha512": 31502,
- "skos__": 31130,
- "soqk__": 31164,
- "sosa__": 31150,
- "sou__": 31155,
- "ssn__": 31152,
- "str_type": 31533,
- "strafter": 31500,
- "strbefore": 31498,
- "strends": 31479,
- "strlen": 31510,
- "strstarts": 31514,
- "swrl__": 31144,
- "time__": 31161,
- "timezone": 31478,
- "tz": 31490,
- "ucase": 31505,
- "unit__": 31177,
- "usecs_to_dur": 31501,
- "value0": 31550,
- "value1": 31551,
- "value2": 31552,
- "value3": 31553,
- "value4": 31554,
- "value5": 31555,
- "value6": 31556,
- "value7": 31557,
- "value8": 31558,
- "value9": 31559,
- "vann__": 31143,
- "var0": 31540,
- "var1": 31541,
- "var2": 31542,
- "var3": 31543,
- "var4": 31544,
- "var5": 31545,
- "var6": 31546,
- "var7": 31547,
- "var8": 31548,
- "var9": 31549,
- "var_": 31522,
- "varp": 31492,
- "void__": 31123,
- "voltammetry": 31330,
- "wd__": 31139,
- "wdata__": 31135,
- "wdno__": 31169,
- "wdref__": 31179,
- "wds__": 31117,
- "wdt__": 31147,
- "wdtn__": 31126,
- "wdv__": 31133,
- "wgs__": 31180,
- "wildcard": 31527,
- "xml__": 31121,
- "xsd__": 31172
- }
last-checkpoint/config.json DELETED
@@ -1,182 +0,0 @@
- {
- "_commit_hash": "f79f78df17138d38be1b6aa5041a13bf715c3e86",
- "_name_or_path": "JulienRPA/BERT2BERT_pretrained_LC-QuAD_2.0",
- "architectures": [
- "EncoderDecoderModel"
- ],
- "decoder": {
- "_name_or_path": "decoder_added_vocab",
- "add_cross_attention": true,
- "architectures": [
- "BertModel"
- ],
- "attention_probs_dropout_prob": 0.1,
- "bad_words_ids": null,
- "begin_suppress_tokens": null,
- "bos_token_id": null,
- "chunk_size_feed_forward": 0,
- "classifier_dropout": null,
- "cross_attention_hidden_size": null,
- "decoder_start_token_id": null,
- "diversity_penalty": 0.0,
- "do_sample": false,
- "early_stopping": false,
- "encoder_no_repeat_ngram_size": 0,
- "eos_token_id": null,
- "exponential_decay_length_penalty": null,
- "finetuning_task": null,
- "forced_bos_token_id": null,
- "forced_eos_token_id": null,
- "gradient_checkpointing": false,
- "hidden_act": "gelu",
- "hidden_dropout_prob": 0.1,
- "hidden_size": 768,
- "id2label": {
- "0": "LABEL_0",
- "1": "LABEL_1"
- },
- "initializer_range": 0.02,
- "intermediate_size": 3072,
- "is_decoder": true,
- "is_encoder_decoder": false,
- "label2id": {
- "LABEL_0": 0,
- "LABEL_1": 1
- },
- "layer_norm_eps": 1e-12,
- "length_penalty": 1.0,
- "max_length": 20,
- "max_position_embeddings": 512,
- "min_length": 0,
- "model_type": "bert",
- "no_repeat_ngram_size": 0,
- "num_attention_heads": 12,
- "num_beam_groups": 1,
- "num_beams": 1,
- "num_hidden_layers": 12,
- "num_return_sequences": 1,
- "output_attentions": false,
- "output_hidden_states": false,
- "output_scores": false,
- "pad_token_id": 0,
- "position_embedding_type": "absolute",
- "prefix": null,
- "problem_type": null,
- "pruned_heads": {},
- "remove_invalid_values": false,
- "repetition_penalty": 1.0,
- "return_dict": true,
- "return_dict_in_generate": false,
- "sep_token_id": null,
- "suppress_tokens": null,
- "task_specific_params": null,
- "temperature": 1.0,
- "tf_legacy_loss": false,
- "tie_encoder_decoder": false,
- "tie_word_embeddings": true,
- "tokenizer_class": null,
- "top_k": 50,
- "top_p": 1.0,
- "torch_dtype": "float32",
- "torchscript": false,
- "transformers_version": "4.30.0.dev0",
- "type_vocab_size": 2,
- "typical_p": 1.0,
- "use_bfloat16": false,
- "use_cache": true,
- "vocab_size": 31560
- },
- "decoder_start_token_id": 101,
- "early_stopping": true,
- "encoder": {
- "_name_or_path": "encoder_added_vocab",
- "add_cross_attention": false,
- "architectures": [
- "BertModel"
- ],
- "attention_probs_dropout_prob": 0.1,
- "bad_words_ids": null,
- "begin_suppress_tokens": null,
- "bos_token_id": null,
- "chunk_size_feed_forward": 0,
- "classifier_dropout": null,
- "cross_attention_hidden_size": null,
- "decoder_start_token_id": null,
- "diversity_penalty": 0.0,
- "do_sample": false,
- "early_stopping": false,
- "encoder_no_repeat_ngram_size": 0,
- "eos_token_id": null,
- "exponential_decay_length_penalty": null,
- "finetuning_task": null,
- "forced_bos_token_id": null,
- "forced_eos_token_id": null,
- "hidden_act": "gelu",
- "hidden_dropout_prob": 0.1,
- "hidden_size": 768,
- "id2label": {
- "0": "LABEL_0",
- "1": "LABEL_1"
- },
- "initializer_range": 0.02,
- "intermediate_size": 3072,
- "is_decoder": false,
- "is_encoder_decoder": false,
- "label2id": {
- "LABEL_0": 0,
- "LABEL_1": 1
- },
- "layer_norm_eps": 1e-12,
- "length_penalty": 1.0,
- "max_length": 20,
- "max_position_embeddings": 512,
- "min_length": 0,
- "model_type": "bert",
- "no_repeat_ngram_size": 0,
- "num_attention_heads": 12,
- "num_beam_groups": 1,
- "num_beams": 1,
- "num_hidden_layers": 12,
- "num_return_sequences": 1,
- "output_attentions": false,
- "output_hidden_states": false,
- "output_scores": false,
- "pad_token_id": 0,
- "position_embedding_type": "absolute",
- "prefix": null,
- "problem_type": null,
- "pruned_heads": {},
- "remove_invalid_values": false,
- "repetition_penalty": 1.0,
- "return_dict": true,
- "return_dict_in_generate": false,
- "sep_token_id": null,
- "suppress_tokens": null,
- "task_specific_params": null,
- "temperature": 1.0,
- "tf_legacy_loss": false,
- "tie_encoder_decoder": false,
- "tie_word_embeddings": true,
- "tokenizer_class": null,
- "top_k": 50,
- "top_p": 1.0,
- "torch_dtype": "float32",
- "torchscript": false,
- "transformers_version": "4.30.0.dev0",
- "type_vocab_size": 2,
- "typical_p": 1.0,
- "use_bfloat16": false,
- "use_cache": true,
- "vocab_size": 31560
- },
- "eos_token_id": 102,
- "is_encoder_decoder": true,
- "max_length": 256,
- "min_length": 16,
- "model_type": "encoder-decoder",
- "num_beams": 10,
- "pad_token_id": 0,
- "torch_dtype": "float32",
- "transformers_version": null,
- "vocab_size": 31560
- }
last-checkpoint/optimizer.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:7dc14a418d59224cba58e564b6d6eef9aa9100a97c4b5e0ac5c3cbc885eaf925
- size 1987250795
last-checkpoint/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:bcb324ae10c9946ff9ef4af6f63f22b2dd06013b12fa4e88d3618c532cbf3f06
- size 996026489
last-checkpoint/rng_state.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:8236b1efeea9a0cb188d1516c456991f957b0cd275f7723797fadddb72ce7a7a
- size 14575
last-checkpoint/scaler.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:9ca778e3dad5cb50f422990d67ea86870f277935e7ee0e0941e09a709f60546d
- size 557
last-checkpoint/scheduler.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:92153a01a9bc6795849b819ba557c141c4265ba3ff9cfeb2e9038a64ec17c505
- size 627
last-checkpoint/special_tokens_map.json DELETED
@@ -1,7 +0,0 @@
- {
- "cls_token": "[CLS]",
- "mask_token": "[MASK]",
- "pad_token": "[PAD]",
- "sep_token": "[SEP]",
- "unk_token": "[UNK]"
- }
last-checkpoint/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/tokenizer_config.json DELETED
@@ -1,15 +0,0 @@
- {
- "clean_up_tokenization_spaces": true,
- "cls_token": "[CLS]",
- "do_basic_tokenize": true,
- "do_lower_case": false,
- "mask_token": "[MASK]",
- "model_max_length": 256,
- "never_split": null,
- "pad_token": "[PAD]",
- "sep_token": "[SEP]",
- "strip_accents": null,
- "tokenize_chinese_chars": true,
- "tokenizer_class": "BertTokenizer",
- "unk_token": "[UNK]"
- }
last-checkpoint/trainer_state.json DELETED
@@ -1,124 +0,0 @@
- {
- "best_metric": null,
- "best_model_checkpoint": null,
- "epoch": 300.0,
- "global_step": 6000,
- "is_hyper_param_search": false,
- "is_local_process_zero": true,
- "is_world_process_zero": true,
- "log_history": [
- {
- "epoch": 50.0,
- "learning_rate": 2.4875e-05,
- "loss": 2.9129,
- "step": 1000
- },
- {
- "epoch": 50.0,
- "eval_bleu": 92.7571,
- "eval_em": 0.2302,
- "eval_gen_len": 79.2806,
- "eval_loss": 0.3861088156700134,
- "eval_rm": 0.2422,
- "eval_runtime": 119.9581,
- "eval_samples_per_second": 3.476,
- "eval_steps_per_second": 0.225,
- "step": 1000
- },
- {
- "epoch": 100.0,
- "learning_rate": 4.9875000000000006e-05,
- "loss": 0.052,
- "step": 2000
- },
- {
- "epoch": 100.0,
- "eval_bleu": 95.3257,
- "eval_em": 0.5156,
- "eval_gen_len": 80.6859,
- "eval_loss": 0.3531915545463562,
- "eval_rm": 0.5635,
- "eval_runtime": 114.8636,
- "eval_samples_per_second": 3.63,
- "eval_steps_per_second": 0.235,
- "step": 2000
- },
- {
- "epoch": 150.0,
- "learning_rate": 3.75625e-05,
- "loss": 0.0155,
- "step": 3000
- },
- {
- "epoch": 150.0,
- "eval_bleu": 94.8017,
- "eval_em": 0.5659,
- "eval_gen_len": 79.3165,
- "eval_loss": 0.4279763698577881,
- "eval_rm": 0.6355,
- "eval_runtime": 113.6979,
- "eval_samples_per_second": 3.668,
- "eval_steps_per_second": 0.237,
- "step": 3000
- },
- {
- "epoch": 200.0,
- "learning_rate": 2.50625e-05,
- "loss": 0.0086,
- "step": 4000
- },
- {
- "epoch": 200.0,
- "eval_bleu": 95.5885,
- "eval_em": 0.5803,
- "eval_gen_len": 80.6978,
- "eval_loss": 0.40772923827171326,
- "eval_rm": 0.6283,
- "eval_runtime": 118.0147,
- "eval_samples_per_second": 3.533,
- "eval_steps_per_second": 0.229,
- "step": 4000
- },
- {
- "epoch": 250.0,
- "learning_rate": 1.2562499999999999e-05,
- "loss": 0.0051,
- "step": 5000
- },
- {
- "epoch": 250.0,
- "eval_bleu": 95.5923,
- "eval_em": 0.6019,
- "eval_gen_len": 80.6523,
- "eval_loss": 0.41692906618118286,
- "eval_rm": 0.6523,
- "eval_runtime": 120.2437,
- "eval_samples_per_second": 3.468,
- "eval_steps_per_second": 0.225,
- "step": 5000
- },
- {
- "epoch": 300.0,
- "learning_rate": 6.250000000000001e-08,
- "loss": 0.0038,
- "step": 6000
- },
- {
- "epoch": 300.0,
- "eval_bleu": 95.481,
- "eval_em": 0.6019,
- "eval_gen_len": 80.4676,
- "eval_loss": 0.42530182003974915,
- "eval_rm": 0.6547,
- "eval_runtime": 133.449,
- "eval_samples_per_second": 3.125,
- "eval_steps_per_second": 0.202,
- "step": 6000
- }
- ],
- "max_steps": 6000,
- "num_train_epochs": 300,
- "total_flos": 1.014399554069376e+16,
- "trial_name": null,
- "trial_params": null
- }
last-checkpoint/training_args.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:e413ba21b6953cf3f70964ee472c90db4949de528b021cf4b1b2e3ae81f8a6c4
- size 4155
last-checkpoint/vocab.txt DELETED
The diff for this file is too large to render. See raw diff
 
runs/May24_08-16-03_555aebd6415b/events.out.tfevents.1684916185.555aebd6415b.20495.0 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f2a18636c33728d779873fe4ff60ce1d282acf6f14be5b3378c3077e3bc8ff02
- size 12278
+ oid sha256:1fe7f4ca90744d04008e36732e5862253396fd9e9c82d17e6802782619f666ba
+ size 12632