bakrianoo committed
Commit 7b25d84
1 Parent(s): b072ea0

Version-1.2

config.json CHANGED
@@ -11,48 +11,48 @@
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "id2label": {
- "0": "O",
- "1": "B-nationality",
- "2": "B-event",
- "3": "B-person",
- "4": "B-artwork",
- "5": "B-location",
- "6": "B-product",
- "7": "B-organization",
- "8": "B-job",
- "9": "B-time",
- "10": "I-nationality",
- "11": "I-event",
- "12": "I-person",
- "13": "I-artwork",
- "14": "I-location",
- "15": "I-product",
- "16": "I-organization",
- "17": "I-job",
- "18": "I-time"
+ "0": "LABEL_0",
+ "1": "LABEL_1",
+ "2": "LABEL_2",
+ "3": "LABEL_3",
+ "4": "LABEL_4",
+ "5": "LABEL_5",
+ "6": "LABEL_6",
+ "7": "LABEL_7",
+ "8": "LABEL_8",
+ "9": "LABEL_9",
+ "10": "LABEL_10",
+ "11": "LABEL_11",
+ "12": "LABEL_12",
+ "13": "LABEL_13",
+ "14": "LABEL_14",
+ "15": "LABEL_15",
+ "16": "LABEL_16",
+ "17": "LABEL_17",
+ "18": "LABEL_18"
  },
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "label2id": {
- "O": 0,
- "B-nationality": 1,
- "B-event": 2,
- "B-person": 3,
- "B-artwork": 4,
- "B-location": 5,
- "B-product": 6,
- "B-organization": 7,
- "B-job": 8,
- "B-time": 9,
- "I-nationality": 10,
- "I-event": 11,
- "I-person": 12,
- "I-artwork": 13,
- "I-location": 14,
- "I-product": 15,
- "I-organization": 16,
- "I-job": 17,
- "I-time": 18
+ "LABEL_0": 0,
+ "LABEL_1": 1,
+ "LABEL_10": 10,
+ "LABEL_11": 11,
+ "LABEL_12": 12,
+ "LABEL_13": 13,
+ "LABEL_14": 14,
+ "LABEL_15": 15,
+ "LABEL_16": 16,
+ "LABEL_17": 17,
+ "LABEL_18": 18,
+ "LABEL_2": 2,
+ "LABEL_3": 3,
+ "LABEL_4": 4,
+ "LABEL_5": 5,
+ "LABEL_6": 6,
+ "LABEL_7": 7,
+ "LABEL_8": 8,
+ "LABEL_9": 9
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
@@ -62,7 +62,7 @@
  "output_past": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
- "transformers_version": "4.6.0",
+ "transformers_version": "4.6.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 250002
model_args.json CHANGED
@@ -1 +1 @@
- {"adam_epsilon": 1e-08, "best_model_dir": "outputs/best_model", "cache_dir": "cache_dir/", "config": {}, "cosine_schedule_num_cycles": 0.5, "custom_layer_parameters": [], "custom_parameter_groups": [], "dataloader_num_workers": 0, "do_lower_case": false, "dynamic_quantize": false, "early_stopping_consider_epochs": false, "early_stopping_delta": 0, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 3, "encoding": null, "adafactor_eps": [1e-30, 0.001], "adafactor_clip_threshold": 1.0, "adafactor_decay_rate": -0.8, "adafactor_beta1": null, "adafactor_scale_parameter": true, "adafactor_relative_step": true, "adafactor_warmup_init": true, "eval_batch_size": 8, "evaluate_during_training": false, "evaluate_during_training_silent": true, "evaluate_during_training_steps": 2000, "evaluate_during_training_verbose": false, "evaluate_each_epoch": true, "fp16": true, "gradient_accumulation_steps": 1, "learning_rate": 2e-05, "local_rank": -1, "logging_steps": 50, "manual_seed": null, "max_grad_norm": 1.0, "max_seq_length": 128, "model_name": "xlm-roberta-large", "model_type": "xlmroberta", "multiprocessing_chunksize": 500, "n_gpu": 1, "no_cache": false, "no_save": false, "not_saved_args": [], "num_train_epochs": 10, "optimizer": "AdamW", "output_dir": "outputs/", "overwrite_output_dir": true, "process_count": 1, "polynomial_decay_schedule_lr_end": 1e-07, "polynomial_decay_schedule_power": 1.0, "quantized_model": false, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": true, "save_model_every_epoch": true, "save_optimizer_and_scheduler": true, "save_steps": 2000, "scheduler": "linear_schedule_with_warmup", "silent": false, "skip_special_tokens": true, "tensorboard_dir": null, "thread_count": null, "train_batch_size": 16, "train_custom_parameters_only": false, "use_cached_eval_features": false, "use_early_stopping": false, "use_multiprocessing": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 563, "weight_decay": 0.001, "model_class": "NERModel", "classification_report": false, "labels_list": ["O", "B-job", "I-job", "B-nationality", "B-person", "I-person", "B-location", "B-time", "I-time", "B-event", "I-event", "B-organization", "I-organization", "I-location", "I-nationality", "B-product", "I-product", "B-artwork", "I-artwork"], "lazy_loading": false, "lazy_loading_start_line": 0, "onnx": false, "special_tokens_list": []}
+ {"adafactor_beta1": null, "adafactor_clip_threshold": 1.0, "adafactor_decay_rate": -0.8, "adafactor_eps": [1e-30, 0.001], "adafactor_relative_step": true, "adafactor_scale_parameter": true, "adafactor_warmup_init": true, "adam_epsilon": 1e-08, "best_model_dir": "outputs/best_model", "cache_dir": "cache_dir/", "config": {}, "cosine_schedule_num_cycles": 0.5, "custom_layer_parameters": [], "custom_parameter_groups": [], "dataloader_num_workers": 0, "do_lower_case": false, "dynamic_quantize": false, "early_stopping_consider_epochs": false, "early_stopping_delta": 0, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 3, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": false, "evaluate_during_training_silent": true, "evaluate_during_training_steps": 2000, "evaluate_during_training_verbose": false, "evaluate_each_epoch": true, "fp16": true, "gradient_accumulation_steps": 1, "learning_rate": 2e-05, "local_rank": -1, "logging_steps": 50, "manual_seed": null, "max_grad_norm": 1.0, "max_seq_length": 128, "model_name": "xlm-roberta-large", "model_type": "xlmroberta", "multiprocessing_chunksize": -1, "n_gpu": 1, "no_cache": false, "no_save": false, "not_saved_args": [], "num_train_epochs": 10, "optimizer": "AdamW", "output_dir": "outputs/", "overwrite_output_dir": true, "polynomial_decay_schedule_lr_end": 1e-07, "polynomial_decay_schedule_power": 1.0, "process_count": 1, "quantized_model": false, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": true, "save_model_every_epoch": true, "save_optimizer_and_scheduler": true, "save_steps": 2000, "scheduler": "linear_schedule_with_warmup", "silent": false, "skip_special_tokens": true, "tensorboard_dir": null, "thread_count": null, "tokenizer_name": null, "tokenizer_type": null, "train_batch_size": 8, "train_custom_parameters_only": false, "use_cached_eval_features": false, "use_early_stopping": false, "use_hf_datasets": false, "use_multiprocessing": true, "use_multiprocessing_for_evaluation": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 1117, "weight_decay": 0.001, "model_class": "NERModel", "classification_report": false, "labels_list": ["O", "B-job", "I-job", "B-nationality", "B-person", "I-person", "B-location", "B-time", "I-time", "B-event", "I-event", "B-organization", "I-organization", "I-location", "I-nationality", "B-product", "I-product", "B-artwork", "I-artwork"], "lazy_loading": false, "lazy_loading_start_line": 0, "onnx": false, "special_tokens_list": []}
optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:76682a554be0386f7d8aa099bb31a285431a884690cf6d1be262e999635ee640
+ size 4471129517
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cf4a4d17471d6908750beeb0998d2cb4a744dbe3ca4aaabcd352a28d231ae3b9
+ oid sha256:a68116126f007df58b80d3d4241d94834e9f324cf42c64e18115b02b6704fb95
  size 2235613943
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c022ff4c770b8f48647cabb531eed6c0b6843207e90c408c88bfa0ee2fcb08dd
+ oid sha256:d867df0a42b652e7db484987d7db7a415b4fbf7a871dcba8ed42223e4d3da6f5
  size 623
special_tokens_map.json CHANGED
@@ -1 +1 @@
- {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}}
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}}
tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1 +1 @@
- {"bos_token": "<s>", "eos_token": "</s>", "sep_token": "</s>", "cls_token": "<s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "xlm-roberta-large"}
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "sp_model_kwargs": {}, "do_lower_case": false, "model_max_length": 512, "special_tokens_map_file": null, "tokenizer_file": "/root/.cache/huggingface/transformers/7766c86e10505ed9b39af34e456480399bf06e35b36b8f2b917460a2dbe94e59.a984cf52fc87644bd4a2165f1e07e0ac880272c1e82d648b4674907056912bd7", "name_or_path": "xlm-roberta-large"}
trainer_state.json DELETED
@@ -1,571 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 3.704618424302297,
5
- "global_step": 30000,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.06,
12
- "learning_rate": 1.9794187865316542e-05,
13
- "loss": 0.7638,
14
- "step": 500
15
- },
16
- {
17
- "epoch": 0.12,
18
- "learning_rate": 1.958837573063308e-05,
19
- "loss": 0.5165,
20
- "step": 1000
21
- },
22
- {
23
- "epoch": 0.19,
24
- "learning_rate": 1.938256359594962e-05,
25
- "loss": 0.457,
26
- "step": 1500
27
- },
28
- {
29
- "epoch": 0.25,
30
- "learning_rate": 1.917675146126616e-05,
31
- "loss": 0.4516,
32
- "step": 2000
33
- },
34
- {
35
- "epoch": 0.31,
36
- "learning_rate": 1.8970939326582696e-05,
37
- "loss": 0.4095,
38
- "step": 2500
39
- },
40
- {
41
- "epoch": 0.37,
42
- "learning_rate": 1.8765127191899236e-05,
43
- "loss": 0.4188,
44
- "step": 3000
45
- },
46
- {
47
- "epoch": 0.43,
48
- "learning_rate": 1.8559315057215776e-05,
49
- "loss": 0.4028,
50
- "step": 3500
51
- },
52
- {
53
- "epoch": 0.49,
54
- "learning_rate": 1.8353502922532313e-05,
55
- "loss": 0.3846,
56
- "step": 4000
57
- },
58
- {
59
- "epoch": 0.56,
60
- "learning_rate": 1.8147690787848853e-05,
61
- "loss": 0.3813,
62
- "step": 4500
63
- },
64
- {
65
- "epoch": 0.62,
66
- "learning_rate": 1.7941878653165393e-05,
67
- "loss": 0.3842,
68
- "step": 5000
69
- },
70
- {
71
- "epoch": 0.68,
72
- "learning_rate": 1.773606651848193e-05,
73
- "loss": 0.3977,
74
- "step": 5500
75
- },
76
- {
77
- "epoch": 0.74,
78
- "learning_rate": 1.753025438379847e-05,
79
- "loss": 0.3749,
80
- "step": 6000
81
- },
82
- {
83
- "epoch": 0.8,
84
- "learning_rate": 1.732444224911501e-05,
85
- "loss": 0.3578,
86
- "step": 6500
87
- },
88
- {
89
- "epoch": 0.86,
90
- "learning_rate": 1.711863011443155e-05,
91
- "loss": 0.339,
92
- "step": 7000
93
- },
94
- {
95
- "epoch": 0.93,
96
- "learning_rate": 1.6912817979748087e-05,
97
- "loss": 0.3704,
98
- "step": 7500
99
- },
100
- {
101
- "epoch": 0.99,
102
- "learning_rate": 1.6707005845064627e-05,
103
- "loss": 0.3423,
104
- "step": 8000
105
- },
106
- {
107
- "epoch": 1.05,
108
- "learning_rate": 1.6501193710381164e-05,
109
- "loss": 0.2788,
110
- "step": 8500
111
- },
112
- {
113
- "epoch": 1.11,
114
- "learning_rate": 1.6295381575697704e-05,
115
- "loss": 0.2832,
116
- "step": 9000
117
- },
118
- {
119
- "epoch": 1.17,
120
- "learning_rate": 1.6089569441014244e-05,
121
- "loss": 0.3032,
122
- "step": 9500
123
- },
124
- {
125
- "epoch": 1.23,
126
- "learning_rate": 1.588375730633078e-05,
127
- "loss": 0.2774,
128
- "step": 10000
129
- },
130
- {
131
- "epoch": 1.23,
132
- "eval_artwork": {
133
- "f1": 0.3447251114413076,
134
- "number": 295,
135
- "precision": 0.30687830687830686,
136
- "recall": 0.39322033898305087
137
- },
138
- "eval_event": {
139
- "f1": 0.43632567849686854,
140
- "number": 519,
141
- "precision": 0.4760820045558087,
142
- "recall": 0.4026974951830443
143
- },
144
- "eval_job": {
145
- "f1": 0.6786657067434606,
146
- "number": 2257,
147
- "precision": 0.7403141361256544,
148
- "recall": 0.6264953478068233
149
- },
150
- "eval_location": {
151
- "f1": 0.6351052692799815,
152
- "number": 4375,
153
- "precision": 0.6466129796305069,
154
- "recall": 0.624
155
- },
156
- "eval_loss": 0.9446586966514587,
157
- "eval_nationality": {
158
- "f1": 0.6808716707021791,
159
- "number": 2137,
160
- "precision": 0.7054691419969895,
161
- "recall": 0.6579316799251287
162
- },
163
- "eval_organization": {
164
- "f1": 0.5520833333333333,
165
- "number": 1982,
166
- "precision": 0.6044417767106842,
167
- "recall": 0.5080726538849647
168
- },
169
- "eval_overall_accuracy": 0.8332146471271692,
170
- "eval_overall_f1": 0.6393731397679646,
171
- "eval_overall_precision": 0.6567667061833157,
172
- "eval_overall_recall": 0.6228770933191313,
173
- "eval_person": {
174
- "f1": 0.7589572006134114,
175
- "number": 3322,
176
- "precision": 0.7068293949623474,
177
- "recall": 0.8193859121011439
178
- },
179
- "eval_product": {
180
- "f1": 0.3201754385964912,
181
- "number": 323,
182
- "precision": 0.5488721804511278,
183
- "recall": 0.2260061919504644
184
- },
185
- "eval_runtime": 43.8587,
186
- "eval_samples_per_second": 35.911,
187
- "eval_time": {
188
- "f1": 0.5435339308578744,
189
- "number": 1689,
190
- "precision": 0.5916376306620209,
191
- "recall": 0.5026642984014209
192
- },
193
- "step": 10000
194
- },
195
- {
196
- "epoch": 1.3,
197
- "learning_rate": 1.567794517164732e-05,
198
- "loss": 0.2914,
199
- "step": 10500
200
- },
201
- {
202
- "epoch": 1.36,
203
- "learning_rate": 1.547213303696386e-05,
204
- "loss": 0.2878,
205
- "step": 11000
206
- },
207
- {
208
- "epoch": 1.42,
209
- "learning_rate": 1.52663209022804e-05,
210
- "loss": 0.2649,
211
- "step": 11500
212
- },
213
- {
214
- "epoch": 1.48,
215
- "learning_rate": 1.506050876759694e-05,
216
- "loss": 0.2794,
217
- "step": 12000
218
- },
219
- {
220
- "epoch": 1.54,
221
- "learning_rate": 1.4854696632913477e-05,
222
- "loss": 0.288,
223
- "step": 12500
224
- },
225
- {
226
- "epoch": 1.61,
227
- "learning_rate": 1.4648884498230017e-05,
228
- "loss": 0.2663,
229
- "step": 13000
230
- },
231
- {
232
- "epoch": 1.67,
233
- "learning_rate": 1.4443072363546555e-05,
234
- "loss": 0.2853,
235
- "step": 13500
236
- },
237
- {
238
- "epoch": 1.73,
239
- "learning_rate": 1.4237260228863094e-05,
240
- "loss": 0.2718,
241
- "step": 14000
242
- },
243
- {
244
- "epoch": 1.79,
245
- "learning_rate": 1.4031448094179634e-05,
246
- "loss": 0.2944,
247
- "step": 14500
248
- },
249
- {
250
- "epoch": 1.85,
251
- "learning_rate": 1.3825635959496172e-05,
252
- "loss": 0.2804,
253
- "step": 15000
254
- },
255
- {
256
- "epoch": 1.91,
257
- "learning_rate": 1.3619823824812712e-05,
258
- "loss": 0.2631,
259
- "step": 15500
260
- },
261
- {
262
- "epoch": 1.98,
263
- "learning_rate": 1.3414011690129251e-05,
264
- "loss": 0.2937,
265
- "step": 16000
266
- },
267
- {
268
- "epoch": 2.04,
269
- "learning_rate": 1.3208199555445791e-05,
270
- "loss": 0.2301,
271
- "step": 16500
272
- },
273
- {
274
- "epoch": 2.1,
275
- "learning_rate": 1.300238742076233e-05,
276
- "loss": 0.2136,
277
- "step": 17000
278
- },
279
- {
280
- "epoch": 2.16,
281
- "learning_rate": 1.2796575286078868e-05,
282
- "loss": 0.2054,
283
- "step": 17500
284
- },
285
- {
286
- "epoch": 2.22,
287
- "learning_rate": 1.2590763151395406e-05,
288
- "loss": 0.2215,
289
- "step": 18000
290
- },
291
- {
292
- "epoch": 2.28,
293
- "learning_rate": 1.2384951016711945e-05,
294
- "loss": 0.2237,
295
- "step": 18500
296
- },
297
- {
298
- "epoch": 2.35,
299
- "learning_rate": 1.2179138882028485e-05,
300
- "loss": 0.2124,
301
- "step": 19000
302
- },
303
- {
304
- "epoch": 2.41,
305
- "learning_rate": 1.1973326747345024e-05,
306
- "loss": 0.2042,
307
- "step": 19500
308
- },
309
- {
310
- "epoch": 2.47,
311
- "learning_rate": 1.1767514612661564e-05,
312
- "loss": 0.2253,
313
- "step": 20000
314
- },
315
- {
316
- "epoch": 2.47,
317
- "eval_artwork": {
318
- "f1": 0.30398069963811825,
319
- "number": 295,
320
- "precision": 0.23595505617977527,
321
- "recall": 0.4271186440677966
322
- },
323
- "eval_event": {
324
- "f1": 0.38726790450928383,
325
- "number": 519,
326
- "precision": 0.35784313725490197,
327
- "recall": 0.42196531791907516
328
- },
329
- "eval_job": {
330
- "f1": 0.6671604938271605,
331
- "number": 2257,
332
- "precision": 0.7534857780256553,
333
- "recall": 0.5985821887461231
334
- },
335
- "eval_location": {
336
- "f1": 0.6328576483075834,
337
- "number": 4375,
338
- "precision": 0.6537524366471735,
339
- "recall": 0.6132571428571428
340
- },
341
- "eval_loss": 1.0017756223678589,
342
- "eval_nationality": {
343
- "f1": 0.6760161635369623,
344
- "number": 2137,
345
- "precision": 0.6869565217391305,
346
- "recall": 0.6654188114178755
347
- },
348
- "eval_organization": {
349
- "f1": 0.5447824850725049,
350
- "number": 1982,
351
- "precision": 0.6241042345276873,
352
- "recall": 0.48335015136226034
353
- },
354
- "eval_overall_accuracy": 0.8219457041840011,
355
- "eval_overall_f1": 0.6276163319784019,
356
- "eval_overall_precision": 0.643866309827597,
357
- "eval_overall_recall": 0.6121664003787206,
358
- "eval_person": {
359
- "f1": 0.7596614950634696,
360
- "number": 3322,
361
- "precision": 0.7147027600849257,
362
- "recall": 0.8106562311860325
363
- },
364
- "eval_product": {
365
- "f1": 0.3699421965317919,
366
- "number": 323,
367
- "precision": 0.4897959183673469,
368
- "recall": 0.29721362229102166
369
- },
370
- "eval_runtime": 44.5273,
371
- "eval_samples_per_second": 35.372,
372
- "eval_time": {
373
- "f1": 0.5069974554707379,
374
- "number": 1689,
375
- "precision": 0.547766323024055,
376
- "recall": 0.4718768502072232
377
- },
378
- "step": 20000
379
- },
380
- {
381
- "epoch": 2.53,
382
- "learning_rate": 1.1561702477978102e-05,
383
- "loss": 0.2197,
384
- "step": 20500
385
- },
386
- {
387
- "epoch": 2.59,
388
- "learning_rate": 1.1355890343294642e-05,
389
- "loss": 0.2239,
390
- "step": 21000
391
- },
392
- {
393
- "epoch": 2.65,
394
- "learning_rate": 1.115007820861118e-05,
395
- "loss": 0.2231,
396
- "step": 21500
397
- },
398
- {
399
- "epoch": 2.72,
400
- "learning_rate": 1.0944266073927721e-05,
401
- "loss": 0.2045,
402
- "step": 22000
403
- },
404
- {
405
- "epoch": 2.78,
406
- "learning_rate": 1.0738453939244258e-05,
407
- "loss": 0.1935,
408
- "step": 22500
409
- },
410
- {
411
- "epoch": 2.84,
412
- "learning_rate": 1.0532641804560796e-05,
413
- "loss": 0.2156,
414
- "step": 23000
415
- },
416
- {
417
- "epoch": 2.9,
418
- "learning_rate": 1.0326829669877336e-05,
419
- "loss": 0.2088,
420
- "step": 23500
421
- },
422
- {
423
- "epoch": 2.96,
424
- "learning_rate": 1.0121017535193875e-05,
425
- "loss": 0.2022,
426
- "step": 24000
427
- },
428
- {
429
- "epoch": 3.03,
430
- "learning_rate": 9.915205400510415e-06,
431
- "loss": 0.1918,
432
- "step": 24500
433
- },
434
- {
435
- "epoch": 3.09,
436
- "learning_rate": 9.709393265826953e-06,
437
- "loss": 0.1494,
438
- "step": 25000
439
- },
440
- {
441
- "epoch": 3.15,
442
- "learning_rate": 9.503581131143494e-06,
443
- "loss": 0.1502,
444
- "step": 25500
445
- },
446
- {
447
- "epoch": 3.21,
448
- "learning_rate": 9.297768996460032e-06,
449
- "loss": 0.1741,
450
- "step": 26000
451
- },
452
- {
453
- "epoch": 3.27,
454
- "learning_rate": 9.09195686177657e-06,
455
- "loss": 0.1609,
456
- "step": 26500
457
- },
458
- {
459
- "epoch": 3.33,
460
- "learning_rate": 8.88614472709311e-06,
461
- "loss": 0.1584,
462
- "step": 27000
463
- },
464
- {
465
- "epoch": 3.4,
466
- "learning_rate": 8.680332592409649e-06,
467
- "loss": 0.1524,
468
- "step": 27500
469
- },
470
- {
471
- "epoch": 3.46,
472
- "learning_rate": 8.47452045772619e-06,
473
- "loss": 0.1513,
474
- "step": 28000
475
- },
476
- {
477
- "epoch": 3.52,
478
- "learning_rate": 8.268708323042726e-06,
479
- "loss": 0.1595,
480
- "step": 28500
481
- },
482
- {
483
- "epoch": 3.58,
484
- "learning_rate": 8.062896188359266e-06,
485
- "loss": 0.1631,
486
- "step": 29000
487
- },
488
- {
489
- "epoch": 3.64,
490
- "learning_rate": 7.857084053675805e-06,
491
- "loss": 0.1795,
492
- "step": 29500
493
- },
494
- {
495
- "epoch": 3.7,
496
- "learning_rate": 7.651271918992345e-06,
497
- "loss": 0.1602,
498
- "step": 30000
499
- },
500
- {
501
- "epoch": 3.7,
502
- "eval_artwork": {
503
- "f1": 0.32594936708860756,
504
- "number": 295,
505
- "precision": 0.3056379821958457,
506
- "recall": 0.34915254237288135
507
- },
508
- "eval_event": {
509
- "f1": 0.42843232716650437,
510
- "number": 519,
511
- "precision": 0.4330708661417323,
512
- "recall": 0.4238921001926782
513
- },
514
- "eval_job": {
515
- "f1": 0.6932195353247985,
516
- "number": 2257,
517
- "precision": 0.7455379908210097,
518
- "recall": 0.6477625166149756
519
- },
520
- "eval_location": {
521
- "f1": 0.6270144927536232,
522
- "number": 4375,
523
- "precision": 0.636235294117647,
524
- "recall": 0.6180571428571429
525
- },
526
- "eval_loss": 1.0499603748321533,
527
- "eval_nationality": {
528
- "f1": 0.6754837129561596,
529
- "number": 2137,
530
- "precision": 0.7086330935251799,
531
- "recall": 0.6452971455311184
532
- },
533
- "eval_organization": {
534
- "f1": 0.5687919463087249,
535
- "number": 1982,
536
- "precision": 0.6380175658720201,
537
- "recall": 0.5131180625630676
538
- },
539
- "eval_overall_accuracy": 0.8283547593168672,
540
- "eval_overall_f1": 0.6409662071489569,
541
- "eval_overall_precision": 0.6613380326011706,
542
- "eval_overall_recall": 0.6218119415350021,
543
- "eval_person": {
544
- "f1": 0.7734217353704237,
545
- "number": 3322,
546
- "precision": 0.7419800884955752,
547
- "recall": 0.8076459963877183
548
- },
549
- "eval_product": {
550
- "f1": 0.4022556390977444,
551
- "number": 323,
552
- "precision": 0.5119617224880383,
553
- "recall": 0.33126934984520123
554
- },
555
- "eval_runtime": 43.8719,
556
- "eval_samples_per_second": 35.9,
557
- "eval_time": {
558
- "f1": 0.5277161862527716,
559
- "number": 1689,
560
- "precision": 0.5674386920980926,
561
- "recall": 0.4931912374185909
562
- },
563
- "step": 30000
564
- }
565
- ],
566
- "max_steps": 48588,
567
- "num_train_epochs": 6,
568
- "total_flos": 3961931727576942.0,
569
- "trial_name": null,
570
- "trial_params": null
571
- }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e527ceb877458f834cb18192456e4ff3a0e2f3d9c8ae07805f23f677c165715f
- size 2479
+ oid sha256:38da682fc8f5b1a45d948e5165d753debd1ee21627415b23d05208ce7115c6d8
+ size 3183