bakrianoo committed on
Commit
aaebb3e
1 Parent(s): 1964770

update NER model

Browse files
config.json CHANGED
@@ -10,7 +10,8 @@
10
  "hidden_act": "gelu",
11
  "hidden_dropout_prob": 0.1,
12
  "hidden_size": 1024,
13
- "id2label": {"0": "O",
 
14
  "1": "B-job",
15
  "10": "I-event",
16
  "11": "B-organization",
@@ -28,28 +29,31 @@
28
  "6": "B-location",
29
  "7": "B-time",
30
  "8": "I-time",
31
- "9": "B-event"},
 
32
  "initializer_range": 0.02,
33
  "intermediate_size": 4096,
34
- "label2id": {"B-artwork": 17,
35
- "B-event": 9,
36
- "B-job": 1,
37
- "B-location": 6,
38
- "B-nationality": 3,
39
- "B-organization": 11,
40
- "B-person": 4,
41
- "B-product": 15,
42
- "B-time": 7,
43
- "I-artwork": 18,
44
- "I-event": 10,
45
- "I-job": 2,
46
- "I-location": 13,
47
- "I-nationality": 14,
48
- "I-organization": 12,
49
- "I-person": 5,
50
- "I-product": 16,
51
- "I-time": 8,
52
- "O": 0},
 
 
53
  "layer_norm_eps": 1e-05,
54
  "max_position_embeddings": 514,
55
  "model_type": "xlm-roberta",
@@ -58,7 +62,7 @@
58
  "output_past": true,
59
  "pad_token_id": 1,
60
  "position_embedding_type": "absolute",
61
- "transformers_version": "4.6.1",
62
  "type_vocab_size": 1,
63
  "use_cache": true,
64
  "vocab_size": 250002
10
  "hidden_act": "gelu",
11
  "hidden_dropout_prob": 0.1,
12
  "hidden_size": 1024,
13
+ "id2label": {
14
+ "0": "O",
15
  "1": "B-job",
16
  "10": "I-event",
17
  "11": "B-organization",
29
  "6": "B-location",
30
  "7": "B-time",
31
  "8": "I-time",
32
+ "9": "B-event"
33
+ },
34
  "initializer_range": 0.02,
35
  "intermediate_size": 4096,
36
+ "label2id": {
37
+ "B-artwork": 17,
38
+ "B-event": 9,
39
+ "B-job": 1,
40
+ "B-location": 6,
41
+ "B-nationality": 3,
42
+ "B-organization": 11,
43
+ "B-person": 4,
44
+ "B-product": 15,
45
+ "B-time": 7,
46
+ "I-artwork": 18,
47
+ "I-event": 10,
48
+ "I-job": 2,
49
+ "I-location": 13,
50
+ "I-nationality": 14,
51
+ "I-organization": 12,
52
+ "I-person": 5,
53
+ "I-product": 16,
54
+ "I-time": 8,
55
+ "O": 0
56
+ },
57
  "layer_norm_eps": 1e-05,
58
  "max_position_embeddings": 514,
59
  "model_type": "xlm-roberta",
62
  "output_past": true,
63
  "pad_token_id": 1,
64
  "position_embedding_type": "absolute",
65
+ "transformers_version": "4.8.0",
66
  "type_vocab_size": 1,
67
  "use_cache": true,
68
  "vocab_size": 250002
model_args.json DELETED
@@ -1 +0,0 @@
1
- {"adafactor_beta1": null, "adafactor_clip_threshold": 1.0, "adafactor_decay_rate": -0.8, "adafactor_eps": [1e-30, 0.001], "adafactor_relative_step": true, "adafactor_scale_parameter": true, "adafactor_warmup_init": true, "adam_epsilon": 1e-08, "best_model_dir": "outputs/best_model", "cache_dir": "cache_dir/", "config": {}, "cosine_schedule_num_cycles": 0.5, "custom_layer_parameters": [], "custom_parameter_groups": [], "dataloader_num_workers": 0, "do_lower_case": false, "dynamic_quantize": false, "early_stopping_consider_epochs": false, "early_stopping_delta": 0, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 3, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": false, "evaluate_during_training_silent": true, "evaluate_during_training_steps": 2000, "evaluate_during_training_verbose": false, "evaluate_each_epoch": true, "fp16": true, "gradient_accumulation_steps": 1, "learning_rate": 2e-05, "local_rank": -1, "logging_steps": 50, "manual_seed": null, "max_grad_norm": 1.0, "max_seq_length": 128, "model_name": "xlm-roberta-large", "model_type": "xlmroberta", "multiprocessing_chunksize": -1, "n_gpu": 1, "no_cache": false, "no_save": false, "not_saved_args": [], "num_train_epochs": 10, "optimizer": "AdamW", "output_dir": "outputs/", "overwrite_output_dir": true, "polynomial_decay_schedule_lr_end": 1e-07, "polynomial_decay_schedule_power": 1.0, "process_count": 1, "quantized_model": false, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": true, "save_model_every_epoch": true, "save_optimizer_and_scheduler": true, "save_steps": 2000, "scheduler": "linear_schedule_with_warmup", "silent": false, "skip_special_tokens": true, "tensorboard_dir": null, "thread_count": null, "tokenizer_name": null, "tokenizer_type": null, "train_batch_size": 8, "train_custom_parameters_only": false, "use_cached_eval_features": false, "use_early_stopping": false, "use_hf_datasets": false, 
"use_multiprocessing": true, "use_multiprocessing_for_evaluation": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 1117, "weight_decay": 0.001, "model_class": "NERModel", "classification_report": false, "labels_list": ["O", "B-job", "I-job", "B-nationality", "B-person", "I-person", "B-location", "B-time", "I-time", "B-event", "I-event", "B-organization", "I-organization", "I-location", "I-nationality", "B-product", "I-product", "B-artwork", "I-artwork"], "lazy_loading": false, "lazy_loading_start_line": 0, "onnx": false, "special_tokens_list": []}
 
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76682a554be0386f7d8aa099bb31a285431a884690cf6d1be262e999635ee640
3
- size 4471129517
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e015e14ae96831cdbfb558d6ebbea70a4e11dbe45658522f6573cd82d893886
3
+ size 4471110437
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a68116126f007df58b80d3d4241d94834e9f324cf42c64e18115b02b6704fb95
3
- size 2235613943
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0573363cbd9a9ac418edc7f8416d8e0251fe5937a013a47455a4662891f6885
3
+ size 2235604657
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d26bb06c30dde94f1eb2f74699c1b5cb62f3fb8d6051a2940fae5f1149470ce
3
+ size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d867df0a42b652e7db484987d7db7a415b4fbf7a871dcba8ed42223e4d3da6f5
3
  size 623
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:710a6bdce18d3c7ce72067f04b21ab94ae75a2ee4d731088bcedf84c21aed99f
3
  size 623
sentencepiece.bpe.model DELETED
Binary file (5.07 MB)
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"bos_token": "<s>", "eos_token": "</s>", "sep_token": "</s>", "cls_token": "<s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "xlm-roberta-large"}
1
+ {"bos_token": "<s>", "eos_token": "</s>", "sep_token": "</s>", "cls_token": "<s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "xlm-roberta-large", "tokenizer_class": "XLMRobertaTokenizer"}
trainer_state.json ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.3130991458892822,
3
+ "best_model_checkpoint": "/gdrive/MyDrive/Marefa/Tebyan/models/huggingface/model-xlm-roberta-large/checkpoint-13000",
4
+ "epoch": 9.0,
5
+ "global_step": 58500,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "learning_rate": 1.8e-05,
13
+ "loss": 0.4403,
14
+ "step": 6500
15
+ },
16
+ {
17
+ "epoch": 1.0,
18
+ "eval_accuracy": 0.900120719929763,
19
+ "eval_f1": 0.7773765268189059,
20
+ "eval_loss": 0.3497047424316406,
21
+ "eval_precision": 0.7735151130839146,
22
+ "eval_recall": 0.7812766865926558,
23
+ "eval_runtime": 26.5811,
24
+ "eval_samples_per_second": 70.99,
25
+ "eval_steps_per_second": 35.514,
26
+ "step": 6500
27
+ },
28
+ {
29
+ "epoch": 2.0,
30
+ "learning_rate": 1.6000000000000003e-05,
31
+ "loss": 0.307,
32
+ "step": 13000
33
+ },
34
+ {
35
+ "epoch": 2.0,
36
+ "eval_accuracy": 0.904587357330992,
37
+ "eval_f1": 0.7846807172066689,
38
+ "eval_loss": 0.3130991458892822,
39
+ "eval_precision": 0.7710179270554296,
40
+ "eval_recall": 0.798836464560205,
41
+ "eval_runtime": 26.8623,
42
+ "eval_samples_per_second": 70.247,
43
+ "eval_steps_per_second": 35.142,
44
+ "step": 13000
45
+ },
46
+ {
47
+ "epoch": 3.0,
48
+ "learning_rate": 1.4e-05,
49
+ "loss": 0.2386,
50
+ "step": 19500
51
+ },
52
+ {
53
+ "epoch": 3.0,
54
+ "eval_accuracy": 0.9074187884108867,
55
+ "eval_f1": 0.7895801055644548,
56
+ "eval_loss": 0.348012775182724,
57
+ "eval_precision": 0.7771517187903851,
58
+ "eval_recall": 0.8024124679760888,
59
+ "eval_runtime": 26.5101,
60
+ "eval_samples_per_second": 71.18,
61
+ "eval_steps_per_second": 35.609,
62
+ "step": 19500
63
+ },
64
+ {
65
+ "epoch": 4.0,
66
+ "learning_rate": 1.2e-05,
67
+ "loss": 0.1883,
68
+ "step": 26000
69
+ },
70
+ {
71
+ "epoch": 4.0,
72
+ "eval_accuracy": 0.9088235294117647,
73
+ "eval_f1": 0.7946725265301746,
74
+ "eval_loss": 0.3668019771575928,
75
+ "eval_precision": 0.7980086114101184,
76
+ "eval_recall": 0.791364218616567,
77
+ "eval_runtime": 26.9144,
78
+ "eval_samples_per_second": 70.111,
79
+ "eval_steps_per_second": 35.074,
80
+ "step": 26000
81
+ },
82
+ {
83
+ "epoch": 5.0,
84
+ "learning_rate": 1e-05,
85
+ "loss": 0.1439,
86
+ "step": 32500
87
+ },
88
+ {
89
+ "epoch": 5.0,
90
+ "eval_accuracy": 0.9110074626865672,
91
+ "eval_f1": 0.801425039215165,
92
+ "eval_loss": 0.40606749057769775,
93
+ "eval_precision": 0.7984319542300153,
94
+ "eval_recall": 0.8044406490179334,
95
+ "eval_runtime": 26.8012,
96
+ "eval_samples_per_second": 70.407,
97
+ "eval_steps_per_second": 35.222,
98
+ "step": 32500
99
+ },
100
+ {
101
+ "epoch": 6.0,
102
+ "learning_rate": 8.000000000000001e-06,
103
+ "loss": 0.1108,
104
+ "step": 39000
105
+ },
106
+ {
107
+ "epoch": 6.0,
108
+ "eval_accuracy": 0.9115561896400352,
109
+ "eval_f1": 0.8033741926980361,
110
+ "eval_loss": 0.46604597568511963,
111
+ "eval_precision": 0.7936871712068337,
112
+ "eval_recall": 0.8133005977796754,
113
+ "eval_runtime": 26.6526,
114
+ "eval_samples_per_second": 70.8,
115
+ "eval_steps_per_second": 35.419,
116
+ "step": 39000
117
+ },
118
+ {
119
+ "epoch": 7.0,
120
+ "learning_rate": 6e-06,
121
+ "loss": 0.0825,
122
+ "step": 45500
123
+ },
124
+ {
125
+ "epoch": 7.0,
126
+ "eval_accuracy": 0.9102502194907814,
127
+ "eval_f1": 0.8003274619061452,
128
+ "eval_loss": 0.5183274149894714,
129
+ "eval_precision": 0.7920652344362553,
130
+ "eval_recall": 0.8087638770281811,
131
+ "eval_runtime": 26.7116,
132
+ "eval_samples_per_second": 70.643,
133
+ "eval_steps_per_second": 35.34,
134
+ "step": 45500
135
+ },
136
+ {
137
+ "epoch": 8.0,
138
+ "learning_rate": 4.000000000000001e-06,
139
+ "loss": 0.0601,
140
+ "step": 52000
141
+ },
142
+ {
143
+ "epoch": 8.0,
144
+ "eval_accuracy": 0.9106014047410009,
145
+ "eval_f1": 0.8013862969874699,
146
+ "eval_loss": 0.5683603882789612,
147
+ "eval_precision": 0.800575263662512,
148
+ "eval_recall": 0.802198975234842,
149
+ "eval_runtime": 26.5594,
150
+ "eval_samples_per_second": 71.048,
151
+ "eval_steps_per_second": 35.543,
152
+ "step": 52000
153
+ },
154
+ {
155
+ "epoch": 9.0,
156
+ "learning_rate": 2.0000000000000003e-06,
157
+ "loss": 0.0431,
158
+ "step": 58500
159
+ },
160
+ {
161
+ "epoch": 9.0,
162
+ "eval_accuracy": 0.91078797190518,
163
+ "eval_f1": 0.8022889236230693,
164
+ "eval_loss": 0.6140836477279663,
165
+ "eval_precision": 0.7964862448056388,
166
+ "eval_recall": 0.8081767719897524,
167
+ "eval_runtime": 26.6356,
168
+ "eval_samples_per_second": 70.845,
169
+ "eval_steps_per_second": 35.441,
170
+ "step": 58500
171
+ }
172
+ ],
173
+ "max_steps": 65000,
174
+ "num_train_epochs": 10,
175
+ "total_flos": 2.5323970171833972e+16,
176
+ "trial_name": null,
177
+ "trial_params": null
178
+ }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38da682fc8f5b1a45d948e5165d753debd1ee21627415b23d05208ce7115c6d8
3
- size 3183
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40697fbddeb3ba1528744eacd0f68bb1e8189da159f4a5eee73059ce6209e591
3
+ size 2735