birgermoell committed on
Commit
fe57383
1 Parent(s): a3ee076

Added model

README.md ADDED
@@ -0,0 +1,158 @@
---
license: apache-2.0
tags:
- token-classification
datasets:
- wikiann
metrics:
- precision
- recall
- f1
- accuracy
model-index:
- name: distilroberta-base-ner-wikiann
  results:
  - task:
      name: Token Classification
      type: token-classification
    dataset:
      name: wikiann
      type: wikiann
    metrics:
    - name: Precision
      type: precision
      value: 0.8331921416757433
    - name: Recall
      type: recall
      value: 0.84243586083126
    - name: F1
      type: f1
      value: 0.8377885044416501
    - name: Accuracy
      type: accuracy
      value: 0.91930707459758
---

<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->

# distilroberta-base-ner-wikiann

This model is a fine-tuned version of [distilroberta-base](https://huggingface.co/distilroberta-base) on the wikiann dataset.

eval F1-Score: **83.78**

test F1-Score: **83.76**

## Model Usage

```python
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

tokenizer = AutoTokenizer.from_pretrained("philschmid/distilroberta-base-ner-wikiann")
model = AutoModelForTokenClassification.from_pretrained("philschmid/distilroberta-base-ner-wikiann")

# grouped_entities=True merges sub-word tokens back into whole entity spans;
# newer transformers releases express the same behaviour via aggregation_strategy="simple".
nlp = pipeline("ner", model=model, tokenizer=tokenizer, grouped_entities=True)

example = "Jag heter Per och jag jobbar på KTH"
nlp(example)
```
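
The pipeline returns one dictionary per detected entity span. A minimal sketch of inspecting the grouped output (the field names are the standard transformers NER-pipeline keys; the actual entities printed depend on the checkpoint):

```python
# Iterate over the grouped predictions and print a readable summary.
# Each item carries "entity_group", "word", "score", "start" and "end".
for entity in nlp(example):
    print(f'{entity["word"]:<12} {entity["entity_group"]:<6} {entity["score"]:.3f}')
```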

<!--
## Training procedure

### Training hyperparameters

The following hyperparameters were used during training:

- learning_rate: 4.9086903597787154e-05
- train_batch_size: 32
- eval_batch_size: 16
- seed: 42
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- lr_scheduler_type: linear
- num_epochs: 5.0
- mixed_precision_training: Native AMP

### Training results

It achieves the following results on the evaluation set:

- Loss: 0.3156
- Precision: 0.8332
- Recall: 0.8424
- F1: 0.8378
- Accuracy: 0.9193

It achieves the following results on the test set:

- Loss: 0.3023
- Precision: 0.8301
- Recall: 0.8452
- F1: 0.8376
- Accuracy: 0.92

### Framework versions

- Transformers 4.6.1
- Pytorch 1.8.1+cu101
- Datasets 1.6.2
- Tokenizers 0.10.2
-->
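
The commented-out hyperparameters above map roughly onto `transformers.TrainingArguments` as in the sketch below. The actual training script is not part of this commit, so the output directory and any arguments not listed are assumptions.

```python
from transformers import TrainingArguments

# Sketch only: mirrors the hyperparameters listed in the commented-out section.
# output_dir is a placeholder; Adam betas/epsilon equal the TrainingArguments defaults.
training_args = TrainingArguments(
    output_dir="distilroberta-base-ner-wikiann",  # placeholder path
    learning_rate=4.9086903597787154e-05,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=16,
    seed=42,
    lr_scheduler_type="linear",
    num_train_epochs=5.0,
    fp16=True,  # "Native AMP" mixed precision
)
```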
all_results.json ADDED
@@ -0,0 +1,17 @@
{
    "epoch": 3.0,
    "eval_accuracy": 0.9848601595201899,
    "eval_f1": 0.9434072987346416,
    "eval_loss": 0.08223789930343628,
    "eval_precision": 0.939307624890447,
    "eval_recall": 0.9475429160834009,
    "eval_runtime": 148.4881,
    "eval_samples": 10000,
    "eval_samples_per_second": 67.345,
    "eval_steps_per_second": 8.418,
    "train_loss": 0.06440118865966797,
    "train_runtime": 4082.003,
    "train_samples": 20000,
    "train_samples_per_second": 14.699,
    "train_steps_per_second": 1.837
}
config.json ADDED
@@ -0,0 +1,46 @@
{
  "_name_or_path": "flax-community/nordic-roberta-wiki",
  "architectures": [
    "RobertaForTokenClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "eos_token_id": 2,
  "finetuning_task": "ner",
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": 0,
    "1": 1,
    "2": 2,
    "3": 3,
    "4": 4,
    "5": 5,
    "6": 6
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "0": 0,
    "1": 1,
    "2": 2,
    "3": 3,
    "4": 4,
    "5": 5,
    "6": 6
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.9.2",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}
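
The `id2label`/`label2id` maps in this config use bare indices rather than tag names. A minimal sketch of attaching the wikiann tag names at load time, assuming the Swedish configuration (`"sv"`, which the commit itself does not state) and the checkpoint id used in the README:

```python
from datasets import load_dataset
from transformers import AutoConfig

# Assumption: Swedish wikiann split, matching the Swedish usage example above.
label_names = load_dataset("wikiann", "sv", split="test").features["ner_tags"].feature.names
# label_names == ["O", "B-PER", "I-PER", "B-ORG", "I-ORG", "B-LOC", "I-LOC"]

config = AutoConfig.from_pretrained("philschmid/distilroberta-base-ner-wikiann")
config.id2label = {i: name for i, name in enumerate(label_names)}
config.label2id = {name: i for i, name in enumerate(label_names)}
```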
merges.txt ADDED
The diff for this file is too large to render. See raw diff
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9abff09592158fcd4c18cce638554873ef69517584f1f4fb07e7ad355cff9e3e
size 496323633
special_tokens_map.json ADDED
@@ -0,0 +1 @@
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
tokenizer_config.json ADDED
@@ -0,0 +1 @@
{"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "add_prefix_space": true, "errors": "replace", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": "<mask>", "special_tokens_map_file": null, "name_or_path": "flax-community/nordic-roberta-wiki", "tokenizer_class": "RobertaTokenizer"}
train_results.json ADDED
@@ -0,0 +1,8 @@
{
    "epoch": 3.0,
    "train_loss": 0.06440118865966797,
    "train_runtime": 4082.003,
    "train_samples": 20000,
    "train_samples_per_second": 14.699,
    "train_steps_per_second": 1.837
}
trainer_state.json ADDED
@@ -0,0 +1,115 @@
{
    "best_metric": null,
    "best_model_checkpoint": null,
    "epoch": 3.0,
    "global_step": 7500,
    "is_hyper_param_search": false,
    "is_local_process_zero": true,
    "is_world_process_zero": true,
    "log_history": [
        {"epoch": 0.2, "learning_rate": 4.666666666666667e-05, "loss": 0.2034, "step": 500},
        {"epoch": 0.4, "learning_rate": 4.3333333333333334e-05, "loss": 0.1179, "step": 1000},
        {"epoch": 0.6, "learning_rate": 4e-05, "loss": 0.1084, "step": 1500},
        {"epoch": 0.8, "learning_rate": 3.6666666666666666e-05, "loss": 0.1066, "step": 2000},
        {"epoch": 1.0, "learning_rate": 3.3333333333333335e-05, "loss": 0.0912, "step": 2500},
        {"epoch": 1.2, "learning_rate": 3e-05, "loss": 0.0487, "step": 3000},
        {"epoch": 1.4, "learning_rate": 2.6666666666666667e-05, "loss": 0.0465, "step": 3500},
        {"epoch": 1.6, "learning_rate": 2.3333333333333336e-05, "loss": 0.0521, "step": 4000},
        {"epoch": 1.8, "learning_rate": 2e-05, "loss": 0.0455, "step": 4500},
        {"epoch": 2.0, "learning_rate": 1.6666666666666667e-05, "loss": 0.0466, "step": 5000},
        {"epoch": 2.2, "learning_rate": 1.3333333333333333e-05, "loss": 0.0165, "step": 5500},
        {"epoch": 2.4, "learning_rate": 1e-05, "loss": 0.0222, "step": 6000},
        {"epoch": 2.6, "learning_rate": 6.666666666666667e-06, "loss": 0.0196, "step": 6500},
        {"epoch": 2.8, "learning_rate": 3.3333333333333333e-06, "loss": 0.0219, "step": 7000},
        {"epoch": 3.0, "learning_rate": 0.0, "loss": 0.0189, "step": 7500},
        {"epoch": 3.0, "step": 7500, "total_flos": 741030271017696.0, "train_loss": 0.06440118865966797, "train_runtime": 4082.003, "train_samples_per_second": 14.699, "train_steps_per_second": 1.837}
    ],
    "max_steps": 7500,
    "num_train_epochs": 3,
    "total_flos": 741030271017696.0,
    "trial_name": null,
    "trial_params": null
}
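
The `log_history` entries above can be turned into a quick training-loss curve. A minimal sketch, assuming `trainer_state.json` has been downloaded locally and matplotlib is available:

```python
import json

import matplotlib.pyplot as plt

# Load the Trainer state saved with the checkpoint and keep only the entries
# that record a per-step training loss (the final summary entry uses "train_loss").
with open("trainer_state.json") as f:
    state = json.load(f)

steps = [e["step"] for e in state["log_history"] if "loss" in e]
losses = [e["loss"] for e in state["log_history"] if "loss" in e]

plt.plot(steps, losses, marker="o")
plt.xlabel("step")
plt.ylabel("training loss")
plt.title("Fine-tuning loss from trainer_state.json")
plt.show()
```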
training_args.bin ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e9712f30498e58843c4a1c5253bfc62ddc1e6bab728243ca91311aed3aac5fa5
size 2671
vocab.json ADDED
The diff for this file is too large to render. See raw diff