Upload model 0.8.1
Browse files
- LICENSE.md +21 -0
- README.md +23 -3
- config.json +31 -0
- merges.txt +0 -0
- pdn2_config.yml +171 -0
- pdn2_eval_kpwr_test.txt +82 -0
- pdn2_metadata.yml +6 -0
- pdn2_model.pt +3 -0
- special_tokens_map.json +1 -0
- test_results.txt +82 -0
- tokenizer.json +0 -0
- tokenizer_config.json +1 -0
- vocab.json +0 -0
LICENSE.md
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2023 CodeNLP Michał Marcińczuk
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
README.md
CHANGED
@@ -1,3 +1,23 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## About
|
2 |
+
|
3 |
+
A transformer-based model for named entity recognition for Polish.
|
4 |
+
The model was trained on the training part of the KPWr corpus to recognize
|
5 |
+
82 categories of named entities (NE).
|
6 |
+
|
7 |
+
## Evaluation summary
|
8 |
+
|
9 |
+
precision recall f1-score support
|
10 |
+
micro avg 0.7803 0.8033 0.7916 17711
|
11 |
+
macro avg 0.7921 0.8033 0.7932 17711
|
12 |
+
|
13 |
+
Detailed results can be found in the test_results.txt file.
|
14 |
+
|
15 |
+
|
16 |
+
## Author
|
17 |
+
|
18 |
+
Michał Marcińczuk <marcinczuk@gmail.com>
|
19 |
+
|
20 |
+
|
21 |
+
## License
|
22 |
+
|
23 |
+
[MIT](https://choosealicense.com/licenses/mit/)
|
config.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "allegro/herbert-large-cased",
|
3 |
+
"architectures": [
|
4 |
+
"BertModel"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"directionality": "bidi",
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 1024,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 4096,
|
14 |
+
"layer_norm_eps": 1e-12,
|
15 |
+
"max_position_embeddings": 514,
|
16 |
+
"model_type": "bert",
|
17 |
+
"num_attention_heads": 16,
|
18 |
+
"num_hidden_layers": 24,
|
19 |
+
"pad_token_id": 1,
|
20 |
+
"pooler_fc_size": 768,
|
21 |
+
"pooler_num_attention_heads": 12,
|
22 |
+
"pooler_num_fc_layers": 3,
|
23 |
+
"pooler_size_per_head": 128,
|
24 |
+
"pooler_type": "first_token_transform",
|
25 |
+
"position_embedding_type": "absolute",
|
26 |
+
"tokenizer_class": "HerbertTokenizerFast",
|
27 |
+
"transformers_version": "4.17.0",
|
28 |
+
"type_vocab_size": 2,
|
29 |
+
"use_cache": true,
|
30 |
+
"vocab_size": 50000
|
31 |
+
}
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pdn2_config.yml
ADDED
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
!!python/object:poldeepner2.model.hf_for_token_calssification.Pdn2ModelConfiguration
|
2 |
+
device: cuda:0
|
3 |
+
dropout_p: 0.2
|
4 |
+
head_init_range: 0.04
|
5 |
+
hidden_size: 1024
|
6 |
+
label_ignore_idx: 0
|
7 |
+
labels:
|
8 |
+
- B-nam_adj
|
9 |
+
- B-nam_adj_city
|
10 |
+
- B-nam_adj_country
|
11 |
+
- B-nam_adj_person
|
12 |
+
- B-nam_eve
|
13 |
+
- B-nam_eve_human
|
14 |
+
- B-nam_eve_human_cultural
|
15 |
+
- B-nam_eve_human_holiday
|
16 |
+
- B-nam_eve_human_sport
|
17 |
+
- B-nam_fac_bridge
|
18 |
+
- B-nam_fac_goe
|
19 |
+
- B-nam_fac_goe_stop
|
20 |
+
- B-nam_fac_park
|
21 |
+
- B-nam_fac_road
|
22 |
+
- B-nam_fac_square
|
23 |
+
- B-nam_fac_system
|
24 |
+
- B-nam_liv_animal
|
25 |
+
- B-nam_liv_character
|
26 |
+
- B-nam_liv_god
|
27 |
+
- B-nam_liv_habitant
|
28 |
+
- B-nam_liv_person
|
29 |
+
- B-nam_loc
|
30 |
+
- B-nam_loc_astronomical
|
31 |
+
- B-nam_loc_country_region
|
32 |
+
- B-nam_loc_gpe_admin1
|
33 |
+
- B-nam_loc_gpe_admin2
|
34 |
+
- B-nam_loc_gpe_admin3
|
35 |
+
- B-nam_loc_gpe_city
|
36 |
+
- B-nam_loc_gpe_conurbation
|
37 |
+
- B-nam_loc_gpe_country
|
38 |
+
- B-nam_loc_gpe_district
|
39 |
+
- B-nam_loc_gpe_subdivision
|
40 |
+
- B-nam_loc_historical_region
|
41 |
+
- B-nam_loc_hydronym
|
42 |
+
- B-nam_loc_hydronym_lake
|
43 |
+
- B-nam_loc_hydronym_ocean
|
44 |
+
- B-nam_loc_hydronym_river
|
45 |
+
- B-nam_loc_hydronym_sea
|
46 |
+
- B-nam_loc_land
|
47 |
+
- B-nam_loc_land_continent
|
48 |
+
- B-nam_loc_land_island
|
49 |
+
- B-nam_loc_land_mountain
|
50 |
+
- B-nam_loc_land_peak
|
51 |
+
- B-nam_loc_land_region
|
52 |
+
- B-nam_num_house
|
53 |
+
- B-nam_num_phone
|
54 |
+
- B-nam_org_company
|
55 |
+
- B-nam_org_group
|
56 |
+
- B-nam_org_group_band
|
57 |
+
- B-nam_org_group_team
|
58 |
+
- B-nam_org_institution
|
59 |
+
- B-nam_org_nation
|
60 |
+
- B-nam_org_organization
|
61 |
+
- B-nam_org_organization_sub
|
62 |
+
- B-nam_org_political_party
|
63 |
+
- B-nam_oth
|
64 |
+
- B-nam_oth_currency
|
65 |
+
- B-nam_oth_data_format
|
66 |
+
- B-nam_oth_license
|
67 |
+
- B-nam_oth_position
|
68 |
+
- B-nam_oth_tech
|
69 |
+
- B-nam_oth_www
|
70 |
+
- B-nam_pro
|
71 |
+
- B-nam_pro_award
|
72 |
+
- B-nam_pro_brand
|
73 |
+
- B-nam_pro_media
|
74 |
+
- B-nam_pro_media_periodic
|
75 |
+
- B-nam_pro_media_radio
|
76 |
+
- B-nam_pro_media_tv
|
77 |
+
- B-nam_pro_media_web
|
78 |
+
- B-nam_pro_model_car
|
79 |
+
- B-nam_pro_software
|
80 |
+
- B-nam_pro_software_game
|
81 |
+
- B-nam_pro_title
|
82 |
+
- B-nam_pro_title_album
|
83 |
+
- B-nam_pro_title_article
|
84 |
+
- B-nam_pro_title_book
|
85 |
+
- B-nam_pro_title_document
|
86 |
+
- B-nam_pro_title_song
|
87 |
+
- B-nam_pro_title_treaty
|
88 |
+
- B-nam_pro_title_tv
|
89 |
+
- B-nam_pro_vehicle
|
90 |
+
- I-nam_adj_country
|
91 |
+
- I-nam_eve
|
92 |
+
- I-nam_eve_human
|
93 |
+
- I-nam_eve_human_cultural
|
94 |
+
- I-nam_eve_human_holiday
|
95 |
+
- I-nam_eve_human_sport
|
96 |
+
- I-nam_fac_bridge
|
97 |
+
- I-nam_fac_goe
|
98 |
+
- I-nam_fac_goe_stop
|
99 |
+
- I-nam_fac_park
|
100 |
+
- I-nam_fac_road
|
101 |
+
- I-nam_fac_square
|
102 |
+
- I-nam_fac_system
|
103 |
+
- I-nam_liv_animal
|
104 |
+
- I-nam_liv_character
|
105 |
+
- I-nam_liv_god
|
106 |
+
- I-nam_liv_person
|
107 |
+
- I-nam_loc
|
108 |
+
- I-nam_loc_astronomical
|
109 |
+
- I-nam_loc_country_region
|
110 |
+
- I-nam_loc_gpe_admin1
|
111 |
+
- I-nam_loc_gpe_admin2
|
112 |
+
- I-nam_loc_gpe_admin3
|
113 |
+
- I-nam_loc_gpe_city
|
114 |
+
- I-nam_loc_gpe_conurbation
|
115 |
+
- I-nam_loc_gpe_country
|
116 |
+
- I-nam_loc_gpe_district
|
117 |
+
- I-nam_loc_gpe_subdivision
|
118 |
+
- I-nam_loc_historical_region
|
119 |
+
- I-nam_loc_hydronym
|
120 |
+
- I-nam_loc_hydronym_lake
|
121 |
+
- I-nam_loc_hydronym_ocean
|
122 |
+
- I-nam_loc_hydronym_river
|
123 |
+
- I-nam_loc_hydronym_sea
|
124 |
+
- I-nam_loc_land
|
125 |
+
- I-nam_loc_land_continent
|
126 |
+
- I-nam_loc_land_island
|
127 |
+
- I-nam_loc_land_mountain
|
128 |
+
- I-nam_loc_land_peak
|
129 |
+
- I-nam_loc_land_region
|
130 |
+
- I-nam_num_house
|
131 |
+
- I-nam_num_phone
|
132 |
+
- I-nam_org_company
|
133 |
+
- I-nam_org_group
|
134 |
+
- I-nam_org_group_band
|
135 |
+
- I-nam_org_group_team
|
136 |
+
- I-nam_org_institution
|
137 |
+
- I-nam_org_nation
|
138 |
+
- I-nam_org_organization
|
139 |
+
- I-nam_org_organization_sub
|
140 |
+
- I-nam_org_political_party
|
141 |
+
- I-nam_oth
|
142 |
+
- I-nam_oth_currency
|
143 |
+
- I-nam_oth_data_format
|
144 |
+
- I-nam_oth_license
|
145 |
+
- I-nam_oth_position
|
146 |
+
- I-nam_oth_tech
|
147 |
+
- I-nam_oth_www
|
148 |
+
- I-nam_pro
|
149 |
+
- I-nam_pro_award
|
150 |
+
- I-nam_pro_brand
|
151 |
+
- I-nam_pro_media
|
152 |
+
- I-nam_pro_media_periodic
|
153 |
+
- I-nam_pro_media_radio
|
154 |
+
- I-nam_pro_media_tv
|
155 |
+
- I-nam_pro_media_web
|
156 |
+
- I-nam_pro_model_car
|
157 |
+
- I-nam_pro_software
|
158 |
+
- I-nam_pro_software_game
|
159 |
+
- I-nam_pro_title
|
160 |
+
- I-nam_pro_title_album
|
161 |
+
- I-nam_pro_title_article
|
162 |
+
- I-nam_pro_title_book
|
163 |
+
- I-nam_pro_title_document
|
164 |
+
- I-nam_pro_title_song
|
165 |
+
- I-nam_pro_title_treaty
|
166 |
+
- I-nam_pro_title_tv
|
167 |
+
- I-nam_pro_vehicle
|
168 |
+
- O
|
169 |
+
max_seq_length: 256
|
170 |
+
seed: 101
|
171 |
+
sequence_generator: context-window
|
pdn2_eval_kpwr_test.txt
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
precision recall f1-score support
|
2 |
+
|
3 |
+
nam_adj 0.5741 0.5962 0.5849 52
|
4 |
+
nam_adj_city 0.8261 0.9048 0.8636 42
|
5 |
+
nam_adj_country 0.7538 0.8855 0.8144 166
|
6 |
+
nam_adj_person 1.0000 0.7778 0.8750 18
|
7 |
+
nam_eve 1.0000 0.8750 0.9333 8
|
8 |
+
nam_eve_human 0.4337 0.4615 0.4472 78
|
9 |
+
nam_eve_human_cultural 0.3529 0.2727 0.3077 22
|
10 |
+
nam_eve_human_holiday 0.5455 0.6667 0.6000 9
|
11 |
+
nam_eve_human_sport 0.6230 0.6909 0.6552 55
|
12 |
+
nam_fac_bridge 0.3750 0.7500 0.5000 4
|
13 |
+
nam_fac_goe 0.6071 0.5312 0.5667 64
|
14 |
+
nam_fac_goe_stop 0.6000 0.7500 0.6667 4
|
15 |
+
nam_fac_park 0.5556 0.5000 0.5263 10
|
16 |
+
nam_fac_road 0.9100 0.9579 0.9333 95
|
17 |
+
nam_fac_square 0.8000 0.6667 0.7273 6
|
18 |
+
nam_fac_system 0.8235 0.5385 0.6512 26
|
19 |
+
nam_liv_animal 0.6429 0.8182 0.7200 11
|
20 |
+
nam_liv_god 0.8205 0.9143 0.8649 35
|
21 |
+
nam_liv_habitant 0.3750 0.4286 0.4000 7
|
22 |
+
nam_liv_person 0.9503 0.9663 0.9582 949
|
23 |
+
nam_loc 0.0000 0.0000 0.0000 4
|
24 |
+
nam_loc_country_region 0.0667 0.2500 0.1053 4
|
25 |
+
nam_loc_gpe_admin1 0.7867 0.9219 0.8489 64
|
26 |
+
nam_loc_gpe_admin2 0.8000 0.6667 0.7273 36
|
27 |
+
nam_loc_gpe_admin3 0.8140 0.7447 0.7778 47
|
28 |
+
nam_loc_gpe_city 0.9032 0.8970 0.9001 437
|
29 |
+
nam_loc_gpe_country 0.9536 0.9776 0.9654 357
|
30 |
+
nam_loc_gpe_district 0.2439 0.5556 0.3390 18
|
31 |
+
nam_loc_gpe_subdivision 0.7143 0.1923 0.3030 26
|
32 |
+
nam_loc_historical_region 0.8235 0.6364 0.7179 22
|
33 |
+
nam_loc_hydronym 0.0000 0.0000 0.0000 1
|
34 |
+
nam_loc_hydronym_lake 1.0000 0.5000 0.6667 2
|
35 |
+
nam_loc_hydronym_ocean 1.0000 1.0000 1.0000 1
|
36 |
+
nam_loc_hydronym_river 0.8627 0.8627 0.8627 51
|
37 |
+
nam_loc_hydronym_sea 1.0000 1.0000 1.0000 3
|
38 |
+
nam_loc_land 0.0000 0.0000 0.0000 2
|
39 |
+
nam_loc_land_continent 0.9118 0.9688 0.9394 32
|
40 |
+
nam_loc_land_island 1.0000 0.5455 0.7059 11
|
41 |
+
nam_loc_land_mountain 0.8571 0.6667 0.7500 9
|
42 |
+
nam_loc_land_region 0.5455 0.5455 0.5455 11
|
43 |
+
nam_num_house 0.8462 1.0000 0.9167 11
|
44 |
+
nam_num_phone 1.0000 0.5000 0.6667 2
|
45 |
+
nam_org_company 0.6512 0.7368 0.6914 76
|
46 |
+
nam_org_group 0.3571 0.2778 0.3125 18
|
47 |
+
nam_org_group_band 0.7895 0.7895 0.7895 19
|
48 |
+
nam_org_group_team 0.9412 0.9664 0.9536 149
|
49 |
+
nam_org_institution 0.7434 0.7406 0.7420 266
|
50 |
+
nam_org_nation 0.9000 0.7778 0.8344 81
|
51 |
+
nam_org_organization 0.7754 0.7439 0.7593 246
|
52 |
+
nam_org_organization_sub 0.5000 0.3333 0.4000 3
|
53 |
+
nam_org_political_party 0.8871 0.9483 0.9167 58
|
54 |
+
nam_oth 0.0732 0.1364 0.0952 22
|
55 |
+
nam_oth_currency 0.9362 0.8627 0.8980 51
|
56 |
+
nam_oth_data_format 0.0000 0.0000 0.0000 10
|
57 |
+
nam_oth_license 0.6429 0.8182 0.7200 11
|
58 |
+
nam_oth_position 0.3333 0.7000 0.4516 10
|
59 |
+
nam_oth_tech 0.5000 0.5738 0.5344 61
|
60 |
+
nam_oth_www 0.7500 0.1500 0.2500 20
|
61 |
+
nam_pro 0.0000 0.0000 0.0000 2
|
62 |
+
nam_pro_award 0.7600 0.8261 0.7917 23
|
63 |
+
nam_pro_brand 0.5435 0.5435 0.5435 46
|
64 |
+
nam_pro_media 0.0000 0.0000 0.0000 8
|
65 |
+
nam_pro_media_periodic 0.8280 0.9167 0.8701 84
|
66 |
+
nam_pro_media_radio 0.5000 0.6667 0.5714 3
|
67 |
+
nam_pro_media_tv 0.4667 1.0000 0.6364 7
|
68 |
+
nam_pro_media_web 0.3836 0.7000 0.4956 40
|
69 |
+
nam_pro_model_car 0.8571 0.9231 0.8889 26
|
70 |
+
nam_pro_software 0.7381 0.6392 0.6851 97
|
71 |
+
nam_pro_software_game 0.0000 0.0000 0.0000 3
|
72 |
+
nam_pro_title 0.4545 0.5714 0.5063 35
|
73 |
+
nam_pro_title_album 0.6667 0.8571 0.7500 7
|
74 |
+
nam_pro_title_book 0.2500 0.1818 0.2105 11
|
75 |
+
nam_pro_title_document 0.5826 0.7614 0.6601 88
|
76 |
+
nam_pro_title_song 0.6000 0.4286 0.5000 7
|
77 |
+
nam_pro_title_treaty 0.0000 0.0000 0.0000 2
|
78 |
+
nam_pro_title_tv 0.8824 0.6250 0.7317 24
|
79 |
+
nam_pro_vehicle 0.0000 0.0000 0.0000 4
|
80 |
+
|
81 |
+
micro avg 0.7905 0.8158 0.8029 4430
|
82 |
+
macro avg 0.8044 0.8158 0.8052 4430
|
pdn2_metadata.yml
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
author: Michał Marcińczuk, CodeNLP
|
2 |
+
contact: marcinczuk@gmail.com
|
3 |
+
description: Model trained on the KPWr corpus recognizes 82 types of nested entities.
|
4 |
+
reference: https://www.sciencedirect.com/science/article/pii/S1877050921015179
|
5 |
+
score: F1=80.29 on the KPWr test subset (micro avg, strict evaluation).
|
6 |
+
|
pdn2_model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7cb4fc4bd740196db90ab184b7aad0699dacc12628f5cdf29ab143bd39f3ff35
|
3 |
+
size 1425385193
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "<s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": "<mask>"}
|
test_results.txt
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
precision recall f1-score support
|
2 |
+
|
3 |
+
nam_adj 0.6230 0.5721 0.5965 208
|
4 |
+
nam_adj_city 0.8343 0.8690 0.8513 168
|
5 |
+
nam_adj_country 0.7543 0.8554 0.8017 664
|
6 |
+
nam_adj_person 0.9825 0.7778 0.8682 72
|
7 |
+
nam_eve 1.0000 0.8438 0.9153 32
|
8 |
+
nam_eve_human 0.3813 0.4583 0.4163 312
|
9 |
+
nam_eve_human_cultural 0.3148 0.1932 0.2394 88
|
10 |
+
nam_eve_human_holiday 0.5349 0.6389 0.5823 36
|
11 |
+
nam_eve_human_sport 0.6173 0.6818 0.6479 220
|
12 |
+
nam_fac_bridge 0.5217 0.7500 0.6154 16
|
13 |
+
nam_fac_goe 0.5983 0.5469 0.5714 256
|
14 |
+
nam_fac_goe_stop 0.5000 0.6250 0.5556 16
|
15 |
+
nam_fac_park 0.6111 0.5500 0.5789 40
|
16 |
+
nam_fac_road 0.9333 0.9579 0.9455 380
|
17 |
+
nam_fac_square 0.8421 0.6667 0.7442 24
|
18 |
+
nam_fac_system 0.7647 0.5000 0.6047 104
|
19 |
+
nam_liv_animal 0.5106 0.5455 0.5275 44
|
20 |
+
nam_liv_god 0.7785 0.8786 0.8255 140
|
21 |
+
nam_liv_habitant 0.4074 0.3929 0.4000 28
|
22 |
+
nam_liv_person 0.9476 0.9629 0.9552 3796
|
23 |
+
nam_loc 0.0185 0.0625 0.0286 16
|
24 |
+
nam_loc_country_region 0.0926 0.3125 0.1429 16
|
25 |
+
nam_loc_gpe_admin1 0.7607 0.9062 0.8271 256
|
26 |
+
nam_loc_gpe_admin2 0.8091 0.6181 0.7008 144
|
27 |
+
nam_loc_gpe_admin3 0.8225 0.7394 0.7787 188
|
28 |
+
nam_loc_gpe_city 0.8915 0.9022 0.8968 1748
|
29 |
+
nam_loc_gpe_country 0.9438 0.9643 0.9539 1428
|
30 |
+
nam_loc_gpe_district 0.2198 0.5556 0.3150 72
|
31 |
+
nam_loc_gpe_subdivision 0.6452 0.1923 0.2963 104
|
32 |
+
nam_loc_historical_region 0.8333 0.6250 0.7143 88
|
33 |
+
nam_loc_hydronym 0.0000 0.0000 0.0000 4
|
34 |
+
nam_loc_hydronym_lake 1.0000 0.5000 0.6667 8
|
35 |
+
nam_loc_hydronym_ocean 1.0000 1.0000 1.0000 4
|
36 |
+
nam_loc_hydronym_river 0.8495 0.8578 0.8537 204
|
37 |
+
nam_loc_hydronym_sea 0.8571 1.0000 0.9231 12
|
38 |
+
nam_loc_land 0.0000 0.0000 0.0000 8
|
39 |
+
nam_loc_land_continent 0.9248 0.9609 0.9425 128
|
40 |
+
nam_loc_land_island 0.6667 0.4545 0.5405 44
|
41 |
+
nam_loc_land_mountain 0.7692 0.5556 0.6452 36
|
42 |
+
nam_loc_land_region 0.5476 0.5227 0.5349 44
|
43 |
+
nam_num_house 0.8627 1.0000 0.9263 44
|
44 |
+
nam_num_phone 1.0000 0.6250 0.7692 8
|
45 |
+
nam_org_company 0.6393 0.7171 0.6760 304
|
46 |
+
nam_org_group 0.3409 0.2083 0.2586 72
|
47 |
+
nam_org_group_band 0.7922 0.8026 0.7974 76
|
48 |
+
nam_org_group_team 0.9329 0.9564 0.9445 596
|
49 |
+
nam_org_institution 0.7062 0.7500 0.7274 1064
|
50 |
+
nam_org_nation 0.8737 0.7685 0.8177 324
|
51 |
+
nam_org_organization 0.7511 0.7175 0.7339 984
|
52 |
+
nam_org_organization_sub 0.1538 0.1667 0.1600 12
|
53 |
+
nam_org_political_party 0.8810 0.9569 0.9174 232
|
54 |
+
nam_oth 0.0805 0.1364 0.1013 88
|
55 |
+
nam_oth_currency 0.9396 0.8382 0.8860 204
|
56 |
+
nam_oth_data_format 0.0000 0.0000 0.0000 40
|
57 |
+
nam_oth_license 0.7551 0.8409 0.7957 44
|
58 |
+
nam_oth_position 0.3766 0.7250 0.4957 40
|
59 |
+
nam_oth_tech 0.5094 0.5574 0.5323 244
|
60 |
+
nam_oth_www 0.5556 0.1250 0.2041 80
|
61 |
+
nam_pro 0.0000 0.0000 0.0000 8
|
62 |
+
nam_pro_award 0.6782 0.6413 0.6592 92
|
63 |
+
nam_pro_brand 0.5202 0.5598 0.5393 184
|
64 |
+
nam_pro_media 0.0000 0.0000 0.0000 32
|
65 |
+
nam_pro_media_periodic 0.8149 0.8886 0.8501 332
|
66 |
+
nam_pro_media_radio 0.5556 0.8333 0.6667 12
|
67 |
+
nam_pro_media_tv 0.4179 1.0000 0.5895 28
|
68 |
+
nam_pro_media_web 0.3695 0.6813 0.4791 160
|
69 |
+
nam_pro_model_car 0.8246 0.9038 0.8624 104
|
70 |
+
nam_pro_software 0.7430 0.6186 0.6751 388
|
71 |
+
nam_pro_software_game 0.0000 0.0000 0.0000 12
|
72 |
+
nam_pro_title 0.5063 0.5714 0.5369 140
|
73 |
+
nam_pro_title_album 0.6190 0.9286 0.7429 28
|
74 |
+
nam_pro_title_book 0.3103 0.2045 0.2466 44
|
75 |
+
nam_pro_title_document 0.5137 0.6484 0.5732 347
|
76 |
+
nam_pro_title_song 0.7059 0.4286 0.5333 28
|
77 |
+
nam_pro_title_treaty 0.0714 0.1250 0.0909 8
|
78 |
+
nam_pro_title_tv 0.8636 0.5938 0.7037 96
|
79 |
+
nam_pro_vehicle 0.0000 0.0000 0.0000 16
|
80 |
+
|
81 |
+
micro avg 0.7803 0.8033 0.7916 17711
|
82 |
+
macro avg 0.7921 0.8033 0.7932 17711
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"cls_token": "<s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": "<mask>", "sep_token": "</s>", "do_lowercase_and_remove_accent": false, "bos_token": "<s>", "additional_special_tokens": [], "model_max_length": 512, "special_tokens_map_file": "/home/czuk/.cache/huggingface/transformers/7e8fe8852a1ff7e03195cb41fac16af837f8c14a34a61850b02a7395eb294f00.b8e113717eb1828d09e47de853cf49c8fad05ebdce24df2614cd942dc23e2a77", "name_or_path": "allegro/herbert-large-cased", "lang2id": null, "id2lang": null, "tokenizer_class": "HerbertTokenizer"}
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|