czuk committed on
Commit
1e089c2
1 Parent(s): 0c062df

Upload model 0.8.1

Browse files
LICENSE.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 CodeNLP Michał Marcińczuk
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,3 +1,23 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## About
2
+
3
+ A transformer-based model for named entity recognition for Polish.
4
+ The model was trained on the train part of the KPWr corpus to recognize
5
+ 82 categories of named entities (NE).
6
+
7
+ ## Evaluation summary
8
+
9
+ precision recall f1-score support
10
+ micro avg 0.7803 0.8033 0.7916 17711
11
+ macro avg 0.7921 0.8033 0.7932 17711
12
+
13
+ Detailed results can be found in the test_results.txt file.
14
+
15
+
16
+ ## Author
17
+
18
+ Michał Marcińczuk <marcinczuk@gmail.com>
19
+
20
+
21
+ ## License
22
+
23
+ [MIT](https://choosealicense.com/licenses/mit/)
config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "allegro/herbert-large-cased",
3
+ "architectures": [
4
+ "BertModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "directionality": "bidi",
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 4096,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 514,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 24,
19
+ "pad_token_id": 1,
20
+ "pooler_fc_size": 768,
21
+ "pooler_num_attention_heads": 12,
22
+ "pooler_num_fc_layers": 3,
23
+ "pooler_size_per_head": 128,
24
+ "pooler_type": "first_token_transform",
25
+ "position_embedding_type": "absolute",
26
+ "tokenizer_class": "HerbertTokenizerFast",
27
+ "transformers_version": "4.17.0",
28
+ "type_vocab_size": 2,
29
+ "use_cache": true,
30
+ "vocab_size": 50000
31
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pdn2_config.yml ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ !!python/object:poldeepner2.model.hf_for_token_calssification.Pdn2ModelConfiguration
2
+ device: cuda:0
3
+ dropout_p: 0.2
4
+ head_init_range: 0.04
5
+ hidden_size: 1024
6
+ label_ignore_idx: 0
7
+ labels:
8
+ - B-nam_adj
9
+ - B-nam_adj_city
10
+ - B-nam_adj_country
11
+ - B-nam_adj_person
12
+ - B-nam_eve
13
+ - B-nam_eve_human
14
+ - B-nam_eve_human_cultural
15
+ - B-nam_eve_human_holiday
16
+ - B-nam_eve_human_sport
17
+ - B-nam_fac_bridge
18
+ - B-nam_fac_goe
19
+ - B-nam_fac_goe_stop
20
+ - B-nam_fac_park
21
+ - B-nam_fac_road
22
+ - B-nam_fac_square
23
+ - B-nam_fac_system
24
+ - B-nam_liv_animal
25
+ - B-nam_liv_character
26
+ - B-nam_liv_god
27
+ - B-nam_liv_habitant
28
+ - B-nam_liv_person
29
+ - B-nam_loc
30
+ - B-nam_loc_astronomical
31
+ - B-nam_loc_country_region
32
+ - B-nam_loc_gpe_admin1
33
+ - B-nam_loc_gpe_admin2
34
+ - B-nam_loc_gpe_admin3
35
+ - B-nam_loc_gpe_city
36
+ - B-nam_loc_gpe_conurbation
37
+ - B-nam_loc_gpe_country
38
+ - B-nam_loc_gpe_district
39
+ - B-nam_loc_gpe_subdivision
40
+ - B-nam_loc_historical_region
41
+ - B-nam_loc_hydronym
42
+ - B-nam_loc_hydronym_lake
43
+ - B-nam_loc_hydronym_ocean
44
+ - B-nam_loc_hydronym_river
45
+ - B-nam_loc_hydronym_sea
46
+ - B-nam_loc_land
47
+ - B-nam_loc_land_continent
48
+ - B-nam_loc_land_island
49
+ - B-nam_loc_land_mountain
50
+ - B-nam_loc_land_peak
51
+ - B-nam_loc_land_region
52
+ - B-nam_num_house
53
+ - B-nam_num_phone
54
+ - B-nam_org_company
55
+ - B-nam_org_group
56
+ - B-nam_org_group_band
57
+ - B-nam_org_group_team
58
+ - B-nam_org_institution
59
+ - B-nam_org_nation
60
+ - B-nam_org_organization
61
+ - B-nam_org_organization_sub
62
+ - B-nam_org_political_party
63
+ - B-nam_oth
64
+ - B-nam_oth_currency
65
+ - B-nam_oth_data_format
66
+ - B-nam_oth_license
67
+ - B-nam_oth_position
68
+ - B-nam_oth_tech
69
+ - B-nam_oth_www
70
+ - B-nam_pro
71
+ - B-nam_pro_award
72
+ - B-nam_pro_brand
73
+ - B-nam_pro_media
74
+ - B-nam_pro_media_periodic
75
+ - B-nam_pro_media_radio
76
+ - B-nam_pro_media_tv
77
+ - B-nam_pro_media_web
78
+ - B-nam_pro_model_car
79
+ - B-nam_pro_software
80
+ - B-nam_pro_software_game
81
+ - B-nam_pro_title
82
+ - B-nam_pro_title_album
83
+ - B-nam_pro_title_article
84
+ - B-nam_pro_title_book
85
+ - B-nam_pro_title_document
86
+ - B-nam_pro_title_song
87
+ - B-nam_pro_title_treaty
88
+ - B-nam_pro_title_tv
89
+ - B-nam_pro_vehicle
90
+ - I-nam_adj_country
91
+ - I-nam_eve
92
+ - I-nam_eve_human
93
+ - I-nam_eve_human_cultural
94
+ - I-nam_eve_human_holiday
95
+ - I-nam_eve_human_sport
96
+ - I-nam_fac_bridge
97
+ - I-nam_fac_goe
98
+ - I-nam_fac_goe_stop
99
+ - I-nam_fac_park
100
+ - I-nam_fac_road
101
+ - I-nam_fac_square
102
+ - I-nam_fac_system
103
+ - I-nam_liv_animal
104
+ - I-nam_liv_character
105
+ - I-nam_liv_god
106
+ - I-nam_liv_person
107
+ - I-nam_loc
108
+ - I-nam_loc_astronomical
109
+ - I-nam_loc_country_region
110
+ - I-nam_loc_gpe_admin1
111
+ - I-nam_loc_gpe_admin2
112
+ - I-nam_loc_gpe_admin3
113
+ - I-nam_loc_gpe_city
114
+ - I-nam_loc_gpe_conurbation
115
+ - I-nam_loc_gpe_country
116
+ - I-nam_loc_gpe_district
117
+ - I-nam_loc_gpe_subdivision
118
+ - I-nam_loc_historical_region
119
+ - I-nam_loc_hydronym
120
+ - I-nam_loc_hydronym_lake
121
+ - I-nam_loc_hydronym_ocean
122
+ - I-nam_loc_hydronym_river
123
+ - I-nam_loc_hydronym_sea
124
+ - I-nam_loc_land
125
+ - I-nam_loc_land_continent
126
+ - I-nam_loc_land_island
127
+ - I-nam_loc_land_mountain
128
+ - I-nam_loc_land_peak
129
+ - I-nam_loc_land_region
130
+ - I-nam_num_house
131
+ - I-nam_num_phone
132
+ - I-nam_org_company
133
+ - I-nam_org_group
134
+ - I-nam_org_group_band
135
+ - I-nam_org_group_team
136
+ - I-nam_org_institution
137
+ - I-nam_org_nation
138
+ - I-nam_org_organization
139
+ - I-nam_org_organization_sub
140
+ - I-nam_org_political_party
141
+ - I-nam_oth
142
+ - I-nam_oth_currency
143
+ - I-nam_oth_data_format
144
+ - I-nam_oth_license
145
+ - I-nam_oth_position
146
+ - I-nam_oth_tech
147
+ - I-nam_oth_www
148
+ - I-nam_pro
149
+ - I-nam_pro_award
150
+ - I-nam_pro_brand
151
+ - I-nam_pro_media
152
+ - I-nam_pro_media_periodic
153
+ - I-nam_pro_media_radio
154
+ - I-nam_pro_media_tv
155
+ - I-nam_pro_media_web
156
+ - I-nam_pro_model_car
157
+ - I-nam_pro_software
158
+ - I-nam_pro_software_game
159
+ - I-nam_pro_title
160
+ - I-nam_pro_title_album
161
+ - I-nam_pro_title_article
162
+ - I-nam_pro_title_book
163
+ - I-nam_pro_title_document
164
+ - I-nam_pro_title_song
165
+ - I-nam_pro_title_treaty
166
+ - I-nam_pro_title_tv
167
+ - I-nam_pro_vehicle
168
+ - O
169
+ max_seq_length: 256
170
+ seed: 101
171
+ sequence_generator: context-window
pdn2_eval_kpwr_test.txt ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ precision recall f1-score support
2
+
3
+ nam_adj 0.5741 0.5962 0.5849 52
4
+ nam_adj_city 0.8261 0.9048 0.8636 42
5
+ nam_adj_country 0.7538 0.8855 0.8144 166
6
+ nam_adj_person 1.0000 0.7778 0.8750 18
7
+ nam_eve 1.0000 0.8750 0.9333 8
8
+ nam_eve_human 0.4337 0.4615 0.4472 78
9
+ nam_eve_human_cultural 0.3529 0.2727 0.3077 22
10
+ nam_eve_human_holiday 0.5455 0.6667 0.6000 9
11
+ nam_eve_human_sport 0.6230 0.6909 0.6552 55
12
+ nam_fac_bridge 0.3750 0.7500 0.5000 4
13
+ nam_fac_goe 0.6071 0.5312 0.5667 64
14
+ nam_fac_goe_stop 0.6000 0.7500 0.6667 4
15
+ nam_fac_park 0.5556 0.5000 0.5263 10
16
+ nam_fac_road 0.9100 0.9579 0.9333 95
17
+ nam_fac_square 0.8000 0.6667 0.7273 6
18
+ nam_fac_system 0.8235 0.5385 0.6512 26
19
+ nam_liv_animal 0.6429 0.8182 0.7200 11
20
+ nam_liv_god 0.8205 0.9143 0.8649 35
21
+ nam_liv_habitant 0.3750 0.4286 0.4000 7
22
+ nam_liv_person 0.9503 0.9663 0.9582 949
23
+ nam_loc 0.0000 0.0000 0.0000 4
24
+ nam_loc_country_region 0.0667 0.2500 0.1053 4
25
+ nam_loc_gpe_admin1 0.7867 0.9219 0.8489 64
26
+ nam_loc_gpe_admin2 0.8000 0.6667 0.7273 36
27
+ nam_loc_gpe_admin3 0.8140 0.7447 0.7778 47
28
+ nam_loc_gpe_city 0.9032 0.8970 0.9001 437
29
+ nam_loc_gpe_country 0.9536 0.9776 0.9654 357
30
+ nam_loc_gpe_district 0.2439 0.5556 0.3390 18
31
+ nam_loc_gpe_subdivision 0.7143 0.1923 0.3030 26
32
+ nam_loc_historical_region 0.8235 0.6364 0.7179 22
33
+ nam_loc_hydronym 0.0000 0.0000 0.0000 1
34
+ nam_loc_hydronym_lake 1.0000 0.5000 0.6667 2
35
+ nam_loc_hydronym_ocean 1.0000 1.0000 1.0000 1
36
+ nam_loc_hydronym_river 0.8627 0.8627 0.8627 51
37
+ nam_loc_hydronym_sea 1.0000 1.0000 1.0000 3
38
+ nam_loc_land 0.0000 0.0000 0.0000 2
39
+ nam_loc_land_continent 0.9118 0.9688 0.9394 32
40
+ nam_loc_land_island 1.0000 0.5455 0.7059 11
41
+ nam_loc_land_mountain 0.8571 0.6667 0.7500 9
42
+ nam_loc_land_region 0.5455 0.5455 0.5455 11
43
+ nam_num_house 0.8462 1.0000 0.9167 11
44
+ nam_num_phone 1.0000 0.5000 0.6667 2
45
+ nam_org_company 0.6512 0.7368 0.6914 76
46
+ nam_org_group 0.3571 0.2778 0.3125 18
47
+ nam_org_group_band 0.7895 0.7895 0.7895 19
48
+ nam_org_group_team 0.9412 0.9664 0.9536 149
49
+ nam_org_institution 0.7434 0.7406 0.7420 266
50
+ nam_org_nation 0.9000 0.7778 0.8344 81
51
+ nam_org_organization 0.7754 0.7439 0.7593 246
52
+ nam_org_organization_sub 0.5000 0.3333 0.4000 3
53
+ nam_org_political_party 0.8871 0.9483 0.9167 58
54
+ nam_oth 0.0732 0.1364 0.0952 22
55
+ nam_oth_currency 0.9362 0.8627 0.8980 51
56
+ nam_oth_data_format 0.0000 0.0000 0.0000 10
57
+ nam_oth_license 0.6429 0.8182 0.7200 11
58
+ nam_oth_position 0.3333 0.7000 0.4516 10
59
+ nam_oth_tech 0.5000 0.5738 0.5344 61
60
+ nam_oth_www 0.7500 0.1500 0.2500 20
61
+ nam_pro 0.0000 0.0000 0.0000 2
62
+ nam_pro_award 0.7600 0.8261 0.7917 23
63
+ nam_pro_brand 0.5435 0.5435 0.5435 46
64
+ nam_pro_media 0.0000 0.0000 0.0000 8
65
+ nam_pro_media_periodic 0.8280 0.9167 0.8701 84
66
+ nam_pro_media_radio 0.5000 0.6667 0.5714 3
67
+ nam_pro_media_tv 0.4667 1.0000 0.6364 7
68
+ nam_pro_media_web 0.3836 0.7000 0.4956 40
69
+ nam_pro_model_car 0.8571 0.9231 0.8889 26
70
+ nam_pro_software 0.7381 0.6392 0.6851 97
71
+ nam_pro_software_game 0.0000 0.0000 0.0000 3
72
+ nam_pro_title 0.4545 0.5714 0.5063 35
73
+ nam_pro_title_album 0.6667 0.8571 0.7500 7
74
+ nam_pro_title_book 0.2500 0.1818 0.2105 11
75
+ nam_pro_title_document 0.5826 0.7614 0.6601 88
76
+ nam_pro_title_song 0.6000 0.4286 0.5000 7
77
+ nam_pro_title_treaty 0.0000 0.0000 0.0000 2
78
+ nam_pro_title_tv 0.8824 0.6250 0.7317 24
79
+ nam_pro_vehicle 0.0000 0.0000 0.0000 4
80
+
81
+ micro avg 0.7905 0.8158 0.8029 4430
82
+ macro avg 0.8044 0.8158 0.8052 4430
pdn2_metadata.yml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ author: Michał Marcińczuk, CodeNLP
2
+ contact: marcinczuk@gmail.com
3
+ description: Model trained on the KPWr corpus recognizes 82 types of nested entities.
4
+ reference: https://www.sciencedirect.com/science/article/pii/S1877050921015179
5
+ score: F1=80.29 on the KPWr test subset (micro avg, strict evaluation).
6
+
pdn2_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cb4fc4bd740196db90ab184b7aad0699dacc12628f5cdf29ab143bd39f3ff35
3
+ size 1425385193
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": "<mask>"}
test_results.txt ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ precision recall f1-score support
2
+
3
+ nam_adj 0.6230 0.5721 0.5965 208
4
+ nam_adj_city 0.8343 0.8690 0.8513 168
5
+ nam_adj_country 0.7543 0.8554 0.8017 664
6
+ nam_adj_person 0.9825 0.7778 0.8682 72
7
+ nam_eve 1.0000 0.8438 0.9153 32
8
+ nam_eve_human 0.3813 0.4583 0.4163 312
9
+ nam_eve_human_cultural 0.3148 0.1932 0.2394 88
10
+ nam_eve_human_holiday 0.5349 0.6389 0.5823 36
11
+ nam_eve_human_sport 0.6173 0.6818 0.6479 220
12
+ nam_fac_bridge 0.5217 0.7500 0.6154 16
13
+ nam_fac_goe 0.5983 0.5469 0.5714 256
14
+ nam_fac_goe_stop 0.5000 0.6250 0.5556 16
15
+ nam_fac_park 0.6111 0.5500 0.5789 40
16
+ nam_fac_road 0.9333 0.9579 0.9455 380
17
+ nam_fac_square 0.8421 0.6667 0.7442 24
18
+ nam_fac_system 0.7647 0.5000 0.6047 104
19
+ nam_liv_animal 0.5106 0.5455 0.5275 44
20
+ nam_liv_god 0.7785 0.8786 0.8255 140
21
+ nam_liv_habitant 0.4074 0.3929 0.4000 28
22
+ nam_liv_person 0.9476 0.9629 0.9552 3796
23
+ nam_loc 0.0185 0.0625 0.0286 16
24
+ nam_loc_country_region 0.0926 0.3125 0.1429 16
25
+ nam_loc_gpe_admin1 0.7607 0.9062 0.8271 256
26
+ nam_loc_gpe_admin2 0.8091 0.6181 0.7008 144
27
+ nam_loc_gpe_admin3 0.8225 0.7394 0.7787 188
28
+ nam_loc_gpe_city 0.8915 0.9022 0.8968 1748
29
+ nam_loc_gpe_country 0.9438 0.9643 0.9539 1428
30
+ nam_loc_gpe_district 0.2198 0.5556 0.3150 72
31
+ nam_loc_gpe_subdivision 0.6452 0.1923 0.2963 104
32
+ nam_loc_historical_region 0.8333 0.6250 0.7143 88
33
+ nam_loc_hydronym 0.0000 0.0000 0.0000 4
34
+ nam_loc_hydronym_lake 1.0000 0.5000 0.6667 8
35
+ nam_loc_hydronym_ocean 1.0000 1.0000 1.0000 4
36
+ nam_loc_hydronym_river 0.8495 0.8578 0.8537 204
37
+ nam_loc_hydronym_sea 0.8571 1.0000 0.9231 12
38
+ nam_loc_land 0.0000 0.0000 0.0000 8
39
+ nam_loc_land_continent 0.9248 0.9609 0.9425 128
40
+ nam_loc_land_island 0.6667 0.4545 0.5405 44
41
+ nam_loc_land_mountain 0.7692 0.5556 0.6452 36
42
+ nam_loc_land_region 0.5476 0.5227 0.5349 44
43
+ nam_num_house 0.8627 1.0000 0.9263 44
44
+ nam_num_phone 1.0000 0.6250 0.7692 8
45
+ nam_org_company 0.6393 0.7171 0.6760 304
46
+ nam_org_group 0.3409 0.2083 0.2586 72
47
+ nam_org_group_band 0.7922 0.8026 0.7974 76
48
+ nam_org_group_team 0.9329 0.9564 0.9445 596
49
+ nam_org_institution 0.7062 0.7500 0.7274 1064
50
+ nam_org_nation 0.8737 0.7685 0.8177 324
51
+ nam_org_organization 0.7511 0.7175 0.7339 984
52
+ nam_org_organization_sub 0.1538 0.1667 0.1600 12
53
+ nam_org_political_party 0.8810 0.9569 0.9174 232
54
+ nam_oth 0.0805 0.1364 0.1013 88
55
+ nam_oth_currency 0.9396 0.8382 0.8860 204
56
+ nam_oth_data_format 0.0000 0.0000 0.0000 40
57
+ nam_oth_license 0.7551 0.8409 0.7957 44
58
+ nam_oth_position 0.3766 0.7250 0.4957 40
59
+ nam_oth_tech 0.5094 0.5574 0.5323 244
60
+ nam_oth_www 0.5556 0.1250 0.2041 80
61
+ nam_pro 0.0000 0.0000 0.0000 8
62
+ nam_pro_award 0.6782 0.6413 0.6592 92
63
+ nam_pro_brand 0.5202 0.5598 0.5393 184
64
+ nam_pro_media 0.0000 0.0000 0.0000 32
65
+ nam_pro_media_periodic 0.8149 0.8886 0.8501 332
66
+ nam_pro_media_radio 0.5556 0.8333 0.6667 12
67
+ nam_pro_media_tv 0.4179 1.0000 0.5895 28
68
+ nam_pro_media_web 0.3695 0.6813 0.4791 160
69
+ nam_pro_model_car 0.8246 0.9038 0.8624 104
70
+ nam_pro_software 0.7430 0.6186 0.6751 388
71
+ nam_pro_software_game 0.0000 0.0000 0.0000 12
72
+ nam_pro_title 0.5063 0.5714 0.5369 140
73
+ nam_pro_title_album 0.6190 0.9286 0.7429 28
74
+ nam_pro_title_book 0.3103 0.2045 0.2466 44
75
+ nam_pro_title_document 0.5137 0.6484 0.5732 347
76
+ nam_pro_title_song 0.7059 0.4286 0.5333 28
77
+ nam_pro_title_treaty 0.0714 0.1250 0.0909 8
78
+ nam_pro_title_tv 0.8636 0.5938 0.7037 96
79
+ nam_pro_vehicle 0.0000 0.0000 0.0000 16
80
+
81
+ micro avg 0.7803 0.8033 0.7916 17711
82
+ macro avg 0.7921 0.8033 0.7932 17711
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"cls_token": "<s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": "<mask>", "sep_token": "</s>", "do_lowercase_and_remove_accent": false, "bos_token": "<s>", "additional_special_tokens": [], "model_max_length": 512, "special_tokens_map_file": "/home/czuk/.cache/huggingface/transformers/7e8fe8852a1ff7e03195cb41fac16af837f8c14a34a61850b02a7395eb294f00.b8e113717eb1828d09e47de853cf49c8fad05ebdce24df2614cd942dc23e2a77", "name_or_path": "allegro/herbert-large-cased", "lang2id": null, "id2lang": null, "tokenizer_class": "HerbertTokenizer"}
vocab.json ADDED
The diff for this file is too large to render. See raw diff