a100
committed on
Commit
•
a558337
1
Parent(s):
6598217
Update spacy pipeline to 3.2.2
Browse files
- README.md +11 -11
- config.cfg +13 -4
- experimental_arc_labeler/model +1 -1
- experimental_arc_predicter/model +1 -1
- hu_core_news_trf-any-py3-none-any.whl +2 -2
- lookup_lemmatizer/lookups.bin +3 -0
- meta.json +25 -18
- ner/model +1 -1
- transformer/model +1 -1
README.md
CHANGED
@@ -14,13 +14,13 @@ model-index:
|
|
14 |
metrics:
|
15 |
- name: NER Precision
|
16 |
type: precision
|
17 |
-
value: 0.
|
18 |
- name: NER Recall
|
19 |
type: recall
|
20 |
-
value: 0.
|
21 |
- name: NER F Score
|
22 |
type: f_score
|
23 |
-
value: 0.
|
24 |
- task:
|
25 |
name: TAG
|
26 |
type: token-classification
|
@@ -48,7 +48,7 @@ model-index:
|
|
48 |
metrics:
|
49 |
- name: Lemma Accuracy
|
50 |
type: accuracy
|
51 |
-
value: 0.
|
52 |
- task:
|
53 |
name: UNLABELED_DEPENDENCIES
|
54 |
type: token-classification
|
@@ -76,10 +76,10 @@ Hungarian transformer pipeline (huBert) for HuSpaCy. Components: transformer, se
|
|
76 |
| Feature | Description |
|
77 |
| --- | --- |
|
78 |
| **Name** | `hu_core_news_trf` |
|
79 |
-
| **Version** | `3.2.
|
80 |
| **spaCy** | `>=3.2.4,<3.3.0` |
|
81 |
-
| **Default Pipeline** | `transformer`, `senter`, `tagger`, `morphologizer`, `experimental_edit_tree_lemmatizer`, `experimental_arc_predicter`, `experimental_arc_labeler`, `ner` |
|
82 |
-
| **Components** | `transformer`, `senter`, `tagger`, `morphologizer`, `experimental_edit_tree_lemmatizer`, `experimental_arc_predicter`, `experimental_arc_labeler`, `ner` |
|
83 |
| **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
|
84 |
| **Sources** | [UD Hungarian Szeged](https://universaldependencies.org/treebanks/hu_szeged/index.html) (Richárd Farkas, Katalin Simkó, Zsolt Szántó, Viktor Varga, Veronika Vincze (MTA-SZTE Research Group on Artificial Intelligence))<br />[NYTK-NerKor Corpus](https://github.com/nytud/NYTK-NerKor) (Eszter Simon, Noémi Vadász (Department of Language Technology and Applied Linguistics))<br />[hunNERwiki](http://hlt.sztaki.hu/resources/hunnerwiki.html) (Eszter Simon, Dávid Márk Nemeskey (HLT Group, Budapest University of Technology and Economics))<br />[Szeged NER Corpus](https://rgai.inf.u-szeged.hu/node/130) (György Szarvas, Richárd Farkas, László Felföldi, András Kocsor, János Csirik (MTA-SZTE Research Group on Artificial Intelligence))<br />[huBERT base model (cased)](https://huggingface.co/SZTAKI-HLT/hubert-base-cc) (Dávid Márk Nemeskey (SZTAKI-HLT)) |
|
85 |
| **License** | `cc-by-sa-4.0` |
|
@@ -118,11 +118,11 @@ Hungarian transformer pipeline (huBert) for HuSpaCy. Components: transformer, se
|
|
118 |
| `MORPH_MICRO_P` | 97.75 |
|
119 |
| `MORPH_MICRO_R` | 97.09 |
|
120 |
| `MORPH_MICRO_F` | 97.42 |
|
121 |
-
| `LEMMA_ACC` | 98.
|
122 |
| `BOUND_DEP_LAS` | 87.15 |
|
123 |
| `BOUND_DEP_UAS` | 91.19 |
|
124 |
| `DEP_UAS` | 91.17 |
|
125 |
| `DEP_LAS` | 87.13 |
|
126 |
-
| `ENTS_P` | 90.
|
127 |
-
| `ENTS_R` |
|
128 |
-
| `ENTS_F` |
|
|
|
14 |
metrics:
|
15 |
- name: NER Precision
|
16 |
type: precision
|
17 |
+
value: 0.9094751673
|
18 |
- name: NER Recall
|
19 |
type: recall
|
20 |
+
value: 0.9078762307
|
21 |
- name: NER F Score
|
22 |
type: f_score
|
23 |
+
value: 0.9086749956
|
24 |
- task:
|
25 |
name: TAG
|
26 |
type: token-classification
|
|
|
48 |
metrics:
|
49 |
- name: Lemma Accuracy
|
50 |
type: accuracy
|
51 |
+
value: 0.9864127835
|
52 |
- task:
|
53 |
name: UNLABELED_DEPENDENCIES
|
54 |
type: token-classification
|
|
|
76 |
| Feature | Description |
|
77 |
| --- | --- |
|
78 |
| **Name** | `hu_core_news_trf` |
|
79 |
+
| **Version** | `3.2.2` |
|
80 |
| **spaCy** | `>=3.2.4,<3.3.0` |
|
81 |
+
| **Default Pipeline** | `transformer`, `senter`, `tagger`, `morphologizer`, `lookup_lemmatizer`, `experimental_edit_tree_lemmatizer`, `lemma_smoother`, `experimental_arc_predicter`, `experimental_arc_labeler`, `ner` |
|
82 |
+
| **Components** | `transformer`, `senter`, `tagger`, `morphologizer`, `lookup_lemmatizer`, `experimental_edit_tree_lemmatizer`, `lemma_smoother`, `experimental_arc_predicter`, `experimental_arc_labeler`, `ner` |
|
83 |
| **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
|
84 |
| **Sources** | [UD Hungarian Szeged](https://universaldependencies.org/treebanks/hu_szeged/index.html) (Richárd Farkas, Katalin Simkó, Zsolt Szántó, Viktor Varga, Veronika Vincze (MTA-SZTE Research Group on Artificial Intelligence))<br />[NYTK-NerKor Corpus](https://github.com/nytud/NYTK-NerKor) (Eszter Simon, Noémi Vadász (Department of Language Technology and Applied Linguistics))<br />[hunNERwiki](http://hlt.sztaki.hu/resources/hunnerwiki.html) (Eszter Simon, Dávid Márk Nemeskey (HLT Group, Budapest University of Technology and Economics))<br />[Szeged NER Corpus](https://rgai.inf.u-szeged.hu/node/130) (György Szarvas, Richárd Farkas, László Felföldi, András Kocsor, János Csirik (MTA-SZTE Research Group on Artificial Intelligence))<br />[huBERT base model (cased)](https://huggingface.co/SZTAKI-HLT/hubert-base-cc) (Dávid Márk Nemeskey (SZTAKI-HLT)) |
|
85 |
| **License** | `cc-by-sa-4.0` |
|
|
|
118 |
| `MORPH_MICRO_P` | 97.75 |
|
119 |
| `MORPH_MICRO_R` | 97.09 |
|
120 |
| `MORPH_MICRO_F` | 97.42 |
|
121 |
+
| `LEMMA_ACC` | 98.64 |
|
122 |
| `BOUND_DEP_LAS` | 87.15 |
|
123 |
| `BOUND_DEP_UAS` | 91.19 |
|
124 |
| `DEP_UAS` | 91.17 |
|
125 |
| `DEP_LAS` | 87.13 |
|
126 |
+
| `ENTS_P` | 90.95 |
|
127 |
+
| `ENTS_R` | 90.79 |
|
128 |
+
| `ENTS_F` | 90.87 |
|
config.cfg
CHANGED
@@ -1,7 +1,8 @@
|
|
1 |
[paths]
|
2 |
-
tagger_model = "models/hu_core_news_trf-tagger-3.2.
|
3 |
-
parser_model = "models/hu_core_news_trf-parser-3.2.
|
4 |
-
ner_model = "models/hu_core_news_trf-ner-3.2.
|
|
|
5 |
train = null
|
6 |
dev = null
|
7 |
vectors = null
|
@@ -13,7 +14,7 @@ gpu_allocator = null
|
|
13 |
|
14 |
[nlp]
|
15 |
lang = "hu"
|
16 |
-
pipeline = ["transformer","senter","tagger","morphologizer","experimental_edit_tree_lemmatizer","experimental_arc_predicter","experimental_arc_labeler","ner"]
|
17 |
tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
|
18 |
disabled = []
|
19 |
before_creation = null
|
@@ -103,6 +104,14 @@ grad_factor = 1.0
|
|
103 |
upstream = "transformer"
|
104 |
pooling = {"@layers":"reduce_mean.v1"}
|
105 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
[components.morphologizer]
|
107 |
factory = "morphologizer"
|
108 |
extend = false
|
|
|
1 |
[paths]
|
2 |
+
tagger_model = "models/hu_core_news_trf-tagger-3.2.2/model-best"
|
3 |
+
parser_model = "models/hu_core_news_trf-parser-3.2.2/model-best"
|
4 |
+
ner_model = "models/hu_core_news_trf-ner-3.2.2/model-best"
|
5 |
+
lemmatizer_lookups = "models/hu_core_news_trf-lookup-lemmatizer-3.2.2"
|
6 |
train = null
|
7 |
dev = null
|
8 |
vectors = null
|
|
|
14 |
|
15 |
[nlp]
|
16 |
lang = "hu"
|
17 |
+
pipeline = ["transformer","senter","tagger","morphologizer","lookup_lemmatizer","experimental_edit_tree_lemmatizer","lemma_smoother","experimental_arc_predicter","experimental_arc_labeler","ner"]
|
18 |
tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
|
19 |
disabled = []
|
20 |
before_creation = null
|
|
|
104 |
upstream = "transformer"
|
105 |
pooling = {"@layers":"reduce_mean.v1"}
|
106 |
|
107 |
+
[components.lemma_smoother]
|
108 |
+
factory = "hu.lemma_smoother"
|
109 |
+
|
110 |
+
[components.lookup_lemmatizer]
|
111 |
+
factory = "hu.lookup_lemmatizer"
|
112 |
+
scorer = {"@scorers":"spacy.lemmatizer_scorer.v1"}
|
113 |
+
source = ${paths.lemmatizer_lookups}
|
114 |
+
|
115 |
[components.morphologizer]
|
116 |
factory = "morphologizer"
|
117 |
extend = false
|
experimental_arc_labeler/model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 447476722
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9fc521c6aeb9d827e2439456aa44b28d4bd49845762e1ceda7d98e486a9e34d
|
3 |
size 447476722
|
experimental_arc_predicter/model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 445185682
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0c866a73a8ee7d3d8d53c8b6dc57b7929f16b501585cc0c6a15b06dc4b739dac
|
3 |
size 445185682
|
hu_core_news_trf-any-py3-none-any.whl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75d7e1530fc472e1de19b73adb3da3cac98175a0c93498b423ee790aa4e220e6
|
3 |
+
size 1674182137
|
lookup_lemmatizer/lookups.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c7aaa1cfd45a0afd57ada8ccc690ee182485a151bdb133b7b8c9276647ab9e60
|
3 |
+
size 2745978
|
meta.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
"lang":"hu",
|
3 |
"name":"core_news_trf",
|
4 |
-
"version":"3.2.
|
5 |
"description":"Hungarian transformer pipeline (huBert) for HuSpaCy. Components: transformer, senter, tagger, morphologizer, lemmatizer, parser, ner",
|
6 |
"author":"SzegedAI, MILAB",
|
7 |
"email":"gyorgy@orosz.link",
|
@@ -1188,6 +1188,9 @@
|
|
1188 |
"Case=Dat|Number=Plur|POS=PRON|Person=1|PronType=Prs",
|
1189 |
"Case=Acc|Number=Plur|Number[psor]=Sing|POS=PROPN|Person[psor]=3",
|
1190 |
"Case=All|Number=Sing|Number[psed]=Sing|POS=PRON|Person=3|PronType=Tot"
|
|
|
|
|
|
|
1191 |
],
|
1192 |
"experimental_edit_tree_lemmatizer":[
|
1193 |
0,
|
@@ -7282,7 +7285,9 @@
|
|
7282 |
"senter",
|
7283 |
"tagger",
|
7284 |
"morphologizer",
|
|
|
7285 |
"experimental_edit_tree_lemmatizer",
|
|
|
7286 |
"experimental_arc_predicter",
|
7287 |
"experimental_arc_labeler",
|
7288 |
"ner"
|
@@ -7292,7 +7297,9 @@
|
|
7292 |
"senter",
|
7293 |
"tagger",
|
7294 |
"morphologizer",
|
|
|
7295 |
"experimental_edit_tree_lemmatizer",
|
|
|
7296 |
"experimental_arc_predicter",
|
7297 |
"experimental_arc_labeler",
|
7298 |
"ner"
|
@@ -7421,7 +7428,7 @@
|
|
7421 |
"f":0.8
|
7422 |
}
|
7423 |
},
|
7424 |
-
"lemma_acc":0.
|
7425 |
"bound_dep_las":0.8715420695,
|
7426 |
"bound_dep_uas":0.9119364411,
|
7427 |
"dep_uas":0.911718264,
|
@@ -7653,32 +7660,32 @@
|
|
7653 |
"f":0.0
|
7654 |
}
|
7655 |
},
|
7656 |
-
"ents_p":0.
|
7657 |
-
"ents_r":0.
|
7658 |
-
"ents_f":0.
|
7659 |
"ents_per_type":{
|
7660 |
"ORG":{
|
7661 |
-
"p":0.
|
7662 |
-
"r":0.
|
7663 |
-
"f":0.
|
7664 |
},
|
7665 |
"PER":{
|
7666 |
-
"p":0.
|
7667 |
-
"r":0.
|
7668 |
-
"f":0.
|
7669 |
},
|
7670 |
"LOC":{
|
7671 |
-
"p":0.
|
7672 |
-
"r":0.
|
7673 |
-
"f":0.
|
7674 |
},
|
7675 |
"MISC":{
|
7676 |
-
"p":0.
|
7677 |
-
"r":0.
|
7678 |
-
"f":0.
|
7679 |
}
|
7680 |
},
|
7681 |
-
"speed":
|
7682 |
},
|
7683 |
"sources":[
|
7684 |
{
|
|
|
1 |
{
|
2 |
"lang":"hu",
|
3 |
"name":"core_news_trf",
|
4 |
+
"version":"3.2.2",
|
5 |
"description":"Hungarian transformer pipeline (huBert) for HuSpaCy. Components: transformer, senter, tagger, morphologizer, lemmatizer, parser, ner",
|
6 |
"author":"SzegedAI, MILAB",
|
7 |
"email":"gyorgy@orosz.link",
|
|
|
1188 |
"Case=Dat|Number=Plur|POS=PRON|Person=1|PronType=Prs",
|
1189 |
"Case=Acc|Number=Plur|Number[psor]=Sing|POS=PROPN|Person[psor]=3",
|
1190 |
"Case=All|Number=Sing|Number[psed]=Sing|POS=PRON|Person=3|PronType=Tot"
|
1191 |
+
],
|
1192 |
+
"lookup_lemmatizer":[
|
1193 |
+
|
1194 |
],
|
1195 |
"experimental_edit_tree_lemmatizer":[
|
1196 |
0,
|
|
|
7285 |
"senter",
|
7286 |
"tagger",
|
7287 |
"morphologizer",
|
7288 |
+
"lookup_lemmatizer",
|
7289 |
"experimental_edit_tree_lemmatizer",
|
7290 |
+
"lemma_smoother",
|
7291 |
"experimental_arc_predicter",
|
7292 |
"experimental_arc_labeler",
|
7293 |
"ner"
|
|
|
7297 |
"senter",
|
7298 |
"tagger",
|
7299 |
"morphologizer",
|
7300 |
+
"lookup_lemmatizer",
|
7301 |
"experimental_edit_tree_lemmatizer",
|
7302 |
+
"lemma_smoother",
|
7303 |
"experimental_arc_predicter",
|
7304 |
"experimental_arc_labeler",
|
7305 |
"ner"
|
|
|
7428 |
"f":0.8
|
7429 |
}
|
7430 |
},
|
7431 |
+
"lemma_acc":0.9864127835,
|
7432 |
"bound_dep_las":0.8715420695,
|
7433 |
"bound_dep_uas":0.9119364411,
|
7434 |
"dep_uas":0.911718264,
|
|
|
7660 |
"f":0.0
|
7661 |
}
|
7662 |
},
|
7663 |
+
"ents_p":0.9094751673,
|
7664 |
+
"ents_r":0.9078762307,
|
7665 |
+
"ents_f":0.9086749956,
|
7666 |
"ents_per_type":{
|
7667 |
"ORG":{
|
7668 |
+
"p":0.9038718291,
|
7669 |
+
"r":0.9415855355,
|
7670 |
+
"f":0.9223433243
|
7671 |
},
|
7672 |
"PER":{
|
7673 |
+
"p":0.9425981873,
|
7674 |
+
"r":0.9318996416,
|
7675 |
+
"f":0.9372183839
|
7676 |
},
|
7677 |
"LOC":{
|
7678 |
+
"p":0.9434822242,
|
7679 |
+
"r":0.8984375,
|
7680 |
+
"f":0.9204090707
|
7681 |
},
|
7682 |
"MISC":{
|
7683 |
+
"p":0.7923416789,
|
7684 |
+
"r":0.7631205674,
|
7685 |
+
"f":0.7774566474
|
7686 |
}
|
7687 |
},
|
7688 |
+
"speed":3603.1106108543
|
7689 |
},
|
7690 |
"sources":[
|
7691 |
{
|
ner/model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 443626204
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e513721ef78ad46ad0a39817bf0b82dd7b3ca2273155680f9cc6696f092d2a1
|
3 |
size 443626204
|
transformer/model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 443344004
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef1e9f39f0358ae8d9c4dc8e1427fc4ecf054e284ac5bba614cf9194266e2914
|
3 |
size 443344004
|