a100 committed on
Commit
a558337
1 Parent(s): 6598217

Update spacy pipeline to 3.2.2

Browse files
README.md CHANGED
@@ -14,13 +14,13 @@ model-index:
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
- value: 0.9078212291
18
  - name: NER Recall
19
  type: recall
20
- value: 0.9142053446
21
  - name: NER F Score
22
  type: f_score
23
- value: 0.9110021023
24
  - task:
25
  name: TAG
26
  type: token-classification
@@ -48,7 +48,7 @@ model-index:
48
  metrics:
49
  - name: Lemma Accuracy
50
  type: accuracy
51
- value: 0.9863170988
52
  - task:
53
  name: UNLABELED_DEPENDENCIES
54
  type: token-classification
@@ -76,10 +76,10 @@ Hungarian transformer pipeline (huBert) for HuSpaCy. Components: transformer, se
76
  | Feature | Description |
77
  | --- | --- |
78
  | **Name** | `hu_core_news_trf` |
79
- | **Version** | `3.2.1` |
80
  | **spaCy** | `>=3.2.4,<3.3.0` |
81
- | **Default Pipeline** | `transformer`, `senter`, `tagger`, `morphologizer`, `experimental_edit_tree_lemmatizer`, `experimental_arc_predicter`, `experimental_arc_labeler`, `ner` |
82
- | **Components** | `transformer`, `senter`, `tagger`, `morphologizer`, `experimental_edit_tree_lemmatizer`, `experimental_arc_predicter`, `experimental_arc_labeler`, `ner` |
83
  | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
84
  | **Sources** | [UD Hungarian Szeged](https://universaldependencies.org/treebanks/hu_szeged/index.html) (Richárd Farkas, Katalin Simkó, Zsolt Szántó, Viktor Varga, Veronika Vincze (MTA-SZTE Research Group on Artificial Intelligence))<br />[NYTK-NerKor Corpus](https://github.com/nytud/NYTK-NerKor) (Eszter Simon, Noémi Vadász (Department of Language Technology and Applied Linguistics))<br />[hunNERwiki](http://hlt.sztaki.hu/resources/hunnerwiki.html) (Eszter Simon, Dávid Márk Nemeskey (HLT Group, Budapest University of Technology and Economics))<br />[Szeged NER Corpus](https://rgai.inf.u-szeged.hu/node/130) (György Szarvas, Richárd Farkas, László Felföldi, András Kocsor, János Csirik (MTA-SZTE Research Group on Artificial Intelligence))<br />[huBERT base model (cased)](https://huggingface.co/SZTAKI-HLT/hubert-base-cc) (Dávid Márk Nemeskey (SZTAKI-HLT)) |
85
  | **License** | `cc-by-sa-4.0` |
@@ -118,11 +118,11 @@ Hungarian transformer pipeline (huBert) for HuSpaCy. Components: transformer, se
118
  | `MORPH_MICRO_P` | 97.75 |
119
  | `MORPH_MICRO_R` | 97.09 |
120
  | `MORPH_MICRO_F` | 97.42 |
121
- | `LEMMA_ACC` | 98.63 |
122
  | `BOUND_DEP_LAS` | 87.15 |
123
  | `BOUND_DEP_UAS` | 91.19 |
124
  | `DEP_UAS` | 91.17 |
125
  | `DEP_LAS` | 87.13 |
126
- | `ENTS_P` | 90.78 |
127
- | `ENTS_R` | 91.42 |
128
- | `ENTS_F` | 91.10 |
 
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
+ value: 0.9094751673
18
  - name: NER Recall
19
  type: recall
20
+ value: 0.9078762307
21
  - name: NER F Score
22
  type: f_score
23
+ value: 0.9086749956
24
  - task:
25
  name: TAG
26
  type: token-classification
 
48
  metrics:
49
  - name: Lemma Accuracy
50
  type: accuracy
51
+ value: 0.9864127835
52
  - task:
53
  name: UNLABELED_DEPENDENCIES
54
  type: token-classification
 
76
  | Feature | Description |
77
  | --- | --- |
78
  | **Name** | `hu_core_news_trf` |
79
+ | **Version** | `3.2.2` |
80
  | **spaCy** | `>=3.2.4,<3.3.0` |
81
+ | **Default Pipeline** | `transformer`, `senter`, `tagger`, `morphologizer`, `lookup_lemmatizer`, `experimental_edit_tree_lemmatizer`, `lemma_smoother`, `experimental_arc_predicter`, `experimental_arc_labeler`, `ner` |
82
+ | **Components** | `transformer`, `senter`, `tagger`, `morphologizer`, `lookup_lemmatizer`, `experimental_edit_tree_lemmatizer`, `lemma_smoother`, `experimental_arc_predicter`, `experimental_arc_labeler`, `ner` |
83
  | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
84
  | **Sources** | [UD Hungarian Szeged](https://universaldependencies.org/treebanks/hu_szeged/index.html) (Richárd Farkas, Katalin Simkó, Zsolt Szántó, Viktor Varga, Veronika Vincze (MTA-SZTE Research Group on Artificial Intelligence))<br />[NYTK-NerKor Corpus](https://github.com/nytud/NYTK-NerKor) (Eszter Simon, Noémi Vadász (Department of Language Technology and Applied Linguistics))<br />[hunNERwiki](http://hlt.sztaki.hu/resources/hunnerwiki.html) (Eszter Simon, Dávid Márk Nemeskey (HLT Group, Budapest University of Technology and Economics))<br />[Szeged NER Corpus](https://rgai.inf.u-szeged.hu/node/130) (György Szarvas, Richárd Farkas, László Felföldi, András Kocsor, János Csirik (MTA-SZTE Research Group on Artificial Intelligence))<br />[huBERT base model (cased)](https://huggingface.co/SZTAKI-HLT/hubert-base-cc) (Dávid Márk Nemeskey (SZTAKI-HLT)) |
85
  | **License** | `cc-by-sa-4.0` |
 
118
  | `MORPH_MICRO_P` | 97.75 |
119
  | `MORPH_MICRO_R` | 97.09 |
120
  | `MORPH_MICRO_F` | 97.42 |
121
+ | `LEMMA_ACC` | 98.64 |
122
  | `BOUND_DEP_LAS` | 87.15 |
123
  | `BOUND_DEP_UAS` | 91.19 |
124
  | `DEP_UAS` | 91.17 |
125
  | `DEP_LAS` | 87.13 |
126
+ | `ENTS_P` | 90.95 |
127
+ | `ENTS_R` | 90.79 |
128
+ | `ENTS_F` | 90.87 |
config.cfg CHANGED
@@ -1,7 +1,8 @@
1
  [paths]
2
- tagger_model = "models/hu_core_news_trf-tagger-3.2.1/model-best"
3
- parser_model = "models/hu_core_news_trf-parser-3.2.1/model-best"
4
- ner_model = "models/hu_core_news_trf-ner-3.2.1/model-best"
 
5
  train = null
6
  dev = null
7
  vectors = null
@@ -13,7 +14,7 @@ gpu_allocator = null
13
 
14
  [nlp]
15
  lang = "hu"
16
- pipeline = ["transformer","senter","tagger","morphologizer","experimental_edit_tree_lemmatizer","experimental_arc_predicter","experimental_arc_labeler","ner"]
17
  tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
18
  disabled = []
19
  before_creation = null
@@ -103,6 +104,14 @@ grad_factor = 1.0
103
  upstream = "transformer"
104
  pooling = {"@layers":"reduce_mean.v1"}
105
 
 
 
 
 
 
 
 
 
106
  [components.morphologizer]
107
  factory = "morphologizer"
108
  extend = false
 
1
  [paths]
2
+ tagger_model = "models/hu_core_news_trf-tagger-3.2.2/model-best"
3
+ parser_model = "models/hu_core_news_trf-parser-3.2.2/model-best"
4
+ ner_model = "models/hu_core_news_trf-ner-3.2.2/model-best"
5
+ lemmatizer_lookups = "models/hu_core_news_trf-lookup-lemmatizer-3.2.2"
6
  train = null
7
  dev = null
8
  vectors = null
 
14
 
15
  [nlp]
16
  lang = "hu"
17
+ pipeline = ["transformer","senter","tagger","morphologizer","lookup_lemmatizer","experimental_edit_tree_lemmatizer","lemma_smoother","experimental_arc_predicter","experimental_arc_labeler","ner"]
18
  tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
19
  disabled = []
20
  before_creation = null
 
104
  upstream = "transformer"
105
  pooling = {"@layers":"reduce_mean.v1"}
106
 
107
+ [components.lemma_smoother]
108
+ factory = "hu.lemma_smoother"
109
+
110
+ [components.lookup_lemmatizer]
111
+ factory = "hu.lookup_lemmatizer"
112
+ scorer = {"@scorers":"spacy.lemmatizer_scorer.v1"}
113
+ source = ${paths.lemmatizer_lookups}
114
+
115
  [components.morphologizer]
116
  factory = "morphologizer"
117
  extend = false
experimental_arc_labeler/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:990bd725ef174d1d6d38f22c6da304252fe49c5bb3a007727f65a38d38a145f5
3
  size 447476722
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9fc521c6aeb9d827e2439456aa44b28d4bd49845762e1ceda7d98e486a9e34d
3
  size 447476722
experimental_arc_predicter/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df945fcedd45cf4f83bea09366a04cf51e8a8bcc9409033108ae86e0c9cc0b18
3
  size 445185682
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c866a73a8ee7d3d8d53c8b6dc57b7929f16b501585cc0c6a15b06dc4b739dac
3
  size 445185682
hu_core_news_trf-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca3e08013b4549af474c26465027b678cdf3ec2edbeb3d2dc3a2715f3e13aa96
3
- size 1672216913
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75d7e1530fc472e1de19b73adb3da3cac98175a0c93498b423ee790aa4e220e6
3
+ size 1674182137
lookup_lemmatizer/lookups.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7aaa1cfd45a0afd57ada8ccc690ee182485a151bdb133b7b8c9276647ab9e60
3
+ size 2745978
meta.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "lang":"hu",
3
  "name":"core_news_trf",
4
- "version":"3.2.1",
5
  "description":"Hungarian transformer pipeline (huBert) for HuSpaCy. Components: transformer, senter, tagger, morphologizer, lemmatizer, parser, ner",
6
  "author":"SzegedAI, MILAB",
7
  "email":"gyorgy@orosz.link",
@@ -1188,6 +1188,9 @@
1188
  "Case=Dat|Number=Plur|POS=PRON|Person=1|PronType=Prs",
1189
  "Case=Acc|Number=Plur|Number[psor]=Sing|POS=PROPN|Person[psor]=3",
1190
  "Case=All|Number=Sing|Number[psed]=Sing|POS=PRON|Person=3|PronType=Tot"
 
 
 
1191
  ],
1192
  "experimental_edit_tree_lemmatizer":[
1193
  0,
@@ -7282,7 +7285,9 @@
7282
  "senter",
7283
  "tagger",
7284
  "morphologizer",
 
7285
  "experimental_edit_tree_lemmatizer",
 
7286
  "experimental_arc_predicter",
7287
  "experimental_arc_labeler",
7288
  "ner"
@@ -7292,7 +7297,9 @@
7292
  "senter",
7293
  "tagger",
7294
  "morphologizer",
 
7295
  "experimental_edit_tree_lemmatizer",
 
7296
  "experimental_arc_predicter",
7297
  "experimental_arc_labeler",
7298
  "ner"
@@ -7421,7 +7428,7 @@
7421
  "f":0.8
7422
  }
7423
  },
7424
- "lemma_acc":0.9863170988,
7425
  "bound_dep_las":0.8715420695,
7426
  "bound_dep_uas":0.9119364411,
7427
  "dep_uas":0.911718264,
@@ -7653,32 +7660,32 @@
7653
  "f":0.0
7654
  }
7655
  },
7656
- "ents_p":0.9078212291,
7657
- "ents_r":0.9142053446,
7658
- "ents_f":0.9110021023,
7659
  "ents_per_type":{
7660
  "ORG":{
7661
- "p":0.9280742459,
7662
- "r":0.9272137228,
7663
- "f":0.9276437848
7664
  },
7665
  "PER":{
7666
- "p":0.9423648247,
7667
- "r":0.9474313023,
7668
- "f":0.944891272
7669
  },
7670
  "LOC":{
7671
- "p":0.9258936356,
7672
- "r":0.921875,
7673
- "f":0.9238799478
7674
  },
7675
  "MISC":{
7676
- "p":0.7429340511,
7677
- "r":0.7829787234,
7678
- "f":0.7624309392
7679
  }
7680
  },
7681
- "speed":2488.2218404265
7682
  },
7683
  "sources":[
7684
  {
 
1
  {
2
  "lang":"hu",
3
  "name":"core_news_trf",
4
+ "version":"3.2.2",
5
  "description":"Hungarian transformer pipeline (huBert) for HuSpaCy. Components: transformer, senter, tagger, morphologizer, lemmatizer, parser, ner",
6
  "author":"SzegedAI, MILAB",
7
  "email":"gyorgy@orosz.link",
 
1188
  "Case=Dat|Number=Plur|POS=PRON|Person=1|PronType=Prs",
1189
  "Case=Acc|Number=Plur|Number[psor]=Sing|POS=PROPN|Person[psor]=3",
1190
  "Case=All|Number=Sing|Number[psed]=Sing|POS=PRON|Person=3|PronType=Tot"
1191
+ ],
1192
+ "lookup_lemmatizer":[
1193
+
1194
  ],
1195
  "experimental_edit_tree_lemmatizer":[
1196
  0,
 
7285
  "senter",
7286
  "tagger",
7287
  "morphologizer",
7288
+ "lookup_lemmatizer",
7289
  "experimental_edit_tree_lemmatizer",
7290
+ "lemma_smoother",
7291
  "experimental_arc_predicter",
7292
  "experimental_arc_labeler",
7293
  "ner"
 
7297
  "senter",
7298
  "tagger",
7299
  "morphologizer",
7300
+ "lookup_lemmatizer",
7301
  "experimental_edit_tree_lemmatizer",
7302
+ "lemma_smoother",
7303
  "experimental_arc_predicter",
7304
  "experimental_arc_labeler",
7305
  "ner"
 
7428
  "f":0.8
7429
  }
7430
  },
7431
+ "lemma_acc":0.9864127835,
7432
  "bound_dep_las":0.8715420695,
7433
  "bound_dep_uas":0.9119364411,
7434
  "dep_uas":0.911718264,
 
7660
  "f":0.0
7661
  }
7662
  },
7663
+ "ents_p":0.9094751673,
7664
+ "ents_r":0.9078762307,
7665
+ "ents_f":0.9086749956,
7666
  "ents_per_type":{
7667
  "ORG":{
7668
+ "p":0.9038718291,
7669
+ "r":0.9415855355,
7670
+ "f":0.9223433243
7671
  },
7672
  "PER":{
7673
+ "p":0.9425981873,
7674
+ "r":0.9318996416,
7675
+ "f":0.9372183839
7676
  },
7677
  "LOC":{
7678
+ "p":0.9434822242,
7679
+ "r":0.8984375,
7680
+ "f":0.9204090707
7681
  },
7682
  "MISC":{
7683
+ "p":0.7923416789,
7684
+ "r":0.7631205674,
7685
+ "f":0.7774566474
7686
  }
7687
  },
7688
+ "speed":3603.1106108543
7689
  },
7690
  "sources":[
7691
  {
ner/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7965ae35085d883e8e9f9a5c5df11d763b93af32932f0c5a6e6040c49035fd4c
3
  size 443626204
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e513721ef78ad46ad0a39817bf0b82dd7b3ca2273155680f9cc6696f092d2a1
3
  size 443626204
transformer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9f20173c2a1ea1f3d4e540eb5fc7a72daa8844c31a0bd74d9c76c83c054df90
3
  size 443344004
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef1e9f39f0358ae8d9c4dc8e1427fc4ecf054e284ac5bba614cf9194266e2914
3
  size 443344004