osanseviero committed on
Commit
c58aec4
1 Parent(s): 341304a

Update spaCy pipeline

Browse files
LICENSES_SOURCES CHANGED
@@ -1,4 +1,4 @@
1
- # UD Danish DDT v2.5
2
 
3
  * Author: Johannsen, Anders; Martínez Alonso, Héctor; Plank, Barbara
4
  * URL: https://github.com/UniversalDependencies/UD_Danish-DDT
 
1
+ # UD Danish DDT v2.8
2
 
3
  * Author: Johannsen, Anders; Martínez Alonso, Héctor; Plank, Barbara
4
  * URL: https://github.com/UniversalDependencies/UD_Danish-DDT
README.md CHANGED
@@ -4,7 +4,7 @@ tags:
4
  - token-classification
5
  language:
6
  - da
7
- license: CC-BY-SA-4.0
8
  model-index:
9
  - name: da_core_news_md
10
  results:
@@ -14,47 +14,47 @@ model-index:
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
- value: 0.8105263158
18
  - name: NER Recall
19
  type: recall
20
- value: 0.8020833333
21
  - name: NER F Score
22
  type: f_score
23
- value: 0.8062827225
24
  - task:
25
  name: POS
26
  type: token-classification
27
  metrics:
28
  - name: POS Accuracy
29
  type: accuracy
30
- value: 0.9624213075
31
  - task:
32
  name: SENTER
33
  type: token-classification
34
  metrics:
35
  - name: SENTER Precision
36
  type: precision
37
- value: 0.8324420677
38
  - name: SENTER Recall
39
  type: recall
40
- value: 0.8280141844
41
  - name: SENTER F Score
42
  type: f_score
43
- value: 0.8302222222
44
  - task:
45
  name: UNLABELED_DEPENDENCIES
46
  type: token-classification
47
  metrics:
48
  - name: Unlabeled Dependencies Accuracy
49
  type: accuracy
50
- value: 0.8241604738
51
  - task:
52
  name: LABELED_DEPENDENCIES
53
  type: token-classification
54
  metrics:
55
  - name: Labeled Dependencies Accuracy
56
  type: accuracy
57
- value: 0.8241604738
58
  ---
59
  ### Details: https://spacy.io/models/da#da_core_news_md
60
 
@@ -63,12 +63,12 @@ Danish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, s
63
  | Feature | Description |
64
  | --- | --- |
65
  | **Name** | `da_core_news_md` |
66
- | **Version** | `3.1.0` |
67
- | **spaCy** | `>=3.1.0,<3.2.0` |
68
  | **Default Pipeline** | `tok2vec`, `morphologizer`, `parser`, `attribute_ruler`, `lemmatizer`, `ner` |
69
  | **Components** | `tok2vec`, `morphologizer`, `parser`, `senter`, `attribute_ruler`, `lemmatizer`, `ner` |
70
  | **Vectors** | 500000 keys, 20000 unique vectors (300 dimensions) |
71
- | **Sources** | [UD Danish DDT v2.5](https://github.com/UniversalDependencies/UD_Danish-DDT) (Johannsen, Anders; Martínez Alonso, Héctor; Plank, Barbara)<br />[DaNE](https://github.com/alexandrainst/danlp/blob/master/docs/datasets.md#danish-dependency-treebank-dane) (Rasmus Hvingelby, Amalie B. Pauli, Maria Barrett, Christina Rosted, Lasse M. Lidegaard, Anders Søgaard)<br />[Lemmatization Lists](https://github.com/michmech/lemmatization-lists/) (Michal Měchura)<br />[Explosion fastText Vectors (cbow, OSCAR Common Crawl + Wikipedia)](https://spacy.io) (Explosion) |
72
  | **License** | `CC BY-SA 4.0` |
73
  | **Author** | [Explosion](https://explosion.ai) |
74
 
@@ -76,12 +76,12 @@ Danish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, s
76
 
77
  <details>
78
 
79
- <summary>View label scheme (194 labels for 4 components)</summary>
80
 
81
  | Component | Labels |
82
  | --- | --- |
83
  | **`morphologizer`** | `AdpType=Prep\|POS=ADP`, `Definite=Ind\|Gender=Com\|Number=Sing\|POS=NOUN`, `Mood=Ind\|POS=AUX\|Tense=Pres\|VerbForm=Fin\|Voice=Act`, `POS=PROPN`, `Definite=Ind\|Number=Sing\|POS=VERB\|Tense=Past\|VerbForm=Part`, `Definite=Def\|Gender=Neut\|Number=Sing\|POS=NOUN`, `POS=SCONJ`, `Definite=Def\|Gender=Com\|Number=Sing\|POS=NOUN`, `Mood=Ind\|POS=VERB\|Tense=Pres\|VerbForm=Fin\|Voice=Act`, `POS=ADV`, `Number=Plur\|POS=DET\|PronType=Dem`, `Degree=Pos\|Number=Plur\|POS=ADJ`, `Definite=Ind\|Gender=Com\|Number=Plur\|POS=NOUN`, `POS=PUNCT`, `POS=CCONJ`, `Definite=Ind\|Degree=Cmp\|Number=Sing\|POS=ADJ`, `Degree=Cmp\|POS=ADJ`, `POS=PRON\|PartType=Inf`, `Gender=Com\|Number=Sing\|POS=DET\|PronType=Ind`, `Definite=Ind\|Degree=Pos\|Number=Sing\|POS=ADJ`, `Case=Acc\|Gender=Neut\|Number=Sing\|POS=PRON\|Person=3\|PronType=Prs`, `Definite=Ind\|Gender=Neut\|Number=Plur\|POS=NOUN`, `Definite=Def\|Degree=Pos\|Number=Sing\|POS=ADJ`, `Gender=Neut\|Number=Sing\|POS=DET\|PronType=Dem`, `Degree=Pos\|POS=ADV`, `Definite=Def\|Number=Sing\|POS=VERB\|Tense=Past\|VerbForm=Part`, `Definite=Ind\|Gender=Neut\|Number=Sing\|POS=NOUN`, `POS=PRON\|PronType=Dem`, `NumType=Card\|POS=NUM`, `Definite=Ind\|Degree=Pos\|Gender=Neut\|Number=Sing\|POS=ADJ`, `Case=Acc\|Gender=Com\|Number=Sing\|POS=PRON\|Person=3\|PronType=Prs`, `Degree=Pos\|Gender=Com\|Number=Sing\|POS=ADJ`, `Case=Nom\|Gender=Com\|Number=Sing\|POS=PRON\|Person=3\|PronType=Prs`, `NumType=Ord\|POS=ADJ`, `Gender=Com\|Number=Sing\|Number[psor]=Sing\|POS=DET\|Person=3\|Poss=Yes\|PronType=Prs\|Reflex=Yes`, `Mood=Ind\|POS=AUX\|Tense=Past\|VerbForm=Fin\|Voice=Act`, `POS=VERB\|VerbForm=Inf\|Voice=Act`, `Mood=Ind\|POS=VERB\|Tense=Past\|VerbForm=Fin\|Voice=Act`, `POS=NOUN`, `Mood=Ind\|POS=VERB\|Tense=Pres\|VerbForm=Fin\|Voice=Pass`, `POS=ADP\|PartType=Inf`, `Degree=Pos\|POS=ADJ`, `Definite=Def\|Gender=Com\|Number=Plur\|POS=NOUN`, `Number[psor]=Sing\|POS=DET\|Person=3\|Poss=Yes\|PronType=Prs`, 
`Case=Gen\|Definite=Def\|Gender=Com\|Number=Sing\|POS=NOUN`, `POS=AUX\|VerbForm=Inf\|Voice=Act`, `Definite=Ind\|Degree=Pos\|Gender=Com\|Number=Sing\|POS=ADJ`, `Gender=Com\|Number=Sing\|POS=DET\|PronType=Dem`, `Number=Plur\|POS=DET\|PronType=Ind`, `Gender=Com\|Number=Sing\|POS=PRON\|PronType=Ind`, `Case=Acc\|POS=PRON\|Person=3\|PronType=Prs\|Reflex=Yes`, `POS=PART\|PartType=Inf`, `Gender=Neut\|Number=Sing\|POS=DET\|PronType=Ind`, `Case=Acc\|Number=Plur\|POS=PRON\|Person=3\|PronType=Prs`, `Case=Gen\|Definite=Def\|Gender=Neut\|Number=Sing\|POS=NOUN`, `Case=Nom\|Number=Plur\|POS=PRON\|Person=3\|PronType=Prs`, `Case=Nom\|Gender=Com\|Number=Sing\|POS=PRON\|Person=1\|PronType=Prs`, `Case=Nom\|Gender=Com\|POS=PRON\|PronType=Ind`, `Gender=Neut\|Number=Sing\|POS=PRON\|PronType=Ind`, `Mood=Imp\|POS=VERB`, `Gender=Com\|Number=Sing\|Number[psor]=Sing\|POS=DET\|Person=1\|Poss=Yes\|PronType=Prs`, `Definite=Ind\|Number=Sing\|POS=AUX\|Tense=Past\|VerbForm=Part`, `POS=X`, `Case=Nom\|Gender=Com\|Number=Plur\|POS=PRON\|Person=1\|PronType=Prs`, `Case=Gen\|Definite=Def\|Gender=Com\|Number=Plur\|POS=NOUN`, `POS=VERB\|Tense=Pres\|VerbForm=Part`, `Number=Plur\|POS=PRON\|PronType=Int,Rel`, `POS=VERB\|VerbForm=Inf\|Voice=Pass`, `Case=Gen\|Definite=Ind\|Gender=Com\|Number=Sing\|POS=NOUN`, `Degree=Cmp\|POS=ADV`, `POS=ADV\|PartType=Inf`, `Degree=Sup\|POS=ADV`, `Number=Plur\|POS=PRON\|PronType=Dem`, `Number=Plur\|POS=PRON\|PronType=Ind`, `Definite=Def\|Gender=Neut\|Number=Plur\|POS=NOUN`, `Case=Acc\|Gender=Com\|Number=Sing\|POS=PRON\|Person=1\|PronType=Prs`, `Case=Gen\|POS=PROPN`, `POS=ADP`, `Degree=Cmp\|Number=Plur\|POS=ADJ`, `Definite=Def\|Degree=Sup\|POS=ADJ`, `Gender=Neut\|Number=Sing\|Number[psor]=Sing\|POS=DET\|Person=1\|Poss=Yes\|PronType=Prs`, `Degree=Pos\|Number=Sing\|POS=ADJ`, `Number=Plur\|Number[psor]=Sing\|POS=DET\|Person=3\|Poss=Yes\|PronType=Prs\|Reflex=Yes`, `Gender=Com\|Number=Sing\|Number[psor]=Plur\|POS=DET\|Person=1\|Poss=Yes\|PronType=Prs\|Style=Form`, 
`Number=Plur\|POS=PRON\|PronType=Rcp`, `Case=Gen\|Degree=Cmp\|POS=ADJ`, `Case=Gen\|Definite=Def\|Gender=Neut\|Number=Plur\|POS=NOUN`, `Number[psor]=Plur\|POS=DET\|Person=3\|Poss=Yes\|PronType=Prs`, `POS=INTJ`, `Number=Plur\|Number[psor]=Sing\|POS=DET\|Person=1\|Poss=Yes\|PronType=Prs`, `Degree=Pos\|Gender=Neut\|Number=Sing\|POS=ADJ`, `Gender=Neut\|Number=Sing\|Number[psor]=Plur\|POS=DET\|Person=1\|Poss=Yes\|PronType=Prs\|Style=Form`, `Case=Acc\|Gender=Com\|Number=Sing\|POS=PRON\|Person=2\|PronType=Prs`, `Gender=Com\|Number=Sing\|Number[psor]=Sing\|POS=DET\|Person=2\|Poss=Yes\|PronType=Prs`, `Case=Gen\|Definite=Ind\|Gender=Neut\|Number=Plur\|POS=NOUN`, `Number=Sing\|POS=PRON\|PronType=Int,Rel`, `Number=Plur\|Number[psor]=Plur\|POS=DET\|Person=1\|Poss=Yes\|PronType=Prs\|Style=Form`, `Gender=Neut\|Number=Sing\|POS=PRON\|PronType=Int,Rel`, `Definite=Def\|Degree=Sup\|Number=Plur\|POS=ADJ`, `Case=Nom\|Gender=Com\|Number=Sing\|POS=PRON\|Person=2\|PronType=Prs`, `Gender=Neut\|Number=Sing\|Number[psor]=Sing\|POS=DET\|Person=3\|Poss=Yes\|PronType=Prs\|Reflex=Yes`, `Definite=Ind\|Number=Sing\|POS=NOUN`, `Number=Plur\|POS=VERB\|Tense=Past\|VerbForm=Part`, `Number=Plur\|Number[psor]=Sing\|POS=PRON\|Person=3\|Poss=Yes\|PronType=Prs\|Reflex=Yes`, `POS=SYM`, `Case=Nom\|Gender=Com\|POS=PRON\|Person=2\|Polite=Form\|PronType=Prs`, `Degree=Sup\|POS=ADJ`, `Number=Plur\|POS=DET\|PronType=Ind\|Style=Arch`, `Case=Gen\|Gender=Com\|Number=Sing\|POS=DET\|PronType=Dem`, `Foreign=Yes\|POS=X`, `POS=DET\|Person=2\|Polite=Form\|Poss=Yes\|PronType=Prs`, `Gender=Neut\|Number=Sing\|POS=PRON\|PronType=Dem`, `Case=Acc\|Gender=Com\|Number=Plur\|POS=PRON\|Person=1\|PronType=Prs`, `Case=Gen\|Definite=Ind\|Gender=Neut\|Number=Sing\|POS=NOUN`, `Case=Gen\|POS=PRON\|PronType=Int,Rel`, `Gender=Com\|Number=Sing\|POS=PRON\|PronType=Dem`, `Abbr=Yes\|POS=X`, `Case=Gen\|Definite=Ind\|Gender=Com\|Number=Plur\|POS=NOUN`, `Definite=Def\|Degree=Abs\|POS=ADJ`, `Definite=Ind\|Degree=Sup\|Number=Sing\|POS=ADJ`, 
`Definite=Ind\|POS=NOUN`, `Gender=Com\|Number=Plur\|POS=NOUN`, `Number[psor]=Plur\|POS=DET\|Person=1\|Poss=Yes\|PronType=Prs`, `Gender=Com\|POS=PRON\|PronType=Int,Rel`, `Case=Nom\|Gender=Com\|Number=Plur\|POS=PRON\|Person=2\|PronType=Prs`, `Degree=Abs\|POS=ADV`, `POS=VERB\|VerbForm=Ger`, `POS=VERB\|Tense=Past\|VerbForm=Part`, `Definite=Def\|Degree=Sup\|Number=Sing\|POS=ADJ`, `Number=Plur\|Number[psor]=Plur\|POS=PRON\|Person=1\|Poss=Yes\|PronType=Prs\|Style=Form`, `Case=Gen\|Definite=Def\|Degree=Pos\|Number=Sing\|POS=ADJ`, `Case=Gen\|Degree=Pos\|Number=Plur\|POS=ADJ`, `Case=Acc\|Gender=Com\|POS=PRON\|Person=2\|Polite=Form\|PronType=Prs`, `Gender=Com\|Number=Sing\|POS=PRON\|PronType=Int,Rel`, `POS=VERB\|Tense=Pres`, `Case=Gen\|Number=Plur\|POS=DET\|PronType=Ind`, `Number[psor]=Plur\|POS=DET\|Person=2\|Poss=Yes\|PronType=Prs`, `POS=PRON\|Person=2\|Polite=Form\|Poss=Yes\|PronType=Prs`, `Gender=Neut\|Number=Sing\|Number[psor]=Sing\|POS=DET\|Person=2\|Poss=Yes\|PronType=Prs`, `POS=AUX\|Tense=Pres\|VerbForm=Part`, `Mood=Ind\|POS=VERB\|Tense=Past\|VerbForm=Fin\|Voice=Pass`, `Gender=Com\|Number=Sing\|Number[psor]=Sing\|POS=PRON\|Person=3\|Poss=Yes\|PronType=Prs\|Reflex=Yes`, `Degree=Sup\|Number=Plur\|POS=ADJ`, `Case=Acc\|Gender=Com\|Number=Plur\|POS=PRON\|Person=2\|PronType=Prs`, `Gender=Neut\|Number=Sing\|Number[psor]=Sing\|POS=PRON\|Person=3\|Poss=Yes\|PronType=Prs\|Reflex=Yes`, `Definite=Ind\|Number=Plur\|POS=NOUN`, `Case=Gen\|Number=Plur\|POS=VERB\|Tense=Past\|VerbForm=Part`, `Mood=Imp\|POS=AUX`, `Gender=Com\|Number=Sing\|Number[psor]=Sing\|POS=PRON\|Person=1\|Poss=Yes\|PronType=Prs`, `Number[psor]=Sing\|POS=PRON\|Person=3\|Poss=Yes\|PronType=Prs`, `Definite=Def\|Gender=Com\|Number=Sing\|POS=VERB\|Tense=Past\|VerbForm=Part`, `Number=Plur\|Number[psor]=Sing\|POS=DET\|Person=2\|Poss=Yes\|PronType=Prs`, `Case=Gen\|Gender=Com\|Number=Sing\|POS=DET\|PronType=Ind`, `Case=Gen\|POS=NOUN`, `Number[psor]=Plur\|POS=PRON\|Person=3\|Poss=Yes\|PronType=Prs`, `POS=DET\|PronType=Dem`, 
`Definite=Def\|Number=Plur\|POS=NOUN` |
84
- | **`parser`** | `ROOT`, `acl:relcl`, `advcl`, `advmod`, `amod`, `appos`, `aux`, `case`, `cc`, `ccomp`, `compound:prt`, `conj`, `cop`, `dep`, `det`, `expl`, `fixed`, `flat`, `iobj`, `list`, `mark`, `nmod`, `nmod:poss`, `nsubj`, `nummod`, `obj`, `obl`, `obl:loc`, `obl:tmod`, `punct`, `xcomp` |
85
  | **`senter`** | `I`, `S` |
86
  | **`ner`** | `LOC`, `MISC`, `ORG`, `PER` |
87
 
@@ -92,15 +92,21 @@ Danish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, s
92
  | Type | Score |
93
  | --- | --- |
94
  | `TOKEN_ACC` | 99.95 |
95
- | `TAG_ACC` | 96.24 |
96
- | `POS_ACC` | 96.24 |
97
- | `MORPH_ACC` | 94.99 |
 
 
 
 
 
 
 
 
 
 
 
98
  | `LEMMA_ACC` | 84.91 |
99
- | `DEP_UAS` | 82.42 |
100
- | `DEP_LAS` | 78.37 |
101
- | `ENTS_P` | 81.05 |
102
- | `ENTS_R` | 80.21 |
103
- | `ENTS_F` | 80.63 |
104
- | `SENTS_P` | 83.24 |
105
- | `SENTS_R` | 82.80 |
106
- | `SENTS_F` | 83.02 |
 
4
  - token-classification
5
  language:
6
  - da
7
+ license: cc-by-sa-4.0
8
  model-index:
9
  - name: da_core_news_md
10
  results:
 
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
+ value: 0.8075313808
18
  - name: NER Recall
19
  type: recall
20
+ value: 0.8041666667
21
  - name: NER F Score
22
  type: f_score
23
+ value: 0.8058455115
24
  - task:
25
  name: POS
26
  type: token-classification
27
  metrics:
28
  - name: POS Accuracy
29
  type: accuracy
30
+ value: 0.962905569
31
  - task:
32
  name: SENTER
33
  type: token-classification
34
  metrics:
35
  - name: SENTER Precision
36
  type: precision
37
+ value: 0.908438061
38
  - name: SENTER Recall
39
  type: recall
40
+ value: 0.8971631206
41
  - name: SENTER F Score
42
  type: f_score
43
+ value: 0.902765388
44
  - task:
45
  name: UNLABELED_DEPENDENCIES
46
  type: token-classification
47
  metrics:
48
  - name: Unlabeled Dependencies Accuracy
49
  type: accuracy
50
+ value: 0.8220632614
51
  - task:
52
  name: LABELED_DEPENDENCIES
53
  type: token-classification
54
  metrics:
55
  - name: Labeled Dependencies Accuracy
56
  type: accuracy
57
+ value: 0.7813355686
58
  ---
59
  ### Details: https://spacy.io/models/da#da_core_news_md
60
 
 
63
  | Feature | Description |
64
  | --- | --- |
65
  | **Name** | `da_core_news_md` |
66
+ | **Version** | `3.2.0` |
67
+ | **spaCy** | `>=3.2.0,<3.3.0` |
68
  | **Default Pipeline** | `tok2vec`, `morphologizer`, `parser`, `attribute_ruler`, `lemmatizer`, `ner` |
69
  | **Components** | `tok2vec`, `morphologizer`, `parser`, `senter`, `attribute_ruler`, `lemmatizer`, `ner` |
70
  | **Vectors** | 500000 keys, 20000 unique vectors (300 dimensions) |
71
+ | **Sources** | [UD Danish DDT v2.8](https://github.com/UniversalDependencies/UD_Danish-DDT) (Johannsen, Anders; Martínez Alonso, Héctor; Plank, Barbara)<br />[DaNE](https://github.com/alexandrainst/danlp/blob/master/docs/datasets.md#danish-dependency-treebank-dane) (Rasmus Hvingelby, Amalie B. Pauli, Maria Barrett, Christina Rosted, Lasse M. Lidegaard, Anders Søgaard)<br />[Lemmatization Lists](https://github.com/michmech/lemmatization-lists/) (Michal Měchura)<br />[Explosion fastText Vectors (cbow, OSCAR Common Crawl + Wikipedia)](https://spacy.io) (Explosion) |
72
  | **License** | `CC BY-SA 4.0` |
73
  | **Author** | [Explosion](https://explosion.ai) |
74
 
 
76
 
77
  <details>
78
 
79
+ <summary>View label scheme (195 labels for 4 components)</summary>
80
 
81
  | Component | Labels |
82
  | --- | --- |
83
  | **`morphologizer`** | `AdpType=Prep\|POS=ADP`, `Definite=Ind\|Gender=Com\|Number=Sing\|POS=NOUN`, `Mood=Ind\|POS=AUX\|Tense=Pres\|VerbForm=Fin\|Voice=Act`, `POS=PROPN`, `Definite=Ind\|Number=Sing\|POS=VERB\|Tense=Past\|VerbForm=Part`, `Definite=Def\|Gender=Neut\|Number=Sing\|POS=NOUN`, `POS=SCONJ`, `Definite=Def\|Gender=Com\|Number=Sing\|POS=NOUN`, `Mood=Ind\|POS=VERB\|Tense=Pres\|VerbForm=Fin\|Voice=Act`, `POS=ADV`, `Number=Plur\|POS=DET\|PronType=Dem`, `Degree=Pos\|Number=Plur\|POS=ADJ`, `Definite=Ind\|Gender=Com\|Number=Plur\|POS=NOUN`, `POS=PUNCT`, `POS=CCONJ`, `Definite=Ind\|Degree=Cmp\|Number=Sing\|POS=ADJ`, `Degree=Cmp\|POS=ADJ`, `POS=PRON\|PartType=Inf`, `Gender=Com\|Number=Sing\|POS=DET\|PronType=Ind`, `Definite=Ind\|Degree=Pos\|Number=Sing\|POS=ADJ`, `Case=Acc\|Gender=Neut\|Number=Sing\|POS=PRON\|Person=3\|PronType=Prs`, `Definite=Ind\|Gender=Neut\|Number=Plur\|POS=NOUN`, `Definite=Def\|Degree=Pos\|Number=Sing\|POS=ADJ`, `Gender=Neut\|Number=Sing\|POS=DET\|PronType=Dem`, `Degree=Pos\|POS=ADV`, `Definite=Def\|Number=Sing\|POS=VERB\|Tense=Past\|VerbForm=Part`, `Definite=Ind\|Gender=Neut\|Number=Sing\|POS=NOUN`, `POS=PRON\|PronType=Dem`, `NumType=Card\|POS=NUM`, `Definite=Ind\|Degree=Pos\|Gender=Neut\|Number=Sing\|POS=ADJ`, `Case=Acc\|Gender=Com\|Number=Sing\|POS=PRON\|Person=3\|PronType=Prs`, `Degree=Pos\|Gender=Com\|Number=Sing\|POS=ADJ`, `Case=Nom\|Gender=Com\|Number=Sing\|POS=PRON\|Person=3\|PronType=Prs`, `NumType=Ord\|POS=ADJ`, `Gender=Com\|Number=Sing\|Number[psor]=Sing\|POS=DET\|Person=3\|Poss=Yes\|PronType=Prs\|Reflex=Yes`, `Mood=Ind\|POS=AUX\|Tense=Past\|VerbForm=Fin\|Voice=Act`, `POS=VERB\|VerbForm=Inf\|Voice=Act`, `Mood=Ind\|POS=VERB\|Tense=Past\|VerbForm=Fin\|Voice=Act`, `POS=NOUN`, `Mood=Ind\|POS=VERB\|Tense=Pres\|VerbForm=Fin\|Voice=Pass`, `POS=ADP\|PartType=Inf`, `Degree=Pos\|POS=ADJ`, `Definite=Def\|Gender=Com\|Number=Plur\|POS=NOUN`, `Number[psor]=Sing\|POS=DET\|Person=3\|Poss=Yes\|PronType=Prs`, 
`Case=Gen\|Definite=Def\|Gender=Com\|Number=Sing\|POS=NOUN`, `POS=AUX\|VerbForm=Inf\|Voice=Act`, `Definite=Ind\|Degree=Pos\|Gender=Com\|Number=Sing\|POS=ADJ`, `Gender=Com\|Number=Sing\|POS=DET\|PronType=Dem`, `Number=Plur\|POS=DET\|PronType=Ind`, `Gender=Com\|Number=Sing\|POS=PRON\|PronType=Ind`, `Case=Acc\|POS=PRON\|Person=3\|PronType=Prs\|Reflex=Yes`, `POS=PART\|PartType=Inf`, `Gender=Neut\|Number=Sing\|POS=DET\|PronType=Ind`, `Case=Acc\|Number=Plur\|POS=PRON\|Person=3\|PronType=Prs`, `Case=Gen\|Definite=Def\|Gender=Neut\|Number=Sing\|POS=NOUN`, `Case=Nom\|Number=Plur\|POS=PRON\|Person=3\|PronType=Prs`, `Case=Nom\|Gender=Com\|Number=Sing\|POS=PRON\|Person=1\|PronType=Prs`, `Case=Nom\|Gender=Com\|POS=PRON\|PronType=Ind`, `Gender=Neut\|Number=Sing\|POS=PRON\|PronType=Ind`, `Mood=Imp\|POS=VERB`, `Gender=Com\|Number=Sing\|Number[psor]=Sing\|POS=DET\|Person=1\|Poss=Yes\|PronType=Prs`, `Definite=Ind\|Number=Sing\|POS=AUX\|Tense=Past\|VerbForm=Part`, `POS=X`, `Case=Nom\|Gender=Com\|Number=Plur\|POS=PRON\|Person=1\|PronType=Prs`, `Case=Gen\|Definite=Def\|Gender=Com\|Number=Plur\|POS=NOUN`, `POS=VERB\|Tense=Pres\|VerbForm=Part`, `Number=Plur\|POS=PRON\|PronType=Int,Rel`, `POS=VERB\|VerbForm=Inf\|Voice=Pass`, `Case=Gen\|Definite=Ind\|Gender=Com\|Number=Sing\|POS=NOUN`, `Degree=Cmp\|POS=ADV`, `POS=ADV\|PartType=Inf`, `Degree=Sup\|POS=ADV`, `Number=Plur\|POS=PRON\|PronType=Dem`, `Number=Plur\|POS=PRON\|PronType=Ind`, `Definite=Def\|Gender=Neut\|Number=Plur\|POS=NOUN`, `Case=Acc\|Gender=Com\|Number=Sing\|POS=PRON\|Person=1\|PronType=Prs`, `Case=Gen\|POS=PROPN`, `POS=ADP`, `Degree=Cmp\|Number=Plur\|POS=ADJ`, `Definite=Def\|Degree=Sup\|POS=ADJ`, `Gender=Neut\|Number=Sing\|Number[psor]=Sing\|POS=DET\|Person=1\|Poss=Yes\|PronType=Prs`, `Degree=Pos\|Number=Sing\|POS=ADJ`, `Number=Plur\|Number[psor]=Sing\|POS=DET\|Person=3\|Poss=Yes\|PronType=Prs\|Reflex=Yes`, `Gender=Com\|Number=Sing\|Number[psor]=Plur\|POS=DET\|Person=1\|Poss=Yes\|PronType=Prs\|Style=Form`, 
`Number=Plur\|POS=PRON\|PronType=Rcp`, `Case=Gen\|Degree=Cmp\|POS=ADJ`, `Case=Gen\|Definite=Def\|Gender=Neut\|Number=Plur\|POS=NOUN`, `Number[psor]=Plur\|POS=DET\|Person=3\|Poss=Yes\|PronType=Prs`, `POS=INTJ`, `Number=Plur\|Number[psor]=Sing\|POS=DET\|Person=1\|Poss=Yes\|PronType=Prs`, `Degree=Pos\|Gender=Neut\|Number=Sing\|POS=ADJ`, `Gender=Neut\|Number=Sing\|Number[psor]=Plur\|POS=DET\|Person=1\|Poss=Yes\|PronType=Prs\|Style=Form`, `Case=Acc\|Gender=Com\|Number=Sing\|POS=PRON\|Person=2\|PronType=Prs`, `Gender=Com\|Number=Sing\|Number[psor]=Sing\|POS=DET\|Person=2\|Poss=Yes\|PronType=Prs`, `Case=Gen\|Definite=Ind\|Gender=Neut\|Number=Plur\|POS=NOUN`, `Number=Sing\|POS=PRON\|PronType=Int,Rel`, `Number=Plur\|Number[psor]=Plur\|POS=DET\|Person=1\|Poss=Yes\|PronType=Prs\|Style=Form`, `Gender=Neut\|Number=Sing\|POS=PRON\|PronType=Int,Rel`, `Definite=Def\|Degree=Sup\|Number=Plur\|POS=ADJ`, `Case=Nom\|Gender=Com\|Number=Sing\|POS=PRON\|Person=2\|PronType=Prs`, `Gender=Neut\|Number=Sing\|Number[psor]=Sing\|POS=DET\|Person=3\|Poss=Yes\|PronType=Prs\|Reflex=Yes`, `Definite=Ind\|Number=Sing\|POS=NOUN`, `Number=Plur\|POS=VERB\|Tense=Past\|VerbForm=Part`, `Number=Plur\|Number[psor]=Sing\|POS=PRON\|Person=3\|Poss=Yes\|PronType=Prs\|Reflex=Yes`, `POS=SYM`, `Case=Nom\|Gender=Com\|POS=PRON\|Person=2\|Polite=Form\|PronType=Prs`, `Degree=Sup\|POS=ADJ`, `Number=Plur\|POS=DET\|PronType=Ind\|Style=Arch`, `Case=Gen\|Gender=Com\|Number=Sing\|POS=DET\|PronType=Dem`, `Foreign=Yes\|POS=X`, `POS=DET\|Person=2\|Polite=Form\|Poss=Yes\|PronType=Prs`, `Gender=Neut\|Number=Sing\|POS=PRON\|PronType=Dem`, `Case=Acc\|Gender=Com\|Number=Plur\|POS=PRON\|Person=1\|PronType=Prs`, `Case=Gen\|Definite=Ind\|Gender=Neut\|Number=Sing\|POS=NOUN`, `Case=Gen\|POS=PRON\|PronType=Int,Rel`, `Gender=Com\|Number=Sing\|POS=PRON\|PronType=Dem`, `Abbr=Yes\|POS=X`, `Case=Gen\|Definite=Ind\|Gender=Com\|Number=Plur\|POS=NOUN`, `Definite=Def\|Degree=Abs\|POS=ADJ`, `Definite=Ind\|Degree=Sup\|Number=Sing\|POS=ADJ`, 
`Definite=Ind\|POS=NOUN`, `Gender=Com\|Number=Plur\|POS=NOUN`, `Number[psor]=Plur\|POS=DET\|Person=1\|Poss=Yes\|PronType=Prs`, `Gender=Com\|POS=PRON\|PronType=Int,Rel`, `Case=Nom\|Gender=Com\|Number=Plur\|POS=PRON\|Person=2\|PronType=Prs`, `Degree=Abs\|POS=ADV`, `POS=VERB\|VerbForm=Ger`, `POS=VERB\|Tense=Past\|VerbForm=Part`, `Definite=Def\|Degree=Sup\|Number=Sing\|POS=ADJ`, `Number=Plur\|Number[psor]=Plur\|POS=PRON\|Person=1\|Poss=Yes\|PronType=Prs\|Style=Form`, `Case=Gen\|Definite=Def\|Degree=Pos\|Number=Sing\|POS=ADJ`, `Case=Gen\|Degree=Pos\|Number=Plur\|POS=ADJ`, `Case=Acc\|Gender=Com\|POS=PRON\|Person=2\|Polite=Form\|PronType=Prs`, `Gender=Com\|Number=Sing\|POS=PRON\|PronType=Int,Rel`, `POS=VERB\|Tense=Pres`, `Case=Gen\|Number=Plur\|POS=DET\|PronType=Ind`, `Number[psor]=Plur\|POS=DET\|Person=2\|Poss=Yes\|PronType=Prs`, `POS=PRON\|Person=2\|Polite=Form\|Poss=Yes\|PronType=Prs`, `Gender=Neut\|Number=Sing\|Number[psor]=Sing\|POS=DET\|Person=2\|Poss=Yes\|PronType=Prs`, `POS=AUX\|Tense=Pres\|VerbForm=Part`, `Mood=Ind\|POS=VERB\|Tense=Past\|VerbForm=Fin\|Voice=Pass`, `Gender=Com\|Number=Sing\|Number[psor]=Sing\|POS=PRON\|Person=3\|Poss=Yes\|PronType=Prs\|Reflex=Yes`, `Degree=Sup\|Number=Plur\|POS=ADJ`, `Case=Acc\|Gender=Com\|Number=Plur\|POS=PRON\|Person=2\|PronType=Prs`, `Gender=Neut\|Number=Sing\|Number[psor]=Sing\|POS=PRON\|Person=3\|Poss=Yes\|PronType=Prs\|Reflex=Yes`, `Definite=Ind\|Number=Plur\|POS=NOUN`, `Case=Gen\|Number=Plur\|POS=VERB\|Tense=Past\|VerbForm=Part`, `Mood=Imp\|POS=AUX`, `Gender=Com\|Number=Sing\|Number[psor]=Sing\|POS=PRON\|Person=1\|Poss=Yes\|PronType=Prs`, `Number[psor]=Sing\|POS=PRON\|Person=3\|Poss=Yes\|PronType=Prs`, `Definite=Def\|Gender=Com\|Number=Sing\|POS=VERB\|Tense=Past\|VerbForm=Part`, `Number=Plur\|Number[psor]=Sing\|POS=DET\|Person=2\|Poss=Yes\|PronType=Prs`, `Case=Gen\|Gender=Com\|Number=Sing\|POS=DET\|PronType=Ind`, `Case=Gen\|POS=NOUN`, `Number[psor]=Plur\|POS=PRON\|Person=3\|Poss=Yes\|PronType=Prs`, `POS=DET\|PronType=Dem`, 
`Definite=Def\|Number=Plur\|POS=NOUN` |
84
+ | **`parser`** | `ROOT`, `acl:relcl`, `advcl`, `advmod`, `advmod:lmod`, `amod`, `appos`, `aux`, `case`, `cc`, `ccomp`, `compound:prt`, `conj`, `cop`, `dep`, `det`, `expl`, `fixed`, `flat`, `iobj`, `list`, `mark`, `nmod`, `nmod:poss`, `nsubj`, `nummod`, `obj`, `obl`, `obl:lmod`, `obl:tmod`, `punct`, `xcomp` |
85
  | **`senter`** | `I`, `S` |
86
  | **`ner`** | `LOC`, `MISC`, `ORG`, `PER` |
87
 
 
92
  | Type | Score |
93
  | --- | --- |
94
  | `TOKEN_ACC` | 99.95 |
95
+ | `TOKEN_P` | 99.78 |
96
+ | `TOKEN_R` | 99.75 |
97
+ | `TOKEN_F` | 99.76 |
98
+ | `POS_ACC` | 96.29 |
99
+ | `MORPH_ACC` | 94.88 |
100
+ | `MORPH_MICRO_P` | 96.65 |
101
+ | `MORPH_MICRO_R` | 96.11 |
102
+ | `MORPH_MICRO_F` | 96.38 |
103
+ | `SENTS_P` | 90.84 |
104
+ | `SENTS_R` | 89.72 |
105
+ | `SENTS_F` | 90.28 |
106
+ | `DEP_UAS` | 82.21 |
107
+ | `DEP_LAS` | 78.13 |
108
+ | `TAG_ACC` | 96.29 |
109
  | `LEMMA_ACC` | 84.91 |
110
+ | `ENTS_P` | 80.75 |
111
+ | `ENTS_R` | 80.42 |
112
+ | `ENTS_F` | 80.58 |
 
 
 
 
 
accuracy.json CHANGED
@@ -1,88 +1,83 @@
1
  {
2
  "token_acc": 0.9994672349,
3
- "tag_acc": 0.9624213075,
4
- "pos_acc": 0.9624213075,
5
- "morph_acc": 0.9499273608,
6
- "lemma_acc": 0.8491041162,
7
- "dep_uas": 0.8241604738,
8
- "dep_las": 0.7836629791,
9
- "ents_p": 0.8105263158,
10
- "ents_r": 0.8020833333,
11
- "ents_f": 0.8062827225,
12
- "sents_p": 0.8324420677,
13
- "sents_r": 0.8280141844,
14
- "sents_f": 0.8302222222,
15
- "speed": 9986.5434782999,
16
  "morph_per_feat": {
17
  "Mood": {
18
- "p": 0.9770114943,
19
- "r": 0.9723546235,
20
- "f": 0.9746774964
21
  },
22
  "Tense": {
23
- "p": 0.9704097117,
24
- "r": 0.9631024096,
25
- "f": 0.9667422525
26
  },
27
  "VerbForm": {
28
- "p": 0.9599753695,
29
- "r": 0.9541003672,
30
- "f": 0.9570288521
31
  },
32
  "Voice": {
33
- "p": 0.9753363229,
34
- "r": 0.9753363229,
35
- "f": 0.9753363229
36
  },
37
  "Definite": {
38
- "p": 0.9676258993,
39
- "r": 0.9565389174,
40
- "f": 0.9620504669
41
  },
42
  "Gender": {
43
- "p": 0.9498495486,
44
- "r": 0.9441674975,
45
- "f": 0.947
46
  },
47
  "Number": {
48
- "p": 0.9639283834,
49
- "r": 0.9548774126,
50
- "f": 0.9593815514
51
  },
52
  "AdpType": {
53
- "p": 0.9991071429,
54
- "r": 0.9893899204,
55
- "f": 0.994224789
56
  },
57
  "PartType": {
58
- "p": 0.996763754,
59
  "r": 1.0,
60
- "f": 0.9983792545
61
  },
62
  "Case": {
63
- "p": 0.9757673667,
64
- "r": 0.9541864139,
65
- "f": 0.96485623
66
  },
67
  "Person": {
68
- "p": 0.972027972,
69
- "r": 0.9875666075,
70
- "f": 0.9797356828
71
  },
72
  "PronType": {
73
- "p": 0.9835796388,
74
- "r": 0.9851973684,
75
- "f": 0.9843878389
76
  },
77
  "NumType": {
78
- "p": 0.9727891156,
79
  "r": 0.9470198675,
80
- "f": 0.9597315436
81
  },
82
  "Degree": {
83
- "p": 0.9538274605,
84
- "r": 0.9457831325,
85
- "f": 0.9497882638
86
  },
87
  "Reflex": {
88
  "p": 1.0,
@@ -95,14 +90,14 @@
95
  "f": 0.9828571429
96
  },
97
  "Poss": {
98
- "p": 0.9887640449,
99
  "r": 1.0,
100
- "f": 0.9943502825
101
  },
102
  "Foreign": {
103
- "p": 1.0,
104
  "r": 0.4,
105
- "f": 0.5714285714
106
  },
107
  "Abbr": {
108
  "p": 0.0,
@@ -115,141 +110,146 @@
115
  "f": 1.0
116
  },
117
  "Polite": {
118
- "p": 0.6666666667,
119
- "r": 0.5,
120
- "f": 0.5714285714
121
  }
122
  },
 
 
 
 
 
123
  "dep_las_per_type": {
124
  "advmod": {
125
- "p": 0.7148760331,
126
- "r": 0.7330508475,
127
- "f": 0.7238493724
128
  },
129
  "root": {
130
- "p": 0.8327402135,
131
- "r": 0.829787234,
132
- "f": 0.8312611012
133
  },
134
  "nsubj": {
135
- "p": 0.8338590957,
136
- "r": 0.8364978903,
137
- "f": 0.8351764086
138
  },
139
  "case": {
140
- "p": 0.882642998,
141
- "r": 0.8843873518,
142
- "f": 0.8835143139
143
  },
144
  "obl": {
145
- "p": 0.7138314785,
146
- "r": 0.6982892691,
147
- "f": 0.7059748428
148
  },
149
  "cc": {
150
- "p": 0.7636887608,
151
- "r": 0.7703488372,
152
- "f": 0.7670043415
153
  },
154
  "conj": {
155
- "p": 0.6164383562,
156
- "r": 0.6,
157
- "f": 0.6081081081
158
  },
159
  "obj": {
160
- "p": 0.8198874296,
161
  "r": 0.8485436893,
162
- "f": 0.8339694656
163
  },
164
  "aux": {
165
- "p": 0.8918918919,
166
- "r": 0.8658892128,
167
- "f": 0.8786982249
168
  },
169
  "acl:relcl": {
170
- "p": 0.625698324,
171
- "r": 0.6054054054,
172
- "f": 0.6153846154
173
  },
174
- "obl:loc": {
175
- "p": 0.7536231884,
176
- "r": 0.7428571429,
177
- "f": 0.7482014388
178
  },
179
  "det": {
180
- "p": 0.9168053245,
181
- "r": 0.9077429984,
182
- "f": 0.9122516556
183
  },
184
  "amod": {
185
- "p": 0.830449827,
186
- "r": 0.819112628,
187
- "f": 0.824742268
188
  },
189
  "nmod:poss": {
190
- "p": 0.7,
191
- "r": 0.6930693069,
192
- "f": 0.6965174129
193
  },
194
  "ccomp": {
195
- "p": 0.6111111111,
196
- "r": 0.7096774194,
197
- "f": 0.6567164179
198
  },
199
  "nummod": {
200
- "p": 0.8760330579,
201
- "r": 0.8833333333,
202
- "f": 0.8796680498
203
  },
204
  "flat": {
205
- "p": 0.7914110429,
206
- "r": 0.8543046358,
207
- "f": 0.821656051
208
  },
209
  "compound:prt": {
210
- "p": 0.4583333333,
211
- "r": 0.2682926829,
212
- "f": 0.3384615385
213
  },
214
  "advcl": {
215
- "p": 0.6465517241,
216
- "r": 0.6465517241,
217
- "f": 0.6465517241
218
  },
219
  "mark": {
220
- "p": 0.8802521008,
221
- "r": 0.8603696099,
222
- "f": 0.8701973001
223
  },
224
  "cop": {
225
- "p": 0.8087431694,
226
- "r": 0.8457142857,
227
- "f": 0.8268156425
228
  },
229
  "dep": {
230
- "p": 0.1573033708,
231
  "r": 0.2641509434,
232
- "f": 0.1971830986
233
  },
234
  "nmod": {
235
- "p": 0.6455445545,
236
- "r": 0.63671875,
237
- "f": 0.6411012783
238
  },
239
  "iobj": {
240
- "p": 0.7222222222,
241
  "r": 0.5909090909,
242
- "f": 0.65
243
  },
244
  "xcomp": {
245
- "p": 0.6216216216,
246
- "r": 0.3898305085,
247
- "f": 0.4791666667
248
  },
249
  "list": {
250
- "p": 0.375,
251
- "r": 0.3333333333,
252
- "f": 0.3529411765
253
  },
254
  "vocative": {
255
  "p": 0.0,
@@ -257,51 +257,62 @@
257
  "f": 0.0
258
  },
259
  "fixed": {
260
- "p": 0.8974358974,
261
- "r": 0.8333333333,
262
- "f": 0.8641975309
263
  },
264
  "expl": {
265
- "p": 0.8181818182,
266
- "r": 0.7941176471,
267
- "f": 0.8059701493
268
  },
269
  "appos": {
270
- "p": 0.5151515152,
271
  "r": 0.5151515152,
272
- "f": 0.5151515152
273
  },
274
  "obl:tmod": {
275
- "p": 0.5,
276
- "r": 0.3888888889,
277
- "f": 0.4375
278
  },
279
  "discourse": {
280
  "p": 0.0,
281
  "r": 0.0,
282
  "f": 0.0
 
 
 
 
 
283
  }
284
  },
 
 
 
 
 
285
  "ents_per_type": {
286
  "PER": {
287
- "p": 0.8867924528,
288
  "r": 0.8493975904,
289
- "f": 0.8676923077
290
  },
291
  "ORG": {
292
- "p": 0.7325581395,
293
- "r": 0.7,
294
- "f": 0.7159090909
295
  },
296
  "MISC": {
297
- "p": 0.7155172414,
298
- "r": 0.7345132743,
299
- "f": 0.7248908297
300
  },
301
  "LOC": {
302
- "p": 0.8596491228,
303
- "r": 0.8828828829,
304
- "f": 0.8711111111
305
  }
306
- }
 
307
  }
 
1
  {
2
  "token_acc": 0.9994672349,
3
+ "token_p": 0.9977732598,
4
+ "token_r": 0.9974835463,
5
+ "token_f": 0.997628382,
6
+ "pos_acc": 0.962905569,
7
+ "morph_acc": 0.9487651332,
8
+ "morph_micro_p": 0.9664501066,
9
+ "morph_micro_r": 0.9611327041,
10
+ "morph_micro_f": 0.9637840711,
 
 
 
 
 
11
  "morph_per_feat": {
12
  "Mood": {
13
+ "p": 0.9789674952,
14
+ "r": 0.9761677788,
15
+ "f": 0.9775656325
16
  },
17
  "Tense": {
18
+ "p": 0.9698340875,
19
+ "r": 0.968373494,
20
+ "f": 0.9691032404
21
  },
22
  "VerbForm": {
23
+ "p": 0.9631449631,
24
+ "r": 0.9596083231,
25
+ "f": 0.9613733906
26
  },
27
  "Voice": {
28
+ "p": 0.9782934132,
29
+ "r": 0.9768310912,
30
+ "f": 0.9775617053
31
  },
32
  "Definite": {
33
+ "p": 0.9633027523,
34
+ "r": 0.9541683129,
35
+ "f": 0.9587137753
36
  },
37
  "Gender": {
38
+ "p": 0.9459098497,
39
+ "r": 0.9415088069,
40
+ "f": 0.9437041972
41
  },
42
  "Number": {
43
+ "p": 0.9618621778,
44
+ "r": 0.9538341158,
45
+ "f": 0.9578313253
46
  },
47
  "AdpType": {
48
+ "p": 0.9973333333,
49
+ "r": 0.9920424403,
50
+ "f": 0.9946808511
51
  },
52
  "PartType": {
53
+ "p": 1.0,
54
  "r": 1.0,
55
+ "f": 1.0
56
  },
57
  "Case": {
58
+ "p": 0.9696485623,
59
+ "r": 0.9589257504,
60
+ "f": 0.9642573471
61
  },
62
  "Person": {
63
+ "p": 0.9704347826,
64
+ "r": 0.9911190053,
65
+ "f": 0.9806678383
66
  },
67
  "PronType": {
68
+ "p": 0.9811165846,
69
+ "r": 0.9827302632,
70
+ "f": 0.9819227609
71
  },
72
  "NumType": {
73
+ "p": 0.9794520548,
74
  "r": 0.9470198675,
75
+ "f": 0.962962963
76
  },
77
  "Degree": {
78
+ "p": 0.9520295203,
79
+ "r": 0.9325301205,
80
+ "f": 0.942178941
81
  },
82
  "Reflex": {
83
  "p": 1.0,
 
90
  "f": 0.9828571429
91
  },
92
  "Poss": {
93
+ "p": 0.9777777778,
94
  "r": 1.0,
95
+ "f": 0.9887640449
96
  },
97
  "Foreign": {
98
+ "p": 0.6666666667,
99
  "r": 0.4,
100
+ "f": 0.5
101
  },
102
  "Abbr": {
103
  "p": 0.0,
 
110
  "f": 1.0
111
  },
112
  "Polite": {
113
+ "p": 1.0,
114
+ "r": 0.75,
115
+ "f": 0.8571428571
116
  }
117
  },
118
+ "sents_p": 0.908438061,
119
+ "sents_r": 0.8971631206,
120
+ "sents_f": 0.902765388,
121
+ "dep_uas": 0.8220632614,
122
+ "dep_las": 0.7813355686,
123
  "dep_las_per_type": {
124
  "advmod": {
125
+ "p": 0.7073509015,
126
+ "r": 0.7203389831,
127
+ "f": 0.7137858642
128
  },
129
  "root": {
130
+ "p": 0.8411552347,
131
+ "r": 0.8262411348,
132
+ "f": 0.8336314848
133
  },
134
  "nsubj": {
135
+ "p": 0.8428417653,
136
+ "r": 0.8259493671,
137
+ "f": 0.8343100693
138
  },
139
  "case": {
140
+ "p": 0.881372549,
141
+ "r": 0.8865877712,
142
+ "f": 0.883972468
143
  },
144
  "obl": {
145
+ "p": 0.7053291536,
146
+ "r": 0.698757764,
147
+ "f": 0.7020280811
148
  },
149
  "cc": {
150
+ "p": 0.7851002865,
151
+ "r": 0.7965116279,
152
+ "f": 0.7907647908
153
  },
154
  "conj": {
155
+ "p": 0.6491712707,
156
+ "r": 0.6266666667,
157
+ "f": 0.6377204885
158
  },
159
  "obj": {
160
+ "p": 0.8033088235,
161
  "r": 0.8485436893,
162
+ "f": 0.8253068933
163
  },
164
  "aux": {
165
+ "p": 0.875739645,
166
+ "r": 0.8629737609,
167
+ "f": 0.8693098385
168
  },
169
  "acl:relcl": {
170
+ "p": 0.5879120879,
171
+ "r": 0.5783783784,
172
+ "f": 0.583106267
173
  },
174
+ "advmod:lmod": {
175
+ "p": 0.7586206897,
176
+ "r": 0.6567164179,
177
+ "f": 0.704
178
  },
179
  "det": {
180
+ "p": 0.9194078947,
181
+ "r": 0.92092257,
182
+ "f": 0.9201646091
183
  },
184
  "amod": {
185
+ "p": 0.8193979933,
186
+ "r": 0.8361774744,
187
+ "f": 0.8277027027
188
  },
189
  "nmod:poss": {
190
+ "p": 0.7340425532,
191
+ "r": 0.6831683168,
192
+ "f": 0.7076923077
193
  },
194
  "ccomp": {
195
+ "p": 0.5652173913,
196
+ "r": 0.6290322581,
197
+ "f": 0.5954198473
198
  },
199
  "nummod": {
200
+ "p": 0.8306451613,
201
+ "r": 0.8583333333,
202
+ "f": 0.8442622951
203
  },
204
  "flat": {
205
+ "p": 0.7636363636,
206
+ "r": 0.8344370861,
207
+ "f": 0.7974683544
208
  },
209
  "compound:prt": {
210
+ "p": 0.5,
211
+ "r": 0.3170731707,
212
+ "f": 0.3880597015
213
  },
214
  "advcl": {
215
+ "p": 0.6339285714,
216
+ "r": 0.6120689655,
217
+ "f": 0.6228070175
218
  },
219
  "mark": {
220
+ "p": 0.8905579399,
221
+ "r": 0.8521560575,
222
+ "f": 0.870933893
223
  },
224
  "cop": {
225
+ "p": 0.7837837838,
226
+ "r": 0.8285714286,
227
+ "f": 0.8055555556
228
  },
229
  "dep": {
230
+ "p": 0.1555555556,
231
  "r": 0.2641509434,
232
+ "f": 0.1958041958
233
  },
234
  "nmod": {
235
+ "p": 0.6352941176,
236
+ "r": 0.6328125,
237
+ "f": 0.6340508806
238
  },
239
  "iobj": {
240
+ "p": 0.8125,
241
  "r": 0.5909090909,
242
+ "f": 0.6842105263
243
  },
244
  "xcomp": {
245
+ "p": 0.5675675676,
246
+ "r": 0.3559322034,
247
+ "f": 0.4375
248
  },
249
  "list": {
250
+ "p": 0.4,
251
+ "r": 0.2222222222,
252
+ "f": 0.2857142857
253
  },
254
  "vocative": {
255
  "p": 0.0,
 
257
  "f": 0.0
258
  },
259
  "fixed": {
260
+ "p": 0.8918918919,
261
+ "r": 0.8048780488,
262
+ "f": 0.8461538462
263
  },
264
  "expl": {
265
+ "p": 0.8484848485,
266
+ "r": 0.8235294118,
267
+ "f": 0.8358208955
268
  },
269
  "appos": {
270
+ "p": 0.4146341463,
271
  "r": 0.5151515152,
272
+ "f": 0.4594594595
273
  },
274
  "obl:tmod": {
275
+ "p": 0.6,
276
+ "r": 0.3333333333,
277
+ "f": 0.4285714286
278
  },
279
  "discourse": {
280
  "p": 0.0,
281
  "r": 0.0,
282
  "f": 0.0
283
+ },
284
+ "obl:lmod": {
285
+ "p": 0.0,
286
+ "r": 0.0,
287
+ "f": 0.0
288
  }
289
  },
290
+ "tag_acc": 0.962905569,
291
+ "lemma_acc": 0.8491041162,
292
+ "ents_p": 0.8075313808,
293
+ "ents_r": 0.8041666667,
294
+ "ents_f": 0.8058455115,
295
  "ents_per_type": {
296
  "PER": {
297
+ "p": 0.9215686275,
298
  "r": 0.8493975904,
299
+ "f": 0.8840125392
300
  },
301
  "ORG": {
302
+ "p": 0.7032967033,
303
+ "r": 0.7111111111,
304
+ "f": 0.7071823204
305
  },
306
  "MISC": {
307
+ "p": 0.7179487179,
308
+ "r": 0.7433628319,
309
+ "f": 0.7304347826
310
  },
311
  "LOC": {
312
+ "p": 0.8290598291,
313
+ "r": 0.8738738739,
314
+ "f": 0.850877193
315
  }
316
+ },
317
+ "speed": 10055.5372609128
318
  }
attribute_ruler/patterns CHANGED
Binary files a/attribute_ruler/patterns and b/attribute_ruler/patterns differ
 
config.cfg CHANGED
@@ -1,10 +1,8 @@
1
  [paths]
2
- train = "corpus/da-core-news/train.spacy"
3
- dev = "corpus/da-core-news/dev.spacy"
4
- vectors = "corpus/da_vectors"
5
- raw = null
6
  init_tok2vec = null
7
- vocab_data = null
8
 
9
  [system]
10
  gpu_allocator = null
@@ -24,6 +22,7 @@ tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
24
 
25
  [components.attribute_ruler]
26
  factory = "attribute_ruler"
 
27
  validate = false
28
 
29
  [components.lemmatizer]
@@ -31,9 +30,13 @@ factory = "lemmatizer"
31
  mode = "lookup"
32
  model = null
33
  overwrite = false
 
34
 
35
  [components.morphologizer]
36
  factory = "morphologizer"
 
 
 
37
 
38
  [components.morphologizer.model]
39
  @architectures = "spacy.Tagger.v1"
@@ -48,6 +51,7 @@ upstream = "tok2vec"
48
  factory = "ner"
49
  incorrect_spans_key = null
50
  moves = null
 
51
  update_with_oracle_cut_size = 100
52
 
53
  [components.ner.model]
@@ -65,8 +69,8 @@ nO = null
65
  [components.ner.model.tok2vec.embed]
66
  @architectures = "spacy.MultiHashEmbed.v2"
67
  width = 96
68
- attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
69
- rows = [5000,2500,2500,2500]
70
  include_static_vectors = true
71
 
72
  [components.ner.model.tok2vec.encode]
@@ -81,6 +85,7 @@ factory = "parser"
81
  learn_tokens = false
82
  min_action_freq = 30
83
  moves = null
 
84
  update_with_oracle_cut_size = 100
85
 
86
  [components.parser.model]
@@ -99,6 +104,8 @@ upstream = "tok2vec"
99
 
100
  [components.senter]
101
  factory = "senter"
 
 
102
 
103
  [components.senter.model]
104
  @architectures = "spacy.Tagger.v1"
@@ -110,8 +117,8 @@ nO = null
110
  [components.senter.model.tok2vec.embed]
111
  @architectures = "spacy.MultiHashEmbed.v2"
112
  width = 16
113
- attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
114
- rows = [1000,500,500,500]
115
  include_static_vectors = true
116
 
117
  [components.senter.model.tok2vec.encode]
@@ -130,8 +137,8 @@ factory = "tok2vec"
130
  [components.tok2vec.model.embed]
131
  @architectures = "spacy.MultiHashEmbed.v2"
132
  width = ${components.tok2vec.model.encode:width}
133
- attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
134
- rows = [5000,2500,2500,2500]
135
  include_static_vectors = true
136
 
137
  [components.tok2vec.model.encode]
@@ -145,22 +152,19 @@ maxout_pieces = 3
145
 
146
  [corpora.dev]
147
  @readers = "spacy.Corpus.v1"
148
- limit = 0
149
- max_length = 0
150
- path = ${paths:dev}
151
  gold_preproc = false
 
 
152
  augmenter = null
153
 
154
  [corpora.train]
155
  @readers = "spacy.Corpus.v1"
156
- path = ${paths:train}
157
- max_length = 5000
158
  gold_preproc = false
 
159
  limit = 0
160
-
161
- [corpora.train.augmenter]
162
- @augmenters = "spacy.lower_case.v1"
163
- level = 0.1
164
 
165
  [training]
166
  train_corpus = "corpora.train"
@@ -191,9 +195,8 @@ compound = 1.001
191
  t = 0.0
192
 
193
  [training.logger]
194
- @loggers = "spacy.WandbLogger.v1"
195
- project_name = "spacy-v3.0.0a2"
196
- remove_config_values = []
197
 
198
  [training.optimizer]
199
  @optimizers = "Adam.v1"
@@ -216,16 +219,17 @@ dep_las_per_type = null
216
  sents_p = null
217
  sents_r = null
218
  sents_f = 0.02
219
- lemma_acc = 0.33
220
- ents_f = 0.33
221
  ents_p = 0.0
222
  ents_r = 0.0
223
  ents_per_type = null
 
224
 
225
  [pretraining]
226
 
227
  [initialize]
228
- vocab_data = ${paths.vocab_data}
229
  vectors = ${paths.vectors}
230
  init_tok2vec = ${paths.init_tok2vec}
231
  before_init = null
 
1
  [paths]
2
+ train = null
3
+ dev = null
4
+ vectors = null
 
5
  init_tok2vec = null
 
6
 
7
  [system]
8
  gpu_allocator = null
 
22
 
23
  [components.attribute_ruler]
24
  factory = "attribute_ruler"
25
+ scorer = {"@scorers":"spacy.attribute_ruler_scorer.v1"}
26
  validate = false
27
 
28
  [components.lemmatizer]
 
30
  mode = "lookup"
31
  model = null
32
  overwrite = false
33
+ scorer = {"@scorers":"spacy.lemmatizer_scorer.v1"}
34
 
35
  [components.morphologizer]
36
  factory = "morphologizer"
37
+ extend = false
38
+ overwrite = true
39
+ scorer = {"@scorers":"spacy.morphologizer_scorer.v1"}
40
 
41
  [components.morphologizer.model]
42
  @architectures = "spacy.Tagger.v1"
 
51
  factory = "ner"
52
  incorrect_spans_key = null
53
  moves = null
54
+ scorer = {"@scorers":"spacy.ner_scorer.v1"}
55
  update_with_oracle_cut_size = 100
56
 
57
  [components.ner.model]
 
69
  [components.ner.model.tok2vec.embed]
70
  @architectures = "spacy.MultiHashEmbed.v2"
71
  width = 96
72
+ attrs = ["NORM","PREFIX","SUFFIX","SHAPE","SPACY"]
73
+ rows = [5000,2500,2500,2500,100]
74
  include_static_vectors = true
75
 
76
  [components.ner.model.tok2vec.encode]
 
85
  learn_tokens = false
86
  min_action_freq = 30
87
  moves = null
88
+ scorer = {"@scorers":"spacy.parser_scorer.v1"}
89
  update_with_oracle_cut_size = 100
90
 
91
  [components.parser.model]
 
104
 
105
  [components.senter]
106
  factory = "senter"
107
+ overwrite = false
108
+ scorer = {"@scorers":"spacy.senter_scorer.v1"}
109
 
110
  [components.senter.model]
111
  @architectures = "spacy.Tagger.v1"
 
117
  [components.senter.model.tok2vec.embed]
118
  @architectures = "spacy.MultiHashEmbed.v2"
119
  width = 16
120
+ attrs = ["NORM","PREFIX","SUFFIX","SHAPE","SPACY"]
121
+ rows = [1000,500,500,500,50]
122
  include_static_vectors = true
123
 
124
  [components.senter.model.tok2vec.encode]
 
137
  [components.tok2vec.model.embed]
138
  @architectures = "spacy.MultiHashEmbed.v2"
139
  width = ${components.tok2vec.model.encode:width}
140
+ attrs = ["NORM","PREFIX","SUFFIX","SHAPE","SPACY"]
141
+ rows = [5000,2500,2500,2500,100]
142
  include_static_vectors = true
143
 
144
  [components.tok2vec.model.encode]
 
152
 
153
  [corpora.dev]
154
  @readers = "spacy.Corpus.v1"
155
+ path = ${paths.dev}
 
 
156
  gold_preproc = false
157
+ max_length = 0
158
+ limit = 0
159
  augmenter = null
160
 
161
  [corpora.train]
162
  @readers = "spacy.Corpus.v1"
163
+ path = ${paths.train}
 
164
  gold_preproc = false
165
+ max_length = 0
166
  limit = 0
167
+ augmenter = null
 
 
 
168
 
169
  [training]
170
  train_corpus = "corpora.train"
 
195
  t = 0.0
196
 
197
  [training.logger]
198
+ @loggers = "spacy.ConsoleLogger.v1"
199
+ progress_bar = false
 
200
 
201
  [training.optimizer]
202
  @optimizers = "Adam.v1"
 
219
  sents_p = null
220
  sents_r = null
221
  sents_f = 0.02
222
+ lemma_acc = 0.5
223
+ ents_f = 0.16
224
  ents_p = 0.0
225
  ents_r = 0.0
226
  ents_per_type = null
227
+ speed = 0.0
228
 
229
  [pretraining]
230
 
231
  [initialize]
232
+ vocab_data = null
233
  vectors = ${paths.vectors}
234
  init_tok2vec = ${paths.init_tok2vec}
235
  before_init = null
da_core_news_md-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b33beeb80d0fa69fe1f07837ec2639f67af747ec95739fed14f3134e8756827
3
- size 48331499
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0195b5af78d68d4f3a6895cfa3cde98871682bbddc4b04c375770980fac4d222
3
+ size 48943545
meta.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "lang":"da",
3
  "name":"core_news_md",
4
- "version":"3.1.0",
5
  "description":"Danish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.",
6
  "author":"Explosion",
7
  "email":"contact@explosion.ai",
8
  "url":"https://explosion.ai",
9
  "license":"CC BY-SA 4.0",
10
- "spacy_version":">=3.1.0,<3.2.0",
11
- "spacy_git_version":"caba63b74",
12
  "vectors":{
13
  "width":300,
14
  "vectors":20000,
@@ -183,6 +183,7 @@
183
  "acl:relcl",
184
  "advcl",
185
  "advmod",
 
186
  "amod",
187
  "appos",
188
  "aux",
@@ -206,7 +207,7 @@
206
  "nummod",
207
  "obj",
208
  "obl",
209
- "obl:loc",
210
  "obl:tmod",
211
  "punct",
212
  "xcomp"
@@ -250,89 +251,84 @@
250
  ],
251
  "performance":{
252
  "token_acc":0.9994672349,
253
- "tag_acc":0.9624213075,
254
- "pos_acc":0.9624213075,
255
- "morph_acc":0.9499273608,
256
- "lemma_acc":0.8491041162,
257
- "dep_uas":0.8241604738,
258
- "dep_las":0.7836629791,
259
- "ents_p":0.8105263158,
260
- "ents_r":0.8020833333,
261
- "ents_f":0.8062827225,
262
- "sents_p":0.8324420677,
263
- "sents_r":0.8280141844,
264
- "sents_f":0.8302222222,
265
- "speed":9986.5434782999,
266
  "morph_per_feat":{
267
  "Mood":{
268
- "p":0.9770114943,
269
- "r":0.9723546235,
270
- "f":0.9746774964
271
  },
272
  "Tense":{
273
- "p":0.9704097117,
274
- "r":0.9631024096,
275
- "f":0.9667422525
276
  },
277
  "VerbForm":{
278
- "p":0.9599753695,
279
- "r":0.9541003672,
280
- "f":0.9570288521
281
  },
282
  "Voice":{
283
- "p":0.9753363229,
284
- "r":0.9753363229,
285
- "f":0.9753363229
286
  },
287
  "Definite":{
288
- "p":0.9676258993,
289
- "r":0.9565389174,
290
- "f":0.9620504669
291
  },
292
  "Gender":{
293
- "p":0.9498495486,
294
- "r":0.9441674975,
295
- "f":0.947
296
  },
297
  "Number":{
298
- "p":0.9639283834,
299
- "r":0.9548774126,
300
- "f":0.9593815514
301
  },
302
  "AdpType":{
303
- "p":0.9991071429,
304
- "r":0.9893899204,
305
- "f":0.994224789
306
  },
307
  "PartType":{
308
- "p":0.996763754,
309
  "r":1.0,
310
- "f":0.9983792545
311
  },
312
  "Case":{
313
- "p":0.9757673667,
314
- "r":0.9541864139,
315
- "f":0.96485623
316
  },
317
  "Person":{
318
- "p":0.972027972,
319
- "r":0.9875666075,
320
- "f":0.9797356828
321
  },
322
  "PronType":{
323
- "p":0.9835796388,
324
- "r":0.9851973684,
325
- "f":0.9843878389
326
  },
327
  "NumType":{
328
- "p":0.9727891156,
329
  "r":0.9470198675,
330
- "f":0.9597315436
331
  },
332
  "Degree":{
333
- "p":0.9538274605,
334
- "r":0.9457831325,
335
- "f":0.9497882638
336
  },
337
  "Reflex":{
338
  "p":1.0,
@@ -345,14 +341,14 @@
345
  "f":0.9828571429
346
  },
347
  "Poss":{
348
- "p":0.9887640449,
349
  "r":1.0,
350
- "f":0.9943502825
351
  },
352
  "Foreign":{
353
- "p":1.0,
354
  "r":0.4,
355
- "f":0.5714285714
356
  },
357
  "Abbr":{
358
  "p":0.0,
@@ -365,141 +361,146 @@
365
  "f":1.0
366
  },
367
  "Polite":{
368
- "p":0.6666666667,
369
- "r":0.5,
370
- "f":0.5714285714
371
  }
372
  },
 
 
 
 
 
373
  "dep_las_per_type":{
374
  "advmod":{
375
- "p":0.7148760331,
376
- "r":0.7330508475,
377
- "f":0.7238493724
378
  },
379
  "root":{
380
- "p":0.8327402135,
381
- "r":0.829787234,
382
- "f":0.8312611012
383
  },
384
  "nsubj":{
385
- "p":0.8338590957,
386
- "r":0.8364978903,
387
- "f":0.8351764086
388
  },
389
  "case":{
390
- "p":0.882642998,
391
- "r":0.8843873518,
392
- "f":0.8835143139
393
  },
394
  "obl":{
395
- "p":0.7138314785,
396
- "r":0.6982892691,
397
- "f":0.7059748428
398
  },
399
  "cc":{
400
- "p":0.7636887608,
401
- "r":0.7703488372,
402
- "f":0.7670043415
403
  },
404
  "conj":{
405
- "p":0.6164383562,
406
- "r":0.6,
407
- "f":0.6081081081
408
  },
409
  "obj":{
410
- "p":0.8198874296,
411
  "r":0.8485436893,
412
- "f":0.8339694656
413
  },
414
  "aux":{
415
- "p":0.8918918919,
416
- "r":0.8658892128,
417
- "f":0.8786982249
418
  },
419
  "acl:relcl":{
420
- "p":0.625698324,
421
- "r":0.6054054054,
422
- "f":0.6153846154
423
  },
424
- "obl:loc":{
425
- "p":0.7536231884,
426
- "r":0.7428571429,
427
- "f":0.7482014388
428
  },
429
  "det":{
430
- "p":0.9168053245,
431
- "r":0.9077429984,
432
- "f":0.9122516556
433
  },
434
  "amod":{
435
- "p":0.830449827,
436
- "r":0.819112628,
437
- "f":0.824742268
438
  },
439
  "nmod:poss":{
440
- "p":0.7,
441
- "r":0.6930693069,
442
- "f":0.6965174129
443
  },
444
  "ccomp":{
445
- "p":0.6111111111,
446
- "r":0.7096774194,
447
- "f":0.6567164179
448
  },
449
  "nummod":{
450
- "p":0.8760330579,
451
- "r":0.8833333333,
452
- "f":0.8796680498
453
  },
454
  "flat":{
455
- "p":0.7914110429,
456
- "r":0.8543046358,
457
- "f":0.821656051
458
  },
459
  "compound:prt":{
460
- "p":0.4583333333,
461
- "r":0.2682926829,
462
- "f":0.3384615385
463
  },
464
  "advcl":{
465
- "p":0.6465517241,
466
- "r":0.6465517241,
467
- "f":0.6465517241
468
  },
469
  "mark":{
470
- "p":0.8802521008,
471
- "r":0.8603696099,
472
- "f":0.8701973001
473
  },
474
  "cop":{
475
- "p":0.8087431694,
476
- "r":0.8457142857,
477
- "f":0.8268156425
478
  },
479
  "dep":{
480
- "p":0.1573033708,
481
  "r":0.2641509434,
482
- "f":0.1971830986
483
  },
484
  "nmod":{
485
- "p":0.6455445545,
486
- "r":0.63671875,
487
- "f":0.6411012783
488
  },
489
  "iobj":{
490
- "p":0.7222222222,
491
  "r":0.5909090909,
492
- "f":0.65
493
  },
494
  "xcomp":{
495
- "p":0.6216216216,
496
- "r":0.3898305085,
497
- "f":0.4791666667
498
  },
499
  "list":{
500
- "p":0.375,
501
- "r":0.3333333333,
502
- "f":0.3529411765
503
  },
504
  "vocative":{
505
  "p":0.0,
@@ -507,57 +508,68 @@
507
  "f":0.0
508
  },
509
  "fixed":{
510
- "p":0.8974358974,
511
- "r":0.8333333333,
512
- "f":0.8641975309
513
  },
514
  "expl":{
515
- "p":0.8181818182,
516
- "r":0.7941176471,
517
- "f":0.8059701493
518
  },
519
  "appos":{
520
- "p":0.5151515152,
521
  "r":0.5151515152,
522
- "f":0.5151515152
523
  },
524
  "obl:tmod":{
525
- "p":0.5,
526
- "r":0.3888888889,
527
- "f":0.4375
528
  },
529
  "discourse":{
530
  "p":0.0,
531
  "r":0.0,
532
  "f":0.0
 
 
 
 
 
533
  }
534
  },
 
 
 
 
 
535
  "ents_per_type":{
536
  "PER":{
537
- "p":0.8867924528,
538
  "r":0.8493975904,
539
- "f":0.8676923077
540
  },
541
  "ORG":{
542
- "p":0.7325581395,
543
- "r":0.7,
544
- "f":0.7159090909
545
  },
546
  "MISC":{
547
- "p":0.7155172414,
548
- "r":0.7345132743,
549
- "f":0.7248908297
550
  },
551
  "LOC":{
552
- "p":0.8596491228,
553
- "r":0.8828828829,
554
- "f":0.8711111111
555
  }
556
- }
 
557
  },
558
  "sources":[
559
  {
560
- "name":"UD Danish DDT v2.5",
561
  "url":"https://github.com/UniversalDependencies/UD_Danish-DDT",
562
  "license":"CC BY-SA 4.0",
563
  "author":"Johannsen, Anders; Mart\u00ednez Alonso, H\u00e9ctor; Plank, Barbara"
 
1
  {
2
  "lang":"da",
3
  "name":"core_news_md",
4
+ "version":"3.2.0",
5
  "description":"Danish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.",
6
  "author":"Explosion",
7
  "email":"contact@explosion.ai",
8
  "url":"https://explosion.ai",
9
  "license":"CC BY-SA 4.0",
10
+ "spacy_version":">=3.2.0,<3.3.0",
11
+ "spacy_git_version":"bb26550e2",
12
  "vectors":{
13
  "width":300,
14
  "vectors":20000,
 
183
  "acl:relcl",
184
  "advcl",
185
  "advmod",
186
+ "advmod:lmod",
187
  "amod",
188
  "appos",
189
  "aux",
 
207
  "nummod",
208
  "obj",
209
  "obl",
210
+ "obl:lmod",
211
  "obl:tmod",
212
  "punct",
213
  "xcomp"
 
251
  ],
252
  "performance":{
253
  "token_acc":0.9994672349,
254
+ "token_p":0.9977732598,
255
+ "token_r":0.9974835463,
256
+ "token_f":0.997628382,
257
+ "pos_acc":0.962905569,
258
+ "morph_acc":0.9487651332,
259
+ "morph_micro_p":0.9664501066,
260
+ "morph_micro_r":0.9611327041,
261
+ "morph_micro_f":0.9637840711,
 
 
 
 
 
262
  "morph_per_feat":{
263
  "Mood":{
264
+ "p":0.9789674952,
265
+ "r":0.9761677788,
266
+ "f":0.9775656325
267
  },
268
  "Tense":{
269
+ "p":0.9698340875,
270
+ "r":0.968373494,
271
+ "f":0.9691032404
272
  },
273
  "VerbForm":{
274
+ "p":0.9631449631,
275
+ "r":0.9596083231,
276
+ "f":0.9613733906
277
  },
278
  "Voice":{
279
+ "p":0.9782934132,
280
+ "r":0.9768310912,
281
+ "f":0.9775617053
282
  },
283
  "Definite":{
284
+ "p":0.9633027523,
285
+ "r":0.9541683129,
286
+ "f":0.9587137753
287
  },
288
  "Gender":{
289
+ "p":0.9459098497,
290
+ "r":0.9415088069,
291
+ "f":0.9437041972
292
  },
293
  "Number":{
294
+ "p":0.9618621778,
295
+ "r":0.9538341158,
296
+ "f":0.9578313253
297
  },
298
  "AdpType":{
299
+ "p":0.9973333333,
300
+ "r":0.9920424403,
301
+ "f":0.9946808511
302
  },
303
  "PartType":{
304
+ "p":1.0,
305
  "r":1.0,
306
+ "f":1.0
307
  },
308
  "Case":{
309
+ "p":0.9696485623,
310
+ "r":0.9589257504,
311
+ "f":0.9642573471
312
  },
313
  "Person":{
314
+ "p":0.9704347826,
315
+ "r":0.9911190053,
316
+ "f":0.9806678383
317
  },
318
  "PronType":{
319
+ "p":0.9811165846,
320
+ "r":0.9827302632,
321
+ "f":0.9819227609
322
  },
323
  "NumType":{
324
+ "p":0.9794520548,
325
  "r":0.9470198675,
326
+ "f":0.962962963
327
  },
328
  "Degree":{
329
+ "p":0.9520295203,
330
+ "r":0.9325301205,
331
+ "f":0.942178941
332
  },
333
  "Reflex":{
334
  "p":1.0,
 
341
  "f":0.9828571429
342
  },
343
  "Poss":{
344
+ "p":0.9777777778,
345
  "r":1.0,
346
+ "f":0.9887640449
347
  },
348
  "Foreign":{
349
+ "p":0.6666666667,
350
  "r":0.4,
351
+ "f":0.5
352
  },
353
  "Abbr":{
354
  "p":0.0,
 
361
  "f":1.0
362
  },
363
  "Polite":{
364
+ "p":1.0,
365
+ "r":0.75,
366
+ "f":0.8571428571
367
  }
368
  },
369
+ "sents_p":0.908438061,
370
+ "sents_r":0.8971631206,
371
+ "sents_f":0.902765388,
372
+ "dep_uas":0.8220632614,
373
+ "dep_las":0.7813355686,
374
  "dep_las_per_type":{
375
  "advmod":{
376
+ "p":0.7073509015,
377
+ "r":0.7203389831,
378
+ "f":0.7137858642
379
  },
380
  "root":{
381
+ "p":0.8411552347,
382
+ "r":0.8262411348,
383
+ "f":0.8336314848
384
  },
385
  "nsubj":{
386
+ "p":0.8428417653,
387
+ "r":0.8259493671,
388
+ "f":0.8343100693
389
  },
390
  "case":{
391
+ "p":0.881372549,
392
+ "r":0.8865877712,
393
+ "f":0.883972468
394
  },
395
  "obl":{
396
+ "p":0.7053291536,
397
+ "r":0.698757764,
398
+ "f":0.7020280811
399
  },
400
  "cc":{
401
+ "p":0.7851002865,
402
+ "r":0.7965116279,
403
+ "f":0.7907647908
404
  },
405
  "conj":{
406
+ "p":0.6491712707,
407
+ "r":0.6266666667,
408
+ "f":0.6377204885
409
  },
410
  "obj":{
411
+ "p":0.8033088235,
412
  "r":0.8485436893,
413
+ "f":0.8253068933
414
  },
415
  "aux":{
416
+ "p":0.875739645,
417
+ "r":0.8629737609,
418
+ "f":0.8693098385
419
  },
420
  "acl:relcl":{
421
+ "p":0.5879120879,
422
+ "r":0.5783783784,
423
+ "f":0.583106267
424
  },
425
+ "advmod:lmod":{
426
+ "p":0.7586206897,
427
+ "r":0.6567164179,
428
+ "f":0.704
429
  },
430
  "det":{
431
+ "p":0.9194078947,
432
+ "r":0.92092257,
433
+ "f":0.9201646091
434
  },
435
  "amod":{
436
+ "p":0.8193979933,
437
+ "r":0.8361774744,
438
+ "f":0.8277027027
439
  },
440
  "nmod:poss":{
441
+ "p":0.7340425532,
442
+ "r":0.6831683168,
443
+ "f":0.7076923077
444
  },
445
  "ccomp":{
446
+ "p":0.5652173913,
447
+ "r":0.6290322581,
448
+ "f":0.5954198473
449
  },
450
  "nummod":{
451
+ "p":0.8306451613,
452
+ "r":0.8583333333,
453
+ "f":0.8442622951
454
  },
455
  "flat":{
456
+ "p":0.7636363636,
457
+ "r":0.8344370861,
458
+ "f":0.7974683544
459
  },
460
  "compound:prt":{
461
+ "p":0.5,
462
+ "r":0.3170731707,
463
+ "f":0.3880597015
464
  },
465
  "advcl":{
466
+ "p":0.6339285714,
467
+ "r":0.6120689655,
468
+ "f":0.6228070175
469
  },
470
  "mark":{
471
+ "p":0.8905579399,
472
+ "r":0.8521560575,
473
+ "f":0.870933893
474
  },
475
  "cop":{
476
+ "p":0.7837837838,
477
+ "r":0.8285714286,
478
+ "f":0.8055555556
479
  },
480
  "dep":{
481
+ "p":0.1555555556,
482
  "r":0.2641509434,
483
+ "f":0.1958041958
484
  },
485
  "nmod":{
486
+ "p":0.6352941176,
487
+ "r":0.6328125,
488
+ "f":0.6340508806
489
  },
490
  "iobj":{
491
+ "p":0.8125,
492
  "r":0.5909090909,
493
+ "f":0.6842105263
494
  },
495
  "xcomp":{
496
+ "p":0.5675675676,
497
+ "r":0.3559322034,
498
+ "f":0.4375
499
  },
500
  "list":{
501
+ "p":0.4,
502
+ "r":0.2222222222,
503
+ "f":0.2857142857
504
  },
505
  "vocative":{
506
  "p":0.0,
 
508
  "f":0.0
509
  },
510
  "fixed":{
511
+ "p":0.8918918919,
512
+ "r":0.8048780488,
513
+ "f":0.8461538462
514
  },
515
  "expl":{
516
+ "p":0.8484848485,
517
+ "r":0.8235294118,
518
+ "f":0.8358208955
519
  },
520
  "appos":{
521
+ "p":0.4146341463,
522
  "r":0.5151515152,
523
+ "f":0.4594594595
524
  },
525
  "obl:tmod":{
526
+ "p":0.6,
527
+ "r":0.3333333333,
528
+ "f":0.4285714286
529
  },
530
  "discourse":{
531
  "p":0.0,
532
  "r":0.0,
533
  "f":0.0
534
+ },
535
+ "obl:lmod":{
536
+ "p":0.0,
537
+ "r":0.0,
538
+ "f":0.0
539
  }
540
  },
541
+ "tag_acc":0.962905569,
542
+ "lemma_acc":0.8491041162,
543
+ "ents_p":0.8075313808,
544
+ "ents_r":0.8041666667,
545
+ "ents_f":0.8058455115,
546
  "ents_per_type":{
547
  "PER":{
548
+ "p":0.9215686275,
549
  "r":0.8493975904,
550
+ "f":0.8840125392
551
  },
552
  "ORG":{
553
+ "p":0.7032967033,
554
+ "r":0.7111111111,
555
+ "f":0.7071823204
556
  },
557
  "MISC":{
558
+ "p":0.7179487179,
559
+ "r":0.7433628319,
560
+ "f":0.7304347826
561
  },
562
  "LOC":{
563
+ "p":0.8290598291,
564
+ "r":0.8738738739,
565
+ "f":0.850877193
566
  }
567
+ },
568
+ "speed":10055.5372609128
569
  },
570
  "sources":[
571
  {
572
+ "name":"UD Danish DDT v2.8",
573
  "url":"https://github.com/UniversalDependencies/UD_Danish-DDT",
574
  "license":"CC BY-SA 4.0",
575
  "author":"Johannsen, Anders; Mart\u00ednez Alonso, H\u00e9ctor; Plank, Barbara"
morphologizer/cfg CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "labels_morph":{
3
  "AdpType=Prep|POS=ADP":"AdpType=Prep",
4
  "Definite=Ind|Gender=Com|Number=Sing|POS=NOUN":"Definite=Ind|Gender=Com|Number=Sing",
@@ -316,5 +317,6 @@
316
  "Number[psor]=Plur|POS=PRON|Person=3|Poss=Yes|PronType=Prs":95,
317
  "POS=DET|PronType=Dem":90,
318
  "Definite=Def|Number=Plur|POS=NOUN":92
319
- }
 
320
  }
 
1
  {
2
+ "extend":false,
3
  "labels_morph":{
4
  "AdpType=Prep|POS=ADP":"AdpType=Prep",
5
  "Definite=Ind|Gender=Com|Number=Sing|POS=NOUN":"Definite=Ind|Gender=Com|Number=Sing",
 
317
  "Number[psor]=Plur|POS=PRON|Person=3|Poss=Yes|PronType=Prs":95,
318
  "POS=DET|PronType=Dem":90,
319
  "Definite=Def|Number=Plur|POS=NOUN":92
320
+ },
321
+ "overwrite":true
322
  }
morphologizer/model CHANGED
Binary files a/morphologizer/model and b/morphologizer/model differ
 
ner/model CHANGED
Binary files a/ner/model and b/ner/model differ
 
parser/model CHANGED
Binary files a/parser/model and b/parser/model differ
 
parser/moves CHANGED
@@ -1 +1 @@
1
- ��moves�2{"0":{"":41514},"1":{"":34295},"2":{"case":7489,"nsubj":6009,"det":4334,"amod":3968,"advmod":3657,"mark":3529,"aux":2432,"cc":2261,"punct":2182,"cop":1329,"obl":894,"nummod":799,"nmod:poss":651,"nmod":460,"expl":291,"ccomp":202,"obj":195,"xcomp":122,"case||nmod":73,"obl:tmod":53,"dep":49,"acl:relcl":43},"3":{"punct":8601,"obl":3949,"obj":3758,"nmod":3565,"conj":2745,"advmod":2095,"flat":1295,"nsubj":1172,"acl:relcl":1131,"advcl":808,"amod":628,"obl:loc":467,"fixed":390,"dep":322,"xcomp":272,"appos":268,"compound:prt":261,"ccomp":252,"acl:relcl||nsubj":237,"case":202,"nummod":167,"list":161,"nmod:poss":156,"punct||conj":151,"mark":137,"cc":135,"iobj":107,"expl":77,"cop":69,"nmod||case":60,"aux":48,"obl:tmod":45,"cc||case":43,"advcl||advmod":43,"cc||conj":40,"case||obl":38,"punct||case":33},"4":{"ROOT":4367}}�cfg��neg_key�
 
1
+ ��moves�D{"0":{"":41514},"1":{"":34295},"2":{"case":7489,"nsubj":6009,"det":4334,"amod":3968,"advmod":3657,"mark":3529,"aux":2432,"cc":2261,"punct":2182,"cop":1329,"obl":894,"nummod":799,"nmod:poss":651,"nmod":460,"expl":291,"ccomp":202,"obj":195,"xcomp":122,"case||nmod":73,"obl:tmod":53,"dep":49,"acl:relcl":43},"3":{"punct":8601,"obl":3949,"obj":3758,"nmod":3565,"conj":2745,"advmod":2095,"flat":1295,"nsubj":1172,"acl:relcl":1131,"advcl":808,"amod":628,"advmod:lmod":423,"fixed":390,"dep":322,"xcomp":272,"appos":268,"compound:prt":261,"ccomp":252,"acl:relcl||nsubj":237,"case":202,"nummod":167,"list":161,"nmod:poss":156,"punct||conj":151,"mark":137,"cc":135,"iobj":107,"expl":77,"cop":69,"nmod||case":60,"aux":48,"obl:tmod":45,"obl:lmod":44,"cc||case":43,"advcl||advmod":43,"cc||conj":40,"case||obl":38,"punct||case":33},"4":{"ROOT":4367}}�cfg��neg_key�
senter/cfg CHANGED
@@ -1,3 +1,3 @@
1
  {
2
-
3
  }
 
1
  {
2
+ "overwrite":false
3
  }
senter/model CHANGED
Binary files a/senter/model and b/senter/model differ
 
tok2vec/model CHANGED
Binary files a/tok2vec/model and b/tok2vec/model differ
 
tokenizer CHANGED
The diff for this file is too large to render. See raw diff
 
vocab/strings.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:efaceca843effeab261a6ce55a6d3a99ae99949b7e28bdf0541fdc4c2d3e2c5a
3
- size 8625506
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42fe610567bec6fa69da580a4753d083f1f4429efd32b3c1fa638b6a07a6757e
3
+ size 10070327
vocab/vectors.cfg ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "mode":"default"
3
+ }