Adriane Boyd committed on
Commit
88f310c
1 Parent(s): ad0f59f

Update spaCy pipeline

Browse files
README.md CHANGED
@@ -14,48 +14,48 @@ model-index:
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
- value: 0.7714191745
18
  - name: NER Recall
19
  type: recall
20
- value: 0.6628046627
21
  - name: NER F Score
22
  type: f_score
23
- value: 0.712999202
24
  - task:
25
  name: TAG
26
  type: token-classification
27
  metrics:
28
  - name: TAG (XPOS) Accuracy
29
  type: accuracy
30
- value: 0.7335983702
31
  - task:
32
  name: POS
33
  type: token-classification
34
  metrics:
35
  - name: POS (UPOS) Accuracy
36
  type: accuracy
37
- value: 0.8608501296
38
  - task:
39
  name: LEMMA
40
  type: token-classification
41
  metrics:
42
  - name: Lemma Accuracy
43
  type: accuracy
44
- value: 0.8317806919
45
  - task:
46
  name: UNLABELED_DEPENDENCIES
47
  type: token-classification
48
  metrics:
49
  - name: Unlabeled Attachment Score (UAS)
50
  type: f_score
51
- value: 0.7375785393
52
  - task:
53
  name: LABELED_DEPENDENCIES
54
  type: token-classification
55
  metrics:
56
  - name: Labeled Attachment Score (LAS)
57
  type: f_score
58
- value: 0.6555857582
59
  - task:
60
  name: SENTS
61
  type: token-classification
@@ -71,8 +71,8 @@ Korean pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, p
71
  | Feature | Description |
72
  | --- | --- |
73
  | **Name** | `ko_core_news_sm` |
74
- | **Version** | `3.3.0` |
75
- | **spaCy** | `>=3.3.0.dev0,<3.4.0` |
76
  | **Default Pipeline** | `tok2vec`, `tagger`, `morphologizer`, `parser`, `lemmatizer`, `attribute_ruler`, `ner` |
77
  | **Components** | `tok2vec`, `tagger`, `morphologizer`, `parser`, `lemmatizer`, `senter`, `attribute_ruler`, `ner` |
78
  | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
@@ -84,12 +84,12 @@ Korean pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, p
84
 
85
  <details>
86
 
87
- <summary>View label scheme (2026 labels for 4 components)</summary>
88
 
89
  | Component | Labels |
90
  | --- | --- |
91
- | **`tagger`** | `ecs`, `etm`, `f`, `f+f+jcj`, `f+f+jcs`, `f+f+jct`, `f+f+jxt`, `f+jca`, `f+jca+jp+ecc`, `f+jca+jp+ep+ef`, `f+jca+jxc`, `f+jca+jxc+jcm`, `f+jca+jxt`, `f+jcj`, `f+jcm`, `f+jco`, `f+jcs`, `f+jct`, `f+jct+jcm`, `f+jp+ef`, `f+jp+ep+ef`, `f+jp+etm`, `f+jxc`, `f+jxt`, `f+ncn`, `f+ncn+jcm`, `f+ncn+jcs`, `f+ncn+jp+ecc`, `f+ncn+jxt`, `f+ncpa+jcm`, `f+npp+jcs`, `f+nq`, `f+xsn`, `f+xsn+jco`, `f+xsn+jxt`, `ii`, `jca`, `jca+jcm`, `jca+jxc`, `jca+jxt`, `jcc`, `jcj`, `jcm`, `jco`, `jcr`, `jcr+jxc`, `jcs`, `jct`, `jct+jcm`, `jct+jxt`, `jp+ecc`, `jp+ecs`, `jp+ef`, `jp+ef+jcr`, `jp+ef+jcr+jxc`, `jp+ep+ecs`, `jp+ep+ef`, `jp+ep+etm`, `jp+ep+etn`, `jp+etm`, `jp+etn`, `jp+etn+jco`, `jp+etn+jxc`, `jxc`, `jxc+jca`, `jxc+jco`, `jxc+jcs`, `jxt`, `mad`, `mad+jxc`, `mad+jxt`, `mag`, `mag+jca`, `mag+jcm`, `mag+jcs`, `mag+jp+ef+jcr`, `mag+jxc`, `mag+jxc+jxc`, `mag+jxt`, `mag+xsn`, `maj`, `maj+jxc`, `maj+jxt`, `mma`, `mmd`, `nbn`, `nbn+jca`, `nbn+jca+jcj`, `nbn+jca+jcm`, `nbn+jca+jp+ef`, `nbn+jca+jxc`, `nbn+jca+jxt`, `nbn+jcc`, `nbn+jcj`, `nbn+jcm`, `nbn+jco`, `nbn+jcr`, `nbn+jcs`, `nbn+jct`, `nbn+jct+jcm`, `nbn+jct+jxt`, `nbn+jp+ecc`, `nbn+jp+ecs`, `nbn+jp+ecs+jca`, `nbn+jp+ecs+jcm`, `nbn+jp+ecs+jco`, `nbn+jp+ecs+jxc`, `nbn+jp+ecs+jxt`, `nbn+jp+ecx`, `nbn+jp+ef`, `nbn+jp+ef+jca`, `nbn+jp+ef+jco`, `nbn+jp+ef+jcr`, `nbn+jp+ef+jcr+jxc`, `nbn+jp+ef+jcr+jxt`, `nbn+jp+ef+jcs`, `nbn+jp+ef+jxc`, `nbn+jp+ef+jxc+jco`, `nbn+jp+ef+jxf`, `nbn+jp+ef+jxt`, `nbn+jp+ep+ecc`, `nbn+jp+ep+ecs`, `nbn+jp+ep+ecs+jxc`, `nbn+jp+ep+ef`, `nbn+jp+ep+ef+jcr`, `nbn+jp+ep+etm`, `nbn+jp+ep+etn`, `nbn+jp+ep+etn+jco`, `nbn+jp+ep+etn+jcs`, `nbn+jp+etm`, `nbn+jp+etn`, `nbn+jp+etn+jca`, `nbn+jp+etn+jca+jxt`, `nbn+jp+etn+jco`, `nbn+jp+etn+jcs`, `nbn+jp+etn+jxc`, `nbn+jp+etn+jxt`, `nbn+jxc`, `nbn+jxc+jca`, `nbn+jxc+jca+jxc`, `nbn+jxc+jca+jxt`, `nbn+jxc+jcc`, `nbn+jxc+jcm`, `nbn+jxc+jco`, `nbn+jxc+jcs`, `nbn+jxc+jp+ef`, `nbn+jxc+jxc`, `nbn+jxc+jxt`, `nbn+jxt`, `nbn+nbn`, `nbn+nbn+jp+ef`, `nbn+xsm+ecs`, `nbn+xsm+ef`, 
`nbn+xsm+ep+ef`, `nbn+xsm+ep+ef+jcr`, `nbn+xsm+etm`, `nbn+xsn`, `nbn+xsn+jca`, `nbn+xsn+jca+jp+ef+jcr`, `nbn+xsn+jca+jxc`, `nbn+xsn+jca+jxt`, `nbn+xsn+jcm`, `nbn+xsn+jco`, `nbn+xsn+jcs`, `nbn+xsn+jct`, `nbn+xsn+jp+ecc`, `nbn+xsn+jp+ecs`, `nbn+xsn+jp+ef`, `nbn+xsn+jp+ef+jcr`, `nbn+xsn+jp+ep+ef`, `nbn+xsn+jxc`, `nbn+xsn+jxt`, `nbn+xsv+etm`, `nbu`, `nbu+jca`, `nbu+jca+jxc`, `nbu+jca+jxt`, `nbu+jcc`, `nbu+jcc+jxc`, `nbu+jcj`, `nbu+jcm`, `nbu+jco`, `nbu+jcs`, `nbu+jct`, `nbu+jct+jxc`, `nbu+jp+ecc`, `nbu+jp+ecs`, `nbu+jp+ef`, `nbu+jp+ef+jcr`, `nbu+jp+ef+jxc`, `nbu+jp+ep+ecc`, `nbu+jp+ep+ecs`, `nbu+jp+ep+ef`, `nbu+jp+ep+ef+jcr`, `nbu+jp+ep+etm`, `nbu+jp+ep+etn+jco`, `nbu+jp+etm`, `nbu+jxc`, `nbu+jxc+jca`, `nbu+jxc+jcs`, `nbu+jxc+jp+ef`, `nbu+jxc+jp+ep+ef`, `nbu+jxc+jxt`, `nbu+jxt`, `nbu+ncn`, `nbu+ncn+jca`, `nbu+ncn+jcm`, `nbu+xsn`, `nbu+xsn+jca`, `nbu+xsn+jca+jxc`, `nbu+xsn+jca+jxt`, `nbu+xsn+jcm`, `nbu+xsn+jco`, `nbu+xsn+jcs`, `nbu+xsn+jp+ecs`, `nbu+xsn+jp+ep+ef`, `nbu+xsn+jxc`, `nbu+xsn+jxc+jxt`, `nbu+xsn+jxt`, `nbu+xsv+ecc`, `nbu+xsv+etm`, `ncn`, `ncn+f+ncpa+jco`, `ncn+jca`, `ncn+jca+jca`, `ncn+jca+jcc`, `ncn+jca+jcj`, `ncn+jca+jcm`, `ncn+jca+jcs`, `ncn+jca+jct`, `ncn+jca+jp+ecc`, `ncn+jca+jp+ecs`, `ncn+jca+jp+ef`, `ncn+jca+jp+ep+ef`, `ncn+jca+jp+etm`, `ncn+jca+jp+etn+jxt`, `ncn+jca+jxc`, `ncn+jca+jxc+jcc`, `ncn+jca+jxc+jcm`, `ncn+jca+jxc+jxc`, `ncn+jca+jxc+jxt`, `ncn+jca+jxt`, `ncn+jcc`, `ncn+jcc+jxc`, `ncn+jcj`, `ncn+jcj+jxt`, `ncn+jcm`, `ncn+jco`, `ncn+jcr`, `ncn+jcr+jxc`, `ncn+jcs`, `ncn+jcs+jxt`, `ncn+jct`, `ncn+jct+jcm`, `ncn+jct+jxc`, `ncn+jct+jxt`, `ncn+jcv`, `ncn+jp+ecc`, `ncn+jp+ecc+jct`, `ncn+jp+ecc+jxc`, `ncn+jp+ecs`, `ncn+jp+ecs+jcm`, `ncn+jp+ecs+jco`, `ncn+jp+ecs+jxc`, `ncn+jp+ecs+jxt`, `ncn+jp+ecx`, `ncn+jp+ef`, `ncn+jp+ef+jca`, `ncn+jp+ef+jcm`, `ncn+jp+ef+jco`, `ncn+jp+ef+jcr`, `ncn+jp+ef+jcr+jxc`, `ncn+jp+ef+jcr+jxt`, `ncn+jp+ef+jp+etm`, `ncn+jp+ef+jxc`, `ncn+jp+ef+jxf`, `ncn+jp+ef+jxt`, `ncn+jp+ep+ecc`, `ncn+jp+ep+ecs`, `ncn+jp+ep+ecs+jxc`, 
`ncn+jp+ep+ecx`, `ncn+jp+ep+ef`, `ncn+jp+ep+ef+jcr`, `ncn+jp+ep+ef+jcr+jxc`, `ncn+jp+ep+ef+jxc`, `ncn+jp+ep+ef+jxf`, `ncn+jp+ep+ef+jxt`, `ncn+jp+ep+ep+etm`, `ncn+jp+ep+etm`, `ncn+jp+ep+etn`, `ncn+jp+ep+etn+jca`, `ncn+jp+ep+etn+jca+jxc`, `ncn+jp+ep+etn+jco`, `ncn+jp+ep+etn+jcs`, `ncn+jp+ep+etn+jxt`, `ncn+jp+etm`, `ncn+jp+etn`, `ncn+jp+etn+jca`, `ncn+jp+etn+jca+jxc`, `ncn+jp+etn+jca+jxt`, `ncn+jp+etn+jco`, `ncn+jp+etn+jcs`, `ncn+jp+etn+jct`, `ncn+jp+etn+jxc`, `ncn+jp+etn+jxt`, `ncn+jxc`, `ncn+jxc+jca`, `ncn+jxc+jca+jxc`, `ncn+jxc+jca+jxt`, `ncn+jxc+jcc`, `ncn+jxc+jcm`, `ncn+jxc+jco`, `ncn+jxc+jcs`, `ncn+jxc+jct+jxt`, `ncn+jxc+jp+ef`, `ncn+jxc+jp+ef+jcr`, `ncn+jxc+jp+ep+ecs`, `ncn+jxc+jp+ep+ef`, `ncn+jxc+jp+etm`, `ncn+jxc+jxc`, `ncn+jxc+jxt`, `ncn+jxt`, `ncn+jxt+jcm`, `ncn+jxt+jxc`, `ncn+nbn`, `ncn+nbn+jca`, `ncn+nbn+jcm`, `ncn+nbn+jcs`, `ncn+nbn+jp+ecc`, `ncn+nbn+jp+ep+ef`, `ncn+nbn+jxc`, `ncn+nbn+jxt`, `ncn+nbu`, `ncn+nbu+jca`, `ncn+nbu+jcm`, `ncn+nbu+jco`, `ncn+nbu+jp+ef`, `ncn+nbu+jxc`, `ncn+nbu+ncn`, `ncn+ncn`, `ncn+ncn+jca`, `ncn+ncn+jca+jcc`, `ncn+ncn+jca+jcm`, `ncn+ncn+jca+jxc`, `ncn+ncn+jca+jxc+jcm`, `ncn+ncn+jca+jxc+jxc`, `ncn+ncn+jca+jxt`, `ncn+ncn+jcc`, `ncn+ncn+jcj`, `ncn+ncn+jcm`, `ncn+ncn+jco`, `ncn+ncn+jcr`, `ncn+ncn+jcs`, `ncn+ncn+jct`, `ncn+ncn+jct+jcm`, `ncn+ncn+jct+jxc`, `ncn+ncn+jct+jxt`, `ncn+ncn+jp+ecc`, `ncn+ncn+jp+ecs`, `ncn+ncn+jp+ef`, `ncn+ncn+jp+ef+jcm`, `ncn+ncn+jp+ef+jcr`, `ncn+ncn+jp+ef+jcs`, `ncn+ncn+jp+ep+ecc`, `ncn+ncn+jp+ep+ecs`, `ncn+ncn+jp+ep+ef`, `ncn+ncn+jp+ep+ef+jcr`, `ncn+ncn+jp+ep+ep+etm`, `ncn+ncn+jp+ep+etm`, `ncn+ncn+jp+ep+etn`, `ncn+ncn+jp+etm`, `ncn+ncn+jp+etn`, `ncn+ncn+jp+etn+jca`, `ncn+ncn+jp+etn+jco`, `ncn+ncn+jp+etn+jxc`, `ncn+ncn+jxc`, `ncn+ncn+jxc+jca`, `ncn+ncn+jxc+jcc`, `ncn+ncn+jxc+jcm`, `ncn+ncn+jxc+jco`, `ncn+ncn+jxc+jcs`, `ncn+ncn+jxc+jxc`, `ncn+ncn+jxt`, `ncn+ncn+nbn`, `ncn+ncn+ncn`, `ncn+ncn+ncn+jca`, `ncn+ncn+ncn+jca+jcm`, `ncn+ncn+ncn+jca+jxt`, `ncn+ncn+ncn+jcj`, `ncn+ncn+ncn+jcm`, `ncn+ncn+ncn+jco`, 
`ncn+ncn+ncn+jcs`, `ncn+ncn+ncn+jct+jxt`, `ncn+ncn+ncn+jp+etn+jxc`, `ncn+ncn+ncn+jxt`, `ncn+ncn+ncn+ncn+jca`, `ncn+ncn+ncn+ncn+jca+jxt`, `ncn+ncn+ncn+ncn+jco`, `ncn+ncn+ncn+xsn+jp+etm`, `ncn+ncn+ncpa`, `ncn+ncn+ncpa+jca`, `ncn+ncn+ncpa+jcm`, `ncn+ncn+ncpa+jco`, `ncn+ncn+ncpa+jcs`, `ncn+ncn+ncpa+jxc`, `ncn+ncn+ncpa+jxt`, `ncn+ncn+ncpa+ncn`, `ncn+ncn+ncpa+ncn+jca`, `ncn+ncn+ncpa+ncn+jcj`, `ncn+ncn+ncpa+ncn+jcm`, `ncn+ncn+ncpa+ncn+jxt`, `ncn+ncn+xsn`, `ncn+ncn+xsn+jca`, `ncn+ncn+xsn+jca+jxt`, `ncn+ncn+xsn+jcj`, `ncn+ncn+xsn+jcm`, `ncn+ncn+xsn+jco`, `ncn+ncn+xsn+jcs`, `ncn+ncn+xsn+jct`, `ncn+ncn+xsn+jp+ecs`, `ncn+ncn+xsn+jp+ep+ef`, `ncn+ncn+xsn+jp+etm`, `ncn+ncn+xsn+jxc`, `ncn+ncn+xsn+jxc+jcs`, `ncn+ncn+xsn+jxt`, `ncn+ncn+xsv+ecc`, `ncn+ncn+xsv+etm`, `ncn+ncpa`, `ncn+ncpa+jca`, `ncn+ncpa+jca+jcm`, `ncn+ncpa+jca+jxc`, `ncn+ncpa+jca+jxt`, `ncn+ncpa+jcc`, `ncn+ncpa+jcj`, `ncn+ncpa+jcm`, `ncn+ncpa+jco`, `ncn+ncpa+jcr`, `ncn+ncpa+jcs`, `ncn+ncpa+jct`, `ncn+ncpa+jct+jcm`, `ncn+ncpa+jct+jxt`, `ncn+ncpa+jp+ecc`, `ncn+ncpa+jp+ecc+jxc`, `ncn+ncpa+jp+ecs`, `ncn+ncpa+jp+ecs+jxc`, `ncn+ncpa+jp+ef`, `ncn+ncpa+jp+ef+jcr`, `ncn+ncpa+jp+ef+jcr+jxc`, `ncn+ncpa+jp+ep+ef`, `ncn+ncpa+jp+ep+etm`, `ncn+ncpa+jp+ep+etn`, `ncn+ncpa+jp+etm`, `ncn+ncpa+jxc`, `ncn+ncpa+jxc+jca+jxc`, `ncn+ncpa+jxc+jco`, `ncn+ncpa+jxc+jcs`, `ncn+ncpa+jxt`, `ncn+ncpa+nbn+jcs`, `ncn+ncpa+ncn`, `ncn+ncpa+ncn+jca`, `ncn+ncpa+ncn+jca+jcm`, `ncn+ncpa+ncn+jca+jxc`, `ncn+ncpa+ncn+jca+jxt`, `ncn+ncpa+ncn+jcj`, `ncn+ncpa+ncn+jcm`, `ncn+ncpa+ncn+jco`, `ncn+ncpa+ncn+jcs`, `ncn+ncpa+ncn+jct`, `ncn+ncpa+ncn+jct+jcm`, `ncn+ncpa+ncn+jp+ef+jcr`, `ncn+ncpa+ncn+jp+ep+etm`, `ncn+ncpa+ncn+jxc`, `ncn+ncpa+ncn+jxt`, `ncn+ncpa+ncn+xsn+jcm`, `ncn+ncpa+ncn+xsn+jxt`, `ncn+ncpa+ncpa`, `ncn+ncpa+ncpa+jca`, `ncn+ncpa+ncpa+jcj`, `ncn+ncpa+ncpa+jcm`, `ncn+ncpa+ncpa+jco`, `ncn+ncpa+ncpa+jcs`, `ncn+ncpa+ncpa+jp+ep+ef`, `ncn+ncpa+ncpa+jxt`, `ncn+ncpa+ncpa+ncn`, `ncn+ncpa+xsn`, `ncn+ncpa+xsn+jcm`, `ncn+ncpa+xsn+jco`, `ncn+ncpa+xsn+jcs`, 
`ncn+ncpa+xsn+jp+ecc`, `ncn+ncpa+xsn+jp+etm`, `ncn+ncpa+xsn+jxt`, `ncn+ncpa+xsv+ecc`, `ncn+ncpa+xsv+ecs`, `ncn+ncpa+xsv+ecx`, `ncn+ncpa+xsv+ecx+px+etm`, `ncn+ncpa+xsv+ef`, `ncn+ncpa+xsv+ef+jcm`, `ncn+ncpa+xsv+ef+jcr`, `ncn+ncpa+xsv+etm`, `ncn+ncpa+xsv+etn`, _(truncated: full list in pipeline meta)_ |
92
- | **`morphologizer`** | `POS=CCONJ`, `POS=ADV`, `POS=SCONJ`, `POS=DET`, `POS=NOUN`, `POS=VERB`, `POS=ADJ`, `POS=PUNCT`, `POS=AUX`, `POS=PRON`, `POS=PROPN`, `POS=NUM`, `POS=INTJ`, `POS=PART`, `POS=X`, `POS=ADP`, `POS=SYM` |
93
  | **`parser`** | `ROOT`, `acl`, `advcl`, `advmod`, `amod`, `appos`, `aux`, `case`, `cc`, `ccomp`, `compound`, `conj`, `cop`, `csubj`, `dep`, `det`, `dislocated`, `fixed`, `flat`, `iobj`, `mark`, `nmod`, `nsubj`, `nummod`, `obj`, `obl`, `punct`, `xcomp` |
94
  | **`ner`** | `DT`, `LC`, `OG`, `PS`, `QT`, `TI` |
95
 
@@ -103,14 +103,14 @@ Korean pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, p
103
  | `TOKEN_P` | 100.00 |
104
  | `TOKEN_R` | 100.00 |
105
  | `TOKEN_F` | 100.00 |
106
- | `TAG_ACC` | 73.36 |
107
- | `POS_ACC` | 86.09 |
108
  | `SENTS_P` | 100.00 |
109
  | `SENTS_R` | 100.00 |
110
  | `SENTS_F` | 100.00 |
111
- | `DEP_UAS` | 73.76 |
112
- | `DEP_LAS` | 65.56 |
113
- | `LEMMA_ACC` | 83.18 |
114
- | `ENTS_P` | 77.14 |
115
- | `ENTS_R` | 66.28 |
116
- | `ENTS_F` | 71.30 |
 
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
+ value: 0.7701563395
18
  - name: NER Recall
19
  type: recall
20
+ value: 0.6647121159
21
  - name: NER F Score
22
  type: f_score
23
+ value: 0.7135598362
24
  - task:
25
  name: TAG
26
  type: token-classification
27
  metrics:
28
  - name: TAG (XPOS) Accuracy
29
  type: accuracy
30
+ value: 0.7320951759
31
  - task:
32
  name: POS
33
  type: token-classification
34
  metrics:
35
  - name: POS (UPOS) Accuracy
36
  type: accuracy
37
+ value: 0.860041932
38
  - task:
39
  name: LEMMA
40
  type: token-classification
41
  metrics:
42
  - name: Lemma Accuracy
43
  type: accuracy
44
+ value: 0.8340354833
45
  - task:
46
  name: UNLABELED_DEPENDENCIES
47
  type: token-classification
48
  metrics:
49
  - name: Unlabeled Attachment Score (UAS)
50
  type: f_score
51
+ value: 0.7420346687
52
  - task:
53
  name: LABELED_DEPENDENCIES
54
  type: token-classification
55
  metrics:
56
  - name: Labeled Attachment Score (LAS)
57
  type: f_score
58
+ value: 0.6614531114
59
  - task:
60
  name: SENTS
61
  type: token-classification
 
71
  | Feature | Description |
72
  | --- | --- |
73
  | **Name** | `ko_core_news_sm` |
74
+ | **Version** | `3.4.0` |
75
+ | **spaCy** | `>=3.4.0,<3.5.0` |
76
  | **Default Pipeline** | `tok2vec`, `tagger`, `morphologizer`, `parser`, `lemmatizer`, `attribute_ruler`, `ner` |
77
  | **Components** | `tok2vec`, `tagger`, `morphologizer`, `parser`, `lemmatizer`, `senter`, `attribute_ruler`, `ner` |
78
  | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
 
84
 
85
  <details>
86
 
87
+ <summary>View label scheme (2028 labels for 4 components)</summary>
88
 
89
  | Component | Labels |
90
  | --- | --- |
91
+ | **`tagger`** | `_SP`, `ecs`, `etm`, `f`, `f+f+jcj`, `f+f+jcs`, `f+f+jct`, `f+f+jxt`, `f+jca`, `f+jca+jp+ecc`, `f+jca+jp+ep+ef`, `f+jca+jxc`, `f+jca+jxc+jcm`, `f+jca+jxt`, `f+jcj`, `f+jcm`, `f+jco`, `f+jcs`, `f+jct`, `f+jct+jcm`, `f+jp+ef`, `f+jp+ep+ef`, `f+jp+etm`, `f+jxc`, `f+jxt`, `f+ncn`, `f+ncn+jcm`, `f+ncn+jcs`, `f+ncn+jp+ecc`, `f+ncn+jxt`, `f+ncpa+jcm`, `f+npp+jcs`, `f+nq`, `f+xsn`, `f+xsn+jco`, `f+xsn+jxt`, `ii`, `jca`, `jca+jcm`, `jca+jxc`, `jca+jxt`, `jcc`, `jcj`, `jcm`, `jco`, `jcr`, `jcr+jxc`, `jcs`, `jct`, `jct+jcm`, `jct+jxt`, `jp+ecc`, `jp+ecs`, `jp+ef`, `jp+ef+jcr`, `jp+ef+jcr+jxc`, `jp+ep+ecs`, `jp+ep+ef`, `jp+ep+etm`, `jp+ep+etn`, `jp+etm`, `jp+etn`, `jp+etn+jco`, `jp+etn+jxc`, `jxc`, `jxc+jca`, `jxc+jco`, `jxc+jcs`, `jxt`, `mad`, `mad+jxc`, `mad+jxt`, `mag`, `mag+jca`, `mag+jcm`, `mag+jcs`, `mag+jp+ef+jcr`, `mag+jxc`, `mag+jxc+jxc`, `mag+jxt`, `mag+xsn`, `maj`, `maj+jxc`, `maj+jxt`, `mma`, `mmd`, `nbn`, `nbn+jca`, `nbn+jca+jcj`, `nbn+jca+jcm`, `nbn+jca+jp+ef`, `nbn+jca+jxc`, `nbn+jca+jxt`, `nbn+jcc`, `nbn+jcj`, `nbn+jcm`, `nbn+jco`, `nbn+jcr`, `nbn+jcs`, `nbn+jct`, `nbn+jct+jcm`, `nbn+jct+jxt`, `nbn+jp+ecc`, `nbn+jp+ecs`, `nbn+jp+ecs+jca`, `nbn+jp+ecs+jcm`, `nbn+jp+ecs+jco`, `nbn+jp+ecs+jxc`, `nbn+jp+ecs+jxt`, `nbn+jp+ecx`, `nbn+jp+ef`, `nbn+jp+ef+jca`, `nbn+jp+ef+jco`, `nbn+jp+ef+jcr`, `nbn+jp+ef+jcr+jxc`, `nbn+jp+ef+jcr+jxt`, `nbn+jp+ef+jcs`, `nbn+jp+ef+jxc`, `nbn+jp+ef+jxc+jco`, `nbn+jp+ef+jxf`, `nbn+jp+ef+jxt`, `nbn+jp+ep+ecc`, `nbn+jp+ep+ecs`, `nbn+jp+ep+ecs+jxc`, `nbn+jp+ep+ef`, `nbn+jp+ep+ef+jcr`, `nbn+jp+ep+etm`, `nbn+jp+ep+etn`, `nbn+jp+ep+etn+jco`, `nbn+jp+ep+etn+jcs`, `nbn+jp+etm`, `nbn+jp+etn`, `nbn+jp+etn+jca`, `nbn+jp+etn+jca+jxt`, `nbn+jp+etn+jco`, `nbn+jp+etn+jcs`, `nbn+jp+etn+jxc`, `nbn+jp+etn+jxt`, `nbn+jxc`, `nbn+jxc+jca`, `nbn+jxc+jca+jxc`, `nbn+jxc+jca+jxt`, `nbn+jxc+jcc`, `nbn+jxc+jcm`, `nbn+jxc+jco`, `nbn+jxc+jcs`, `nbn+jxc+jp+ef`, `nbn+jxc+jxc`, `nbn+jxc+jxt`, `nbn+jxt`, `nbn+nbn`, `nbn+nbn+jp+ef`, `nbn+xsm+ecs`, 
`nbn+xsm+ef`, `nbn+xsm+ep+ef`, `nbn+xsm+ep+ef+jcr`, `nbn+xsm+etm`, `nbn+xsn`, `nbn+xsn+jca`, `nbn+xsn+jca+jp+ef+jcr`, `nbn+xsn+jca+jxc`, `nbn+xsn+jca+jxt`, `nbn+xsn+jcm`, `nbn+xsn+jco`, `nbn+xsn+jcs`, `nbn+xsn+jct`, `nbn+xsn+jp+ecc`, `nbn+xsn+jp+ecs`, `nbn+xsn+jp+ef`, `nbn+xsn+jp+ef+jcr`, `nbn+xsn+jp+ep+ef`, `nbn+xsn+jxc`, `nbn+xsn+jxt`, `nbn+xsv+etm`, `nbu`, `nbu+jca`, `nbu+jca+jxc`, `nbu+jca+jxt`, `nbu+jcc`, `nbu+jcc+jxc`, `nbu+jcj`, `nbu+jcm`, `nbu+jco`, `nbu+jcs`, `nbu+jct`, `nbu+jct+jxc`, `nbu+jp+ecc`, `nbu+jp+ecs`, `nbu+jp+ef`, `nbu+jp+ef+jcr`, `nbu+jp+ef+jxc`, `nbu+jp+ep+ecc`, `nbu+jp+ep+ecs`, `nbu+jp+ep+ef`, `nbu+jp+ep+ef+jcr`, `nbu+jp+ep+etm`, `nbu+jp+ep+etn+jco`, `nbu+jp+etm`, `nbu+jxc`, `nbu+jxc+jca`, `nbu+jxc+jcs`, `nbu+jxc+jp+ef`, `nbu+jxc+jp+ep+ef`, `nbu+jxc+jxt`, `nbu+jxt`, `nbu+ncn`, `nbu+ncn+jca`, `nbu+ncn+jcm`, `nbu+xsn`, `nbu+xsn+jca`, `nbu+xsn+jca+jxc`, `nbu+xsn+jca+jxt`, `nbu+xsn+jcm`, `nbu+xsn+jco`, `nbu+xsn+jcs`, `nbu+xsn+jp+ecs`, `nbu+xsn+jp+ep+ef`, `nbu+xsn+jxc`, `nbu+xsn+jxc+jxt`, `nbu+xsn+jxt`, `nbu+xsv+ecc`, `nbu+xsv+etm`, `ncn`, `ncn+f+ncpa+jco`, `ncn+jca`, `ncn+jca+jca`, `ncn+jca+jcc`, `ncn+jca+jcj`, `ncn+jca+jcm`, `ncn+jca+jcs`, `ncn+jca+jct`, `ncn+jca+jp+ecc`, `ncn+jca+jp+ecs`, `ncn+jca+jp+ef`, `ncn+jca+jp+ep+ef`, `ncn+jca+jp+etm`, `ncn+jca+jp+etn+jxt`, `ncn+jca+jxc`, `ncn+jca+jxc+jcc`, `ncn+jca+jxc+jcm`, `ncn+jca+jxc+jxc`, `ncn+jca+jxc+jxt`, `ncn+jca+jxt`, `ncn+jcc`, `ncn+jcc+jxc`, `ncn+jcj`, `ncn+jcj+jxt`, `ncn+jcm`, `ncn+jco`, `ncn+jcr`, `ncn+jcr+jxc`, `ncn+jcs`, `ncn+jcs+jxt`, `ncn+jct`, `ncn+jct+jcm`, `ncn+jct+jxc`, `ncn+jct+jxt`, `ncn+jcv`, `ncn+jp+ecc`, `ncn+jp+ecc+jct`, `ncn+jp+ecc+jxc`, `ncn+jp+ecs`, `ncn+jp+ecs+jcm`, `ncn+jp+ecs+jco`, `ncn+jp+ecs+jxc`, `ncn+jp+ecs+jxt`, `ncn+jp+ecx`, `ncn+jp+ef`, `ncn+jp+ef+jca`, `ncn+jp+ef+jcm`, `ncn+jp+ef+jco`, `ncn+jp+ef+jcr`, `ncn+jp+ef+jcr+jxc`, `ncn+jp+ef+jcr+jxt`, `ncn+jp+ef+jp+etm`, `ncn+jp+ef+jxc`, `ncn+jp+ef+jxf`, `ncn+jp+ef+jxt`, `ncn+jp+ep+ecc`, `ncn+jp+ep+ecs`, 
`ncn+jp+ep+ecs+jxc`, `ncn+jp+ep+ecx`, `ncn+jp+ep+ef`, `ncn+jp+ep+ef+jcr`, `ncn+jp+ep+ef+jcr+jxc`, `ncn+jp+ep+ef+jxc`, `ncn+jp+ep+ef+jxf`, `ncn+jp+ep+ef+jxt`, `ncn+jp+ep+ep+etm`, `ncn+jp+ep+etm`, `ncn+jp+ep+etn`, `ncn+jp+ep+etn+jca`, `ncn+jp+ep+etn+jca+jxc`, `ncn+jp+ep+etn+jco`, `ncn+jp+ep+etn+jcs`, `ncn+jp+ep+etn+jxt`, `ncn+jp+etm`, `ncn+jp+etn`, `ncn+jp+etn+jca`, `ncn+jp+etn+jca+jxc`, `ncn+jp+etn+jca+jxt`, `ncn+jp+etn+jco`, `ncn+jp+etn+jcs`, `ncn+jp+etn+jct`, `ncn+jp+etn+jxc`, `ncn+jp+etn+jxt`, `ncn+jxc`, `ncn+jxc+jca`, `ncn+jxc+jca+jxc`, `ncn+jxc+jca+jxt`, `ncn+jxc+jcc`, `ncn+jxc+jcm`, `ncn+jxc+jco`, `ncn+jxc+jcs`, `ncn+jxc+jct+jxt`, `ncn+jxc+jp+ef`, `ncn+jxc+jp+ef+jcr`, `ncn+jxc+jp+ep+ecs`, `ncn+jxc+jp+ep+ef`, `ncn+jxc+jp+etm`, `ncn+jxc+jxc`, `ncn+jxc+jxt`, `ncn+jxt`, `ncn+jxt+jcm`, `ncn+jxt+jxc`, `ncn+nbn`, `ncn+nbn+jca`, `ncn+nbn+jcm`, `ncn+nbn+jcs`, `ncn+nbn+jp+ecc`, `ncn+nbn+jp+ep+ef`, `ncn+nbn+jxc`, `ncn+nbn+jxt`, `ncn+nbu`, `ncn+nbu+jca`, `ncn+nbu+jcm`, `ncn+nbu+jco`, `ncn+nbu+jp+ef`, `ncn+nbu+jxc`, `ncn+nbu+ncn`, `ncn+ncn`, `ncn+ncn+jca`, `ncn+ncn+jca+jcc`, `ncn+ncn+jca+jcm`, `ncn+ncn+jca+jxc`, `ncn+ncn+jca+jxc+jcm`, `ncn+ncn+jca+jxc+jxc`, `ncn+ncn+jca+jxt`, `ncn+ncn+jcc`, `ncn+ncn+jcj`, `ncn+ncn+jcm`, `ncn+ncn+jco`, `ncn+ncn+jcr`, `ncn+ncn+jcs`, `ncn+ncn+jct`, `ncn+ncn+jct+jcm`, `ncn+ncn+jct+jxc`, `ncn+ncn+jct+jxt`, `ncn+ncn+jp+ecc`, `ncn+ncn+jp+ecs`, `ncn+ncn+jp+ef`, `ncn+ncn+jp+ef+jcm`, `ncn+ncn+jp+ef+jcr`, `ncn+ncn+jp+ef+jcs`, `ncn+ncn+jp+ep+ecc`, `ncn+ncn+jp+ep+ecs`, `ncn+ncn+jp+ep+ef`, `ncn+ncn+jp+ep+ef+jcr`, `ncn+ncn+jp+ep+ep+etm`, `ncn+ncn+jp+ep+etm`, `ncn+ncn+jp+ep+etn`, `ncn+ncn+jp+etm`, `ncn+ncn+jp+etn`, `ncn+ncn+jp+etn+jca`, `ncn+ncn+jp+etn+jco`, `ncn+ncn+jp+etn+jxc`, `ncn+ncn+jxc`, `ncn+ncn+jxc+jca`, `ncn+ncn+jxc+jcc`, `ncn+ncn+jxc+jcm`, `ncn+ncn+jxc+jco`, `ncn+ncn+jxc+jcs`, `ncn+ncn+jxc+jxc`, `ncn+ncn+jxt`, `ncn+ncn+nbn`, `ncn+ncn+ncn`, `ncn+ncn+ncn+jca`, `ncn+ncn+ncn+jca+jcm`, `ncn+ncn+ncn+jca+jxt`, `ncn+ncn+ncn+jcj`, `ncn+ncn+ncn+jcm`, 
`ncn+ncn+ncn+jco`, `ncn+ncn+ncn+jcs`, `ncn+ncn+ncn+jct+jxt`, `ncn+ncn+ncn+jp+etn+jxc`, `ncn+ncn+ncn+jxt`, `ncn+ncn+ncn+ncn+jca`, `ncn+ncn+ncn+ncn+jca+jxt`, `ncn+ncn+ncn+ncn+jco`, `ncn+ncn+ncn+xsn+jp+etm`, `ncn+ncn+ncpa`, `ncn+ncn+ncpa+jca`, `ncn+ncn+ncpa+jcm`, `ncn+ncn+ncpa+jco`, `ncn+ncn+ncpa+jcs`, `ncn+ncn+ncpa+jxc`, `ncn+ncn+ncpa+jxt`, `ncn+ncn+ncpa+ncn`, `ncn+ncn+ncpa+ncn+jca`, `ncn+ncn+ncpa+ncn+jcj`, `ncn+ncn+ncpa+ncn+jcm`, `ncn+ncn+ncpa+ncn+jxt`, `ncn+ncn+xsn`, `ncn+ncn+xsn+jca`, `ncn+ncn+xsn+jca+jxt`, `ncn+ncn+xsn+jcj`, `ncn+ncn+xsn+jcm`, `ncn+ncn+xsn+jco`, `ncn+ncn+xsn+jcs`, `ncn+ncn+xsn+jct`, `ncn+ncn+xsn+jp+ecs`, `ncn+ncn+xsn+jp+ep+ef`, `ncn+ncn+xsn+jp+etm`, `ncn+ncn+xsn+jxc`, `ncn+ncn+xsn+jxc+jcs`, `ncn+ncn+xsn+jxt`, `ncn+ncn+xsv+ecc`, `ncn+ncn+xsv+etm`, `ncn+ncpa`, `ncn+ncpa+jca`, `ncn+ncpa+jca+jcm`, `ncn+ncpa+jca+jxc`, `ncn+ncpa+jca+jxt`, `ncn+ncpa+jcc`, `ncn+ncpa+jcj`, `ncn+ncpa+jcm`, `ncn+ncpa+jco`, `ncn+ncpa+jcr`, `ncn+ncpa+jcs`, `ncn+ncpa+jct`, `ncn+ncpa+jct+jcm`, `ncn+ncpa+jct+jxt`, `ncn+ncpa+jp+ecc`, `ncn+ncpa+jp+ecc+jxc`, `ncn+ncpa+jp+ecs`, `ncn+ncpa+jp+ecs+jxc`, `ncn+ncpa+jp+ef`, `ncn+ncpa+jp+ef+jcr`, `ncn+ncpa+jp+ef+jcr+jxc`, `ncn+ncpa+jp+ep+ef`, `ncn+ncpa+jp+ep+etm`, `ncn+ncpa+jp+ep+etn`, `ncn+ncpa+jp+etm`, `ncn+ncpa+jxc`, `ncn+ncpa+jxc+jca+jxc`, `ncn+ncpa+jxc+jco`, `ncn+ncpa+jxc+jcs`, `ncn+ncpa+jxt`, `ncn+ncpa+nbn+jcs`, `ncn+ncpa+ncn`, `ncn+ncpa+ncn+jca`, `ncn+ncpa+ncn+jca+jcm`, `ncn+ncpa+ncn+jca+jxc`, `ncn+ncpa+ncn+jca+jxt`, `ncn+ncpa+ncn+jcj`, `ncn+ncpa+ncn+jcm`, `ncn+ncpa+ncn+jco`, `ncn+ncpa+ncn+jcs`, `ncn+ncpa+ncn+jct`, `ncn+ncpa+ncn+jct+jcm`, `ncn+ncpa+ncn+jp+ef+jcr`, `ncn+ncpa+ncn+jp+ep+etm`, `ncn+ncpa+ncn+jxc`, `ncn+ncpa+ncn+jxt`, `ncn+ncpa+ncn+xsn+jcm`, `ncn+ncpa+ncn+xsn+jxt`, `ncn+ncpa+ncpa`, `ncn+ncpa+ncpa+jca`, `ncn+ncpa+ncpa+jcj`, `ncn+ncpa+ncpa+jcm`, `ncn+ncpa+ncpa+jco`, `ncn+ncpa+ncpa+jcs`, `ncn+ncpa+ncpa+jp+ep+ef`, `ncn+ncpa+ncpa+jxt`, `ncn+ncpa+ncpa+ncn`, `ncn+ncpa+xsn`, `ncn+ncpa+xsn+jcm`, `ncn+ncpa+xsn+jco`, 
`ncn+ncpa+xsn+jcs`, `ncn+ncpa+xsn+jp+ecc`, `ncn+ncpa+xsn+jp+etm`, `ncn+ncpa+xsn+jxt`, `ncn+ncpa+xsv+ecc`, `ncn+ncpa+xsv+ecs`, `ncn+ncpa+xsv+ecx`, `ncn+ncpa+xsv+ecx+px+etm`, `ncn+ncpa+xsv+ef`, `ncn+ncpa+xsv+ef+jcm`, `ncn+ncpa+xsv+ef+jcr`, `ncn+ncpa+xsv+etm`, _(truncated: full list in pipeline meta)_ |
92
+ | **`morphologizer`** | `POS=CCONJ`, `POS=ADV`, `POS=SCONJ`, `POS=DET`, `POS=NOUN`, `POS=VERB`, `POS=ADJ`, `POS=PUNCT`, `POS=SPACE`, `POS=AUX`, `POS=PRON`, `POS=PROPN`, `POS=NUM`, `POS=INTJ`, `POS=PART`, `POS=X`, `POS=ADP`, `POS=SYM` |
93
  | **`parser`** | `ROOT`, `acl`, `advcl`, `advmod`, `amod`, `appos`, `aux`, `case`, `cc`, `ccomp`, `compound`, `conj`, `cop`, `csubj`, `dep`, `det`, `dislocated`, `fixed`, `flat`, `iobj`, `mark`, `nmod`, `nsubj`, `nummod`, `obj`, `obl`, `punct`, `xcomp` |
94
  | **`ner`** | `DT`, `LC`, `OG`, `PS`, `QT`, `TI` |
95
 
 
103
  | `TOKEN_P` | 100.00 |
104
  | `TOKEN_R` | 100.00 |
105
  | `TOKEN_F` | 100.00 |
106
+ | `TAG_ACC` | 73.21 |
107
+ | `POS_ACC` | 86.00 |
108
  | `SENTS_P` | 100.00 |
109
  | `SENTS_R` | 100.00 |
110
  | `SENTS_F` | 100.00 |
111
+ | `DEP_UAS` | 74.20 |
112
+ | `DEP_LAS` | 66.15 |
113
+ | `LEMMA_ACC` | 83.40 |
114
+ | `ENTS_P` | 77.02 |
115
+ | `ENTS_R` | 66.47 |
116
+ | `ENTS_F` | 71.36 |
accuracy.json CHANGED
@@ -3,118 +3,113 @@
3
  "token_p": 1.0,
4
  "token_r": 1.0,
5
  "token_f": 1.0,
6
- "tag_acc": 0.7335983702,
7
- "pos_acc": 0.8608501296,
8
  "sents_p": 1.0,
9
  "sents_r": 1.0,
10
  "sents_f": 1.0,
11
- "dep_uas": 0.7375785393,
12
- "dep_las": 0.6555857582,
13
  "dep_las_per_type": {
14
  "amod": {
15
- "p": 0.7562056738,
16
- "r": 0.7469352014,
17
- "f": 0.7515418502
18
  },
19
  "dislocated": {
20
- "p": 0.5577299413,
21
- "r": 0.555916775,
22
- "f": 0.5568218821
23
  },
24
  "root": {
25
- "p": 0.8025169409,
26
- "r": 0.8025169409,
27
- "f": 0.8025169409
28
  },
29
  "nmod": {
30
- "p": 0.6305841924,
31
- "r": 0.6534124629,
32
- "f": 0.6417953949
33
  },
34
  "nsubj": {
35
- "p": 0.5475830816,
36
- "r": 0.5781499203,
37
- "f": 0.5624515128
38
  },
39
  "advmod": {
40
- "p": 0.641955836,
41
- "r": 0.6389324961,
42
- "f": 0.640440598
43
  },
44
  "dep": {
45
- "p": 0.4827586207,
46
- "r": 0.3867403315,
47
- "f": 0.4294478528
48
  },
49
  "conj": {
50
- "p": 0.4474660074,
51
- "r": 0.4641025641,
52
- "f": 0.4556324733
53
  },
54
  "xcomp": {
55
- "p": 0.5263157895,
56
- "r": 0.4810996564,
57
- "f": 0.5026929982
58
  },
59
  "flat": {
60
- "p": 0.2916666667,
61
- "r": 0.0654205607,
62
- "f": 0.106870229
63
  },
64
  "obj": {
65
- "p": 0.7117552335,
66
- "r": 0.7125201505,
67
- "f": 0.7121374866
68
  },
69
  "acl": {
70
- "p": 0.6658566221,
71
- "r": 0.6574685063,
72
- "f": 0.6616359795
73
  },
74
  "advcl": {
75
- "p": 0.5574112735,
76
- "r": 0.5437881874,
77
- "f": 0.5505154639
78
  },
79
  "det": {
80
- "p": 0.7582781457,
81
- "r": 0.8609022556,
82
- "f": 0.8063380282
83
  },
84
  "compound": {
85
- "p": 0.6999360205,
86
- "r": 0.6678876679,
87
- "f": 0.6835363949
88
  },
89
  "ccomp": {
90
- "p": 0.4908946952,
91
- "r": 0.5145228216,
92
- "f": 0.5024311183
93
  },
94
  "obl": {
95
- "p": 0.6481223922,
96
- "r": 0.6357435198,
97
- "f": 0.6418732782
98
  },
99
  "aux": {
100
- "p": 0.8774811773,
101
- "r": 0.8990182328,
102
- "f": 0.8881191548
103
  },
104
  "cc": {
105
- "p": 0.7802547771,
106
- "r": 0.8112582781,
107
- "f": 0.7954545455
108
  },
109
  "nummod": {
110
- "p": 0.8333333333,
111
- "r": 0.8115183246,
112
- "f": 0.8222811671
113
- },
114
- "iobj": {
115
- "p": 0.7230769231,
116
- "r": 0.661971831,
117
- "f": 0.6911764706
118
  },
119
  "discourse": {
120
  "p": 0.0,
@@ -122,76 +117,81 @@
122
  "f": 0.0
123
  },
124
  "fixed": {
125
- "p": 0.9484126984,
126
- "r": 1.0,
127
- "f": 0.9735234216
128
  },
129
  "csubj": {
130
- "p": 0.5157894737,
131
- "r": 0.4454545455,
132
- "f": 0.4780487805
133
  },
134
  "mark": {
135
- "p": 0.6862745098,
136
- "r": 0.5384615385,
137
- "f": 0.6034482759
 
 
 
 
 
138
  },
139
  "case": {
140
- "p": 0.8787878788,
141
  "r": 0.8529411765,
142
- "f": 0.8656716418
143
- },
144
- "appos": {
145
- "p": 0.6111111111,
146
- "r": 0.7857142857,
147
- "f": 0.6875
148
  },
149
  "cop": {
150
- "p": 0.75,
151
- "r": 0.8,
152
- "f": 0.7741935484
153
  },
154
  "vocative": {
155
  "p": 0.0,
156
  "r": 0.0,
157
  "f": 0.0
 
 
 
 
 
158
  }
159
  },
160
- "lemma_acc": 0.8317806919,
161
- "speed": 10661.1584237889,
162
- "ents_p": 0.7714191745,
163
- "ents_r": 0.6628046627,
164
- "ents_f": 0.712999202,
165
  "ents_per_type": {
166
  "OG": {
167
- "p": 0.6825588413,
168
- "r": 0.5243393602,
169
- "f": 0.5930781332
170
  },
171
  "PS": {
172
- "p": 0.7701958384,
173
- "r": 0.5719154738,
174
- "f": 0.656408919
175
  },
176
  "QT": {
177
- "p": 0.853619981,
178
- "r": 0.8656620712,
179
- "f": 0.8595988539
180
  },
181
  "LC": {
182
- "p": 0.5845942228,
183
- "r": 0.5227552276,
184
- "f": 0.5519480519
185
  },
186
  "DT": {
187
- "p": 0.8222118525,
188
- "r": 0.7637624621,
189
- "f": 0.7919101124
190
  },
191
  "TI": {
192
- "p": 0.8846960168,
193
- "r": 0.7743119266,
194
- "f": 0.8258317025
195
  }
196
  }
197
  }
 
3
  "token_p": 1.0,
4
  "token_r": 1.0,
5
  "token_f": 1.0,
6
+ "tag_acc": 0.7320951759,
7
+ "pos_acc": 0.860041932,
8
  "sents_p": 1.0,
9
  "sents_r": 1.0,
10
  "sents_f": 1.0,
11
+ "dep_uas": 0.7420346687,
12
+ "dep_las": 0.6614531114,
13
  "dep_las_per_type": {
14
  "amod": {
15
+ "p": 0.7620320856,
16
+ "r": 0.7486865149,
17
+ "f": 0.7553003534
18
  },
19
  "dislocated": {
20
+ "p": 0.561827957,
21
+ "r": 0.5435630689,
22
+ "f": 0.5525446134
23
  },
24
  "root": {
25
+ "p": 0.8146176186,
26
+ "r": 0.8146176186,
27
+ "f": 0.8146176186
28
  },
29
  "nmod": {
30
+ "p": 0.626799557,
31
+ "r": 0.671810089,
32
+ "f": 0.648524778
33
  },
34
  "nsubj": {
35
+ "p": 0.5777604977,
36
+ "r": 0.5925039872,
37
+ "f": 0.5850393701
38
  },
39
  "advmod": {
40
+ "p": 0.6664056382,
41
+ "r": 0.6679748823,
42
+ "f": 0.6671893375
43
  },
44
  "dep": {
45
+ "p": 0.5165562914,
46
+ "r": 0.4309392265,
47
+ "f": 0.4698795181
48
  },
49
  "conj": {
50
+ "p": 0.4520123839,
51
+ "r": 0.4679487179,
52
+ "f": 0.4598425197
53
  },
54
  "xcomp": {
55
+ "p": 0.5681818182,
56
+ "r": 0.5154639175,
57
+ "f": 0.5405405405
58
  },
59
  "flat": {
60
+ "p": 0.7142857143,
61
+ "r": 0.0934579439,
62
+ "f": 0.1652892562
63
  },
64
  "obj": {
65
+ "p": 0.6926293779,
66
+ "r": 0.7119828049,
67
+ "f": 0.702172761
68
  },
69
  "acl": {
70
+ "p": 0.6581818182,
71
+ "r": 0.6514697061,
72
+ "f": 0.654808562
73
  },
74
  "advcl": {
75
+ "p": 0.5576005453,
76
+ "r": 0.55532926,
77
+ "f": 0.556462585
78
  },
79
  "det": {
80
+ "p": 0.7516339869,
81
+ "r": 0.8646616541,
82
+ "f": 0.8041958042
83
  },
84
  "compound": {
85
+ "p": 0.6823679185,
86
+ "r": 0.6544566545,
87
+ "f": 0.6681209099
88
  },
89
  "ccomp": {
90
+ "p": 0.5081833061,
91
+ "r": 0.5153526971,
92
+ "f": 0.5117428925
93
  },
94
  "obl": {
95
+ "p": 0.6657381616,
96
+ "r": 0.6521145975,
97
+ "f": 0.6588559614
98
  },
99
  "aux": {
100
+ "p": 0.8905817175,
101
+ "r": 0.9018232819,
102
+ "f": 0.8961672474
103
  },
104
  "cc": {
105
+ "p": 0.807073955,
106
+ "r": 0.8311258278,
107
+ "f": 0.8189233279
108
  },
109
  "nummod": {
110
+ "p": 0.875,
111
+ "r": 0.8429319372,
112
+ "f": 0.8586666667
 
 
 
 
 
113
  },
114
  "discourse": {
115
  "p": 0.0,
 
117
  "f": 0.0
118
  },
119
  "fixed": {
120
+ "p": 0.9596774194,
121
+ "r": 0.9958158996,
122
+ "f": 0.977412731
123
  },
124
  "csubj": {
125
+ "p": 0.5471698113,
126
+ "r": 0.5272727273,
127
+ "f": 0.537037037
128
  },
129
  "mark": {
130
+ "p": 0.6666666667,
131
+ "r": 0.5538461538,
132
+ "f": 0.6050420168
133
+ },
134
+ "iobj": {
135
+ "p": 0.6666666667,
136
+ "r": 0.5070422535,
137
+ "f": 0.576
138
  },
139
  "case": {
140
+ "p": 0.8529411765,
141
  "r": 0.8529411765,
142
+ "f": 0.8529411765
 
 
 
 
 
143
  },
144
  "cop": {
145
+ "p": 0.7647058824,
146
+ "r": 0.8666666667,
147
+ "f": 0.8125
148
  },
149
  "vocative": {
150
  "p": 0.0,
151
  "r": 0.0,
152
  "f": 0.0
153
+ },
154
+ "appos": {
155
+ "p": 0.6428571429,
156
+ "r": 0.6428571429,
157
+ "f": 0.6428571429
158
  }
159
  },
160
+ "lemma_acc": 0.8340354833,
161
+ "speed": 12550.4676822577,
162
+ "ents_p": 0.7701563395,
163
+ "ents_r": 0.6647121159,
164
+ "ents_f": 0.7135598362,
165
  "ents_per_type": {
166
  "OG": {
167
+ "p": 0.6808894231,
168
+ "r": 0.5252665739,
169
+ "f": 0.5930384716
170
  },
171
  "PS": {
172
+ "p": 0.7770642202,
173
+ "r": 0.5773687798,
174
+ "f": 0.6624951115
175
  },
176
  "QT": {
177
+ "p": 0.8434484905,
178
+ "r": 0.868868227,
179
+ "f": 0.8559696778
180
  },
181
  "LC": {
182
+ "p": 0.5856847901,
183
+ "r": 0.5233702337,
184
+ "f": 0.5527768756
185
  },
186
  "DT": {
187
+ "p": 0.8166589111,
188
+ "r": 0.7607282185,
189
+ "f": 0.7877019749
190
  },
191
  "TI": {
192
+ "p": 0.8952991453,
193
+ "r": 0.7688073394,
194
+ "f": 0.8272458045
195
  }
196
  }
197
  }
ko_core_news_sm-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:834bd49736cb201f1db9938588235ebe9d938da4f6f27a1c0f88ddbbba981a1d
3
- size 14706695
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1953f5d7937d3e913d371d432c36f3c9b6c5ea00e219b37b9f7f7e77f10d79e9
3
+ size 14703805
lemmatizer/cfg CHANGED
@@ -903,250 +903,243 @@
903
  1708,
904
  1709,
905
  1711,
906
- 1713,
907
  1716,
908
- 1718,
909
- 1721,
910
- 1722,
911
- 1725,
912
- 1728,
913
  1729,
 
914
  1731,
915
- 1732,
916
- 1733,
917
- 1736,
918
  1739,
919
- 1741,
920
  1744,
921
  1746,
922
- 1748,
923
- 1751,
924
  1754,
925
- 1756,
926
- 1761,
927
  1764,
928
- 1766,
929
  1769,
930
  1771,
931
  1773,
932
- 1775,
933
  120,
934
- 1777,
 
935
  1778,
936
- 1780,
937
  1783,
938
- 1785,
939
  1786,
940
  1788,
 
941
  1790,
942
- 1791,
943
  1792,
944
  1794,
945
  1796,
946
  1798,
947
- 1800,
948
  1803,
949
- 1805,
950
  1806,
951
- 1808,
952
- 1811,
953
  1812,
954
- 1814,
955
  888,
 
 
956
  1816,
957
  1817,
958
- 1818,
959
- 1819,
960
- 1822,
961
- 1825,
962
- 1828,
963
  1700,
964
- 1830,
965
- 1833,
966
- 1834,
967
- 1837,
 
968
  1840,
969
- 1842,
970
- 1845,
971
  1848,
972
  1850,
973
- 1853,
974
  1855,
 
975
  1857,
976
- 1860,
977
- 1861,
978
  1862,
979
- 1863,
980
  1865,
981
  1867,
982
- 1868,
983
  1870,
984
  1872,
985
- 1873,
986
- 1875,
987
- 1877,
988
- 1879,
989
  1881,
990
- 1884,
991
  1885,
992
- 1888,
993
  1889,
994
- 1892,
995
- 1894,
 
996
  1898,
997
  1900,
998
- 1901,
999
- 1903,
1000
- 1905,
1001
  1907,
 
1002
  1910,
1003
- 1911,
1004
  1913,
1005
- 1915,
1006
- 1916,
1007
  1920,
1008
- 1922,
1009
  1923,
 
1010
  1926,
1011
  1928,
1012
  1929,
1013
- 1931,
1014
  1932,
1015
- 1935,
1016
  1936,
1017
- 1939,
1018
  1941,
1019
- 1944,
1020
- 1946,
1021
- 1948,
1022
  1950,
1023
- 1953,
 
1024
  1955,
1025
- 1957,
1026
  1958,
1027
- 1959,
1028
  1961,
 
1029
  1964,
1030
- 1966,
1031
  1967,
 
1032
  1970,
1033
  1971,
1034
  1973,
1035
- 1974,
1036
- 1976,
1037
- 1980,
1038
- 1981,
1039
  1982,
1040
- 1985,
1041
- 1987,
1042
- 1989,
1043
- 1991,
1044
- 1993,
1045
- 1995,
1046
  109,
1047
- 1997,
 
1048
  1999,
 
1049
  2002,
1050
  2004,
1051
- 2005,
1052
  2007,
1053
- 2010,
 
1054
  2011,
1055
- 2012,
1056
- 2014,
1057
- 2016,
1058
  2018,
 
1059
  2021,
1060
- 2023,
1061
  2024,
1062
- 2025,
1063
  2027,
1064
- 2030,
1065
- 2031,
 
1066
  2035,
1067
- 2036,
1068
  2038,
 
1069
  2041,
1070
  2042,
1071
- 2044,
1072
  2045,
 
1073
  2048,
1074
  2049,
1075
  2051,
1076
- 2052,
1077
- 2054,
1078
  2056,
1079
  2058,
1080
- 2059,
1081
- 2061,
1082
- 2063,
1083
- 2065,
1084
  2067,
1085
  2069,
1086
- 2070,
1087
- 2072,
1088
  2074,
1089
- 2077,
1090
  2079,
1091
- 2081,
1092
- 2084,
1093
- 2085,
1094
- 2088,
1095
  911,
 
 
1096
  2089,
1097
- 2092,
 
1098
  2094,
1099
- 2096,
1100
- 2098,
1101
- 2099,
1102
- 2102,
1103
- 2105,
1104
- 2109,
1105
- 2113,
1106
- 2115,
1107
  2117,
1108
- 2119,
1109
- 2122,
1110
- 2125,
1111
  2126,
1112
- 2128,
1113
- 2131,
1114
  2132,
1115
  2133,
1116
- 2136,
1117
- 2138,
 
 
1118
  2139,
1119
  2140,
1120
- 2141,
1121
  2143,
1122
- 611,
1123
  2145,
1124
- 2146,
1125
  2149,
1126
  2151,
1127
  2153,
1128
- 2155,
1129
  2157,
 
1130
  2159,
1131
- 2161,
1132
  2164,
1133
- 2165,
1134
- 2166,
1135
  2167,
1136
- 2171,
1137
  2172,
1138
  2175,
1139
  2178,
1140
  2180,
 
1141
  2183,
 
1142
  2186,
1143
- 2188,
1144
- 2190,
1145
- 2191,
1146
- 2193,
1147
- 2194,
1148
- 2196,
1149
- 2197,
1150
- 2199
1151
  ]
1152
  }
 
903
  1708,
904
  1709,
905
  1711,
906
+ 1714,
907
  1716,
908
+ 1719,
909
+ 1720,
910
+ 1723,
911
+ 1726,
912
+ 1727,
913
  1729,
914
+ 1730,
915
  1731,
916
+ 1734,
917
+ 1737,
 
918
  1739,
919
+ 1742,
920
  1744,
921
  1746,
922
+ 1749,
923
+ 1752,
924
  1754,
925
+ 1759,
926
+ 1762,
927
  1764,
928
+ 1767,
929
  1769,
930
  1771,
931
  1773,
 
932
  120,
933
+ 1775,
934
+ 1776,
935
  1778,
936
+ 1781,
937
  1783,
938
+ 1784,
939
  1786,
940
  1788,
941
+ 1789,
942
  1790,
 
943
  1792,
944
  1794,
945
  1796,
946
  1798,
947
+ 1801,
948
  1803,
949
+ 1804,
950
  1806,
951
+ 1809,
952
+ 1810,
953
  1812,
 
954
  888,
955
+ 1814,
956
+ 1815,
957
  1816,
958
  1817,
959
+ 1820,
960
+ 1823,
961
+ 1826,
 
 
962
  1700,
963
+ 1828,
964
+ 1831,
965
+ 1832,
966
+ 1835,
967
+ 1838,
968
  1840,
969
+ 1843,
970
+ 1846,
971
  1848,
972
  1850,
973
+ 1852,
974
  1855,
975
+ 1856,
976
  1857,
977
+ 1859,
 
978
  1862,
979
+ 1864,
980
  1865,
981
  1867,
982
+ 1869,
983
  1870,
984
  1872,
985
+ 1874,
986
+ 1876,
987
+ 1878,
 
988
  1881,
989
+ 1882,
990
  1885,
991
+ 1886,
992
  1889,
993
+ 1891,
994
+ 1895,
995
+ 1897,
996
  1898,
997
  1900,
998
+ 1902,
999
+ 1904,
 
1000
  1907,
1001
+ 1908,
1002
  1910,
1003
+ 1912,
1004
  1913,
1005
+ 1917,
1006
+ 1919,
1007
  1920,
 
1008
  1923,
1009
+ 1925,
1010
  1926,
1011
  1928,
1012
  1929,
 
1013
  1932,
1014
+ 1933,
1015
  1936,
1016
+ 1938,
1017
  1941,
1018
+ 1943,
1019
+ 1945,
1020
+ 1947,
1021
  1950,
1022
+ 1952,
1023
+ 1954,
1024
  1955,
1025
+ 1956,
1026
  1958,
 
1027
  1961,
1028
+ 1963,
1029
  1964,
 
1030
  1967,
1031
+ 1968,
1032
  1970,
1033
  1971,
1034
  1973,
1035
+ 1977,
1036
+ 1978,
1037
+ 1979,
 
1038
  1982,
1039
+ 1984,
1040
+ 1986,
1041
+ 1988,
1042
+ 1990,
1043
+ 1992,
 
1044
  109,
1045
+ 1994,
1046
+ 1996,
1047
  1999,
1048
+ 2001,
1049
  2002,
1050
  2004,
 
1051
  2007,
1052
+ 2008,
1053
+ 2009,
1054
  2011,
1055
+ 2013,
1056
+ 2015,
 
1057
  2018,
1058
+ 2020,
1059
  2021,
1060
+ 2022,
1061
  2024,
 
1062
  2027,
1063
+ 2028,
1064
+ 2032,
1065
+ 2033,
1066
  2035,
 
1067
  2038,
1068
+ 2039,
1069
  2041,
1070
  2042,
 
1071
  2045,
1072
+ 2046,
1073
  2048,
1074
  2049,
1075
  2051,
1076
+ 2053,
1077
+ 2055,
1078
  2056,
1079
  2058,
1080
+ 2060,
1081
+ 2062,
1082
+ 2064,
1083
+ 2066,
1084
  2067,
1085
  2069,
1086
+ 2071,
 
1087
  2074,
1088
+ 2076,
1089
  2079,
1090
+ 2080,
1091
+ 2083,
 
 
1092
  911,
1093
+ 2084,
1094
+ 2087,
1095
  2089,
1096
+ 2091,
1097
+ 2093,
1098
  2094,
1099
+ 2097,
1100
+ 2100,
1101
+ 2104,
1102
+ 2108,
1103
+ 2110,
1104
+ 2112,
1105
+ 2114,
 
1106
  2117,
1107
+ 2120,
1108
+ 2121,
1109
+ 2123,
1110
  2126,
1111
+ 2127,
1112
+ 2130,
1113
  2132,
1114
  2133,
1115
+ 2134,
1116
+ 2135,
1117
+ 2137,
1118
+ 611,
1119
  2139,
1120
  2140,
 
1121
  2143,
 
1122
  2145,
1123
+ 2147,
1124
  2149,
1125
  2151,
1126
  2153,
1127
+ 2156,
1128
  2157,
1129
+ 2158,
1130
  2159,
1131
+ 2163,
1132
  2164,
 
 
1133
  2167,
1134
+ 2170,
1135
  2172,
1136
  2175,
1137
  2178,
1138
  2180,
1139
+ 2182,
1140
  2183,
1141
+ 2185,
1142
  2186,
1143
+ 2188
 
 
 
 
 
 
 
1144
  ]
1145
  }
lemmatizer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57cc44fd82385dc4acb07594f2430e3f8e96b5a109ff549a83b8e3a338886f96
3
- size 445866
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5ca0aee4b1a4d59e98dcd4bc616cd205265ad8e26716e22c4cac3d799a06024
3
+ size 443150
lemmatizer/trees CHANGED
Binary files a/lemmatizer/trees and b/lemmatizer/trees differ
 
meta.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "lang":"ko",
3
  "name":"core_news_sm",
4
- "version":"3.3.0",
5
  "description":"Korean pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner.",
6
  "author":"Explosion",
7
  "email":"contact@explosion.ai",
8
  "url":"https://explosion.ai",
9
  "license":"CC BY-SA 4.0",
10
- "spacy_version":">=3.3.0.dev0,<3.4.0",
11
- "spacy_git_version":"849bef2de",
12
  "vectors":{
13
  "width":0,
14
  "vectors":0,
@@ -20,6 +20,7 @@
20
 
21
  ],
22
  "tagger":[
 
23
  "ecs",
24
  "etm",
25
  "f",
@@ -2005,6 +2006,7 @@
2005
  "POS=VERB",
2006
  "POS=ADJ",
2007
  "POS=PUNCT",
 
2008
  "POS=AUX",
2009
  "POS=PRON",
2010
  "POS=PROPN",
@@ -2084,118 +2086,113 @@
2084
  "token_p":1.0,
2085
  "token_r":1.0,
2086
  "token_f":1.0,
2087
- "tag_acc":0.7335983702,
2088
- "pos_acc":0.8608501296,
2089
  "sents_p":1.0,
2090
  "sents_r":1.0,
2091
  "sents_f":1.0,
2092
- "dep_uas":0.7375785393,
2093
- "dep_las":0.6555857582,
2094
  "dep_las_per_type":{
2095
  "amod":{
2096
- "p":0.7562056738,
2097
- "r":0.7469352014,
2098
- "f":0.7515418502
2099
  },
2100
  "dislocated":{
2101
- "p":0.5577299413,
2102
- "r":0.555916775,
2103
- "f":0.5568218821
2104
  },
2105
  "root":{
2106
- "p":0.8025169409,
2107
- "r":0.8025169409,
2108
- "f":0.8025169409
2109
  },
2110
  "nmod":{
2111
- "p":0.6305841924,
2112
- "r":0.6534124629,
2113
- "f":0.6417953949
2114
  },
2115
  "nsubj":{
2116
- "p":0.5475830816,
2117
- "r":0.5781499203,
2118
- "f":0.5624515128
2119
  },
2120
  "advmod":{
2121
- "p":0.641955836,
2122
- "r":0.6389324961,
2123
- "f":0.640440598
2124
  },
2125
  "dep":{
2126
- "p":0.4827586207,
2127
- "r":0.3867403315,
2128
- "f":0.4294478528
2129
  },
2130
  "conj":{
2131
- "p":0.4474660074,
2132
- "r":0.4641025641,
2133
- "f":0.4556324733
2134
  },
2135
  "xcomp":{
2136
- "p":0.5263157895,
2137
- "r":0.4810996564,
2138
- "f":0.5026929982
2139
  },
2140
  "flat":{
2141
- "p":0.2916666667,
2142
- "r":0.0654205607,
2143
- "f":0.106870229
2144
  },
2145
  "obj":{
2146
- "p":0.7117552335,
2147
- "r":0.7125201505,
2148
- "f":0.7121374866
2149
  },
2150
  "acl":{
2151
- "p":0.6658566221,
2152
- "r":0.6574685063,
2153
- "f":0.6616359795
2154
  },
2155
  "advcl":{
2156
- "p":0.5574112735,
2157
- "r":0.5437881874,
2158
- "f":0.5505154639
2159
  },
2160
  "det":{
2161
- "p":0.7582781457,
2162
- "r":0.8609022556,
2163
- "f":0.8063380282
2164
  },
2165
  "compound":{
2166
- "p":0.6999360205,
2167
- "r":0.6678876679,
2168
- "f":0.6835363949
2169
  },
2170
  "ccomp":{
2171
- "p":0.4908946952,
2172
- "r":0.5145228216,
2173
- "f":0.5024311183
2174
  },
2175
  "obl":{
2176
- "p":0.6481223922,
2177
- "r":0.6357435198,
2178
- "f":0.6418732782
2179
  },
2180
  "aux":{
2181
- "p":0.8774811773,
2182
- "r":0.8990182328,
2183
- "f":0.8881191548
2184
  },
2185
  "cc":{
2186
- "p":0.7802547771,
2187
- "r":0.8112582781,
2188
- "f":0.7954545455
2189
  },
2190
  "nummod":{
2191
- "p":0.8333333333,
2192
- "r":0.8115183246,
2193
- "f":0.8222811671
2194
- },
2195
- "iobj":{
2196
- "p":0.7230769231,
2197
- "r":0.661971831,
2198
- "f":0.6911764706
2199
  },
2200
  "discourse":{
2201
  "p":0.0,
@@ -2203,76 +2200,81 @@
2203
  "f":0.0
2204
  },
2205
  "fixed":{
2206
- "p":0.9484126984,
2207
- "r":1.0,
2208
- "f":0.9735234216
2209
  },
2210
  "csubj":{
2211
- "p":0.5157894737,
2212
- "r":0.4454545455,
2213
- "f":0.4780487805
2214
  },
2215
  "mark":{
2216
- "p":0.6862745098,
2217
- "r":0.5384615385,
2218
- "f":0.6034482759
 
 
 
 
 
2219
  },
2220
  "case":{
2221
- "p":0.8787878788,
2222
  "r":0.8529411765,
2223
- "f":0.8656716418
2224
- },
2225
- "appos":{
2226
- "p":0.6111111111,
2227
- "r":0.7857142857,
2228
- "f":0.6875
2229
  },
2230
  "cop":{
2231
- "p":0.75,
2232
- "r":0.8,
2233
- "f":0.7741935484
2234
  },
2235
  "vocative":{
2236
  "p":0.0,
2237
  "r":0.0,
2238
  "f":0.0
 
 
 
 
 
2239
  }
2240
  },
2241
- "lemma_acc":0.8317806919,
2242
- "speed":10661.1584237889,
2243
- "ents_p":0.7714191745,
2244
- "ents_r":0.6628046627,
2245
- "ents_f":0.712999202,
2246
  "ents_per_type":{
2247
  "OG":{
2248
- "p":0.6825588413,
2249
- "r":0.5243393602,
2250
- "f":0.5930781332
2251
  },
2252
  "PS":{
2253
- "p":0.7701958384,
2254
- "r":0.5719154738,
2255
- "f":0.656408919
2256
  },
2257
  "QT":{
2258
- "p":0.853619981,
2259
- "r":0.8656620712,
2260
- "f":0.8595988539
2261
  },
2262
  "LC":{
2263
- "p":0.5845942228,
2264
- "r":0.5227552276,
2265
- "f":0.5519480519
2266
  },
2267
  "DT":{
2268
- "p":0.8222118525,
2269
- "r":0.7637624621,
2270
- "f":0.7919101124
2271
  },
2272
  "TI":{
2273
- "p":0.8846960168,
2274
- "r":0.7743119266,
2275
- "f":0.8258317025
2276
  }
2277
  }
2278
  },
 
1
  {
2
  "lang":"ko",
3
  "name":"core_news_sm",
4
+ "version":"3.4.0",
5
  "description":"Korean pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner.",
6
  "author":"Explosion",
7
  "email":"contact@explosion.ai",
8
  "url":"https://explosion.ai",
9
  "license":"CC BY-SA 4.0",
10
+ "spacy_version":">=3.4.0,<3.5.0",
11
+ "spacy_git_version":"dd038b536",
12
  "vectors":{
13
  "width":0,
14
  "vectors":0,
 
20
 
21
  ],
22
  "tagger":[
23
+ "_SP",
24
  "ecs",
25
  "etm",
26
  "f",
 
2006
  "POS=VERB",
2007
  "POS=ADJ",
2008
  "POS=PUNCT",
2009
+ "POS=SPACE",
2010
  "POS=AUX",
2011
  "POS=PRON",
2012
  "POS=PROPN",
 
2086
  "token_p":1.0,
2087
  "token_r":1.0,
2088
  "token_f":1.0,
2089
+ "tag_acc":0.7320951759,
2090
+ "pos_acc":0.860041932,
2091
  "sents_p":1.0,
2092
  "sents_r":1.0,
2093
  "sents_f":1.0,
2094
+ "dep_uas":0.7420346687,
2095
+ "dep_las":0.6614531114,
2096
  "dep_las_per_type":{
2097
  "amod":{
2098
+ "p":0.7620320856,
2099
+ "r":0.7486865149,
2100
+ "f":0.7553003534
2101
  },
2102
  "dislocated":{
2103
+ "p":0.561827957,
2104
+ "r":0.5435630689,
2105
+ "f":0.5525446134
2106
  },
2107
  "root":{
2108
+ "p":0.8146176186,
2109
+ "r":0.8146176186,
2110
+ "f":0.8146176186
2111
  },
2112
  "nmod":{
2113
+ "p":0.626799557,
2114
+ "r":0.671810089,
2115
+ "f":0.648524778
2116
  },
2117
  "nsubj":{
2118
+ "p":0.5777604977,
2119
+ "r":0.5925039872,
2120
+ "f":0.5850393701
2121
  },
2122
  "advmod":{
2123
+ "p":0.6664056382,
2124
+ "r":0.6679748823,
2125
+ "f":0.6671893375
2126
  },
2127
  "dep":{
2128
+ "p":0.5165562914,
2129
+ "r":0.4309392265,
2130
+ "f":0.4698795181
2131
  },
2132
  "conj":{
2133
+ "p":0.4520123839,
2134
+ "r":0.4679487179,
2135
+ "f":0.4598425197
2136
  },
2137
  "xcomp":{
2138
+ "p":0.5681818182,
2139
+ "r":0.5154639175,
2140
+ "f":0.5405405405
2141
  },
2142
  "flat":{
2143
+ "p":0.7142857143,
2144
+ "r":0.0934579439,
2145
+ "f":0.1652892562
2146
  },
2147
  "obj":{
2148
+ "p":0.6926293779,
2149
+ "r":0.7119828049,
2150
+ "f":0.702172761
2151
  },
2152
  "acl":{
2153
+ "p":0.6581818182,
2154
+ "r":0.6514697061,
2155
+ "f":0.654808562
2156
  },
2157
  "advcl":{
2158
+ "p":0.5576005453,
2159
+ "r":0.55532926,
2160
+ "f":0.556462585
2161
  },
2162
  "det":{
2163
+ "p":0.7516339869,
2164
+ "r":0.8646616541,
2165
+ "f":0.8041958042
2166
  },
2167
  "compound":{
2168
+ "p":0.6823679185,
2169
+ "r":0.6544566545,
2170
+ "f":0.6681209099
2171
  },
2172
  "ccomp":{
2173
+ "p":0.5081833061,
2174
+ "r":0.5153526971,
2175
+ "f":0.5117428925
2176
  },
2177
  "obl":{
2178
+ "p":0.6657381616,
2179
+ "r":0.6521145975,
2180
+ "f":0.6588559614
2181
  },
2182
  "aux":{
2183
+ "p":0.8905817175,
2184
+ "r":0.9018232819,
2185
+ "f":0.8961672474
2186
  },
2187
  "cc":{
2188
+ "p":0.807073955,
2189
+ "r":0.8311258278,
2190
+ "f":0.8189233279
2191
  },
2192
  "nummod":{
2193
+ "p":0.875,
2194
+ "r":0.8429319372,
2195
+ "f":0.8586666667
 
 
 
 
 
2196
  },
2197
  "discourse":{
2198
  "p":0.0,
 
2200
  "f":0.0
2201
  },
2202
  "fixed":{
2203
+ "p":0.9596774194,
2204
+ "r":0.9958158996,
2205
+ "f":0.977412731
2206
  },
2207
  "csubj":{
2208
+ "p":0.5471698113,
2209
+ "r":0.5272727273,
2210
+ "f":0.537037037
2211
  },
2212
  "mark":{
2213
+ "p":0.6666666667,
2214
+ "r":0.5538461538,
2215
+ "f":0.6050420168
2216
+ },
2217
+ "iobj":{
2218
+ "p":0.6666666667,
2219
+ "r":0.5070422535,
2220
+ "f":0.576
2221
  },
2222
  "case":{
2223
+ "p":0.8529411765,
2224
  "r":0.8529411765,
2225
+ "f":0.8529411765
 
 
 
 
 
2226
  },
2227
  "cop":{
2228
+ "p":0.7647058824,
2229
+ "r":0.8666666667,
2230
+ "f":0.8125
2231
  },
2232
  "vocative":{
2233
  "p":0.0,
2234
  "r":0.0,
2235
  "f":0.0
2236
+ },
2237
+ "appos":{
2238
+ "p":0.6428571429,
2239
+ "r":0.6428571429,
2240
+ "f":0.6428571429
2241
  }
2242
  },
2243
+ "lemma_acc":0.8340354833,
2244
+ "speed":12550.4676822577,
2245
+ "ents_p":0.7701563395,
2246
+ "ents_r":0.6647121159,
2247
+ "ents_f":0.7135598362,
2248
  "ents_per_type":{
2249
  "OG":{
2250
+ "p":0.6808894231,
2251
+ "r":0.5252665739,
2252
+ "f":0.5930384716
2253
  },
2254
  "PS":{
2255
+ "p":0.7770642202,
2256
+ "r":0.5773687798,
2257
+ "f":0.6624951115
2258
  },
2259
  "QT":{
2260
+ "p":0.8434484905,
2261
+ "r":0.868868227,
2262
+ "f":0.8559696778
2263
  },
2264
  "LC":{
2265
+ "p":0.5856847901,
2266
+ "r":0.5233702337,
2267
+ "f":0.5527768756
2268
  },
2269
  "DT":{
2270
+ "p":0.8166589111,
2271
+ "r":0.7607282185,
2272
+ "f":0.7877019749
2273
  },
2274
  "TI":{
2275
+ "p":0.8952991453,
2276
+ "r":0.7688073394,
2277
+ "f":0.8272458045
2278
  }
2279
  }
2280
  },
morphologizer/cfg CHANGED
@@ -9,6 +9,7 @@
9
  "POS=VERB":"",
10
  "POS=ADJ":"",
11
  "POS=PUNCT":"",
 
12
  "POS=AUX":"",
13
  "POS=PRON":"",
14
  "POS=PROPN":"",
@@ -28,6 +29,7 @@
28
  "POS=VERB":100,
29
  "POS=ADJ":84,
30
  "POS=PUNCT":97,
 
31
  "POS=AUX":87,
32
  "POS=PRON":95,
33
  "POS=PROPN":96,
 
9
  "POS=VERB":"",
10
  "POS=ADJ":"",
11
  "POS=PUNCT":"",
12
+ "POS=SPACE":"",
13
  "POS=AUX":"",
14
  "POS=PRON":"",
15
  "POS=PROPN":"",
 
29
  "POS=VERB":100,
30
  "POS=ADJ":84,
31
  "POS=PUNCT":97,
32
+ "POS=SPACE":103,
33
  "POS=AUX":87,
34
  "POS=PRON":95,
35
  "POS=PROPN":96,
morphologizer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d4dfef2d668edd7aaeb8fbab39adb5925232e1281c0e4dfdb8f8d96d352432a
3
- size 7025
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1ce2045adff96e3bdb86778b378d82bd658547eec6ab7ff47576196de360499
3
+ size 7413
ner/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:62f5767bee31144379e842d266eee700267837fc3b4486f9704625b21f64ebe1
3
  size 6272282
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:904260ca33ed768cf83c513b0d196063e4569321698899eeecdcc8f1cb0f9537
3
  size 6272282
ner/moves CHANGED
@@ -1 +1 @@
1
- ��moves�,{"0":{},"1":{"PS":16127,"QT":13487,"DT":10090,"OG":9793,"LC":8337,"TI":2926},"2":{"PS":16127,"QT":13487,"DT":10090,"OG":9793,"LC":8337,"TI":2926},"3":{"PS":16127,"QT":13487,"DT":10090,"OG":9793,"LC":8337,"TI":2926},"4":{"PS":16127,"QT":13487,"DT":10090,"OG":9793,"LC":8337,"TI":2926,"":1},"5":{"":1}}�cfg��neg_key�
 
1
+ ��moves�,{"0":{},"1":{"PS":16134,"QT":13491,"DT":10094,"OG":9801,"LC":8341,"TI":2933},"2":{"PS":16134,"QT":13491,"DT":10094,"OG":9801,"LC":8341,"TI":2933},"3":{"PS":16134,"QT":13491,"DT":10094,"OG":9801,"LC":8341,"TI":2933},"4":{"PS":16134,"QT":13491,"DT":10094,"OG":9801,"LC":8341,"TI":2933,"":1},"5":{"":1}}�cfg��neg_key�
parser/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e91d4945fcaea012914bf52396bd9d4ec0a65629041277caf957447e7bc8f629
3
  size 305088
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:462e720a54dc098101de8567256d17cae693686b2140d5b2d3e8ad0d2b1ab132
3
  size 305088
parser/moves CHANGED
@@ -1 +1 @@
1
- ��moves��{"0":{"":202496},"1":{"":70942},"2":{"compound":20632,"obj":19819,"nmod":17977,"acl":17852,"advcl":16547,"dislocated":15591,"advmod":15157,"nsubj":14107,"amod":13975,"ccomp":12588,"obl":9740,"det":4235,"cc":3779,"xcomp":3414,"nummod":2654,"dep":2533,"dislocated||conj":2002,"punct":1803,"csubj":1008,"iobj":823,"advmod||conj":769,"ccomp||conj":717,"cc||conj":705,"mark":635,"nmod||conj":606,"advcl||conj":586,"nsubj||conj":521,"acl||conj":425,"obj||conj":287,"amod||conj":278,"compound||conj":184,"xcomp||conj":169,"obl||conj":153,"dep||conj":64,"det||conj":51,"iobj||conj":31},"3":{"punct":31170,"conj":17608,"aux":16034,"fixed":2776,"case":1120,"appos":956,"flat":595,"advmod":311,"cop":268,"dep":0},"4":{"ROOT":23010}}�cfg��neg_key�
 
1
+ ��moves��{"0":{"":202502},"1":{"":72363},"2":{"compound":20632,"obj":19819,"nmod":17977,"acl":17852,"advcl":16547,"dislocated":15591,"advmod":15157,"nsubj":14107,"amod":13975,"ccomp":12588,"obl":9740,"det":4235,"cc":3779,"xcomp":3414,"nummod":2654,"dep":2539,"dislocated||conj":2002,"punct":1803,"csubj":1008,"iobj":823,"advmod||conj":769,"ccomp||conj":717,"cc||conj":705,"mark":635,"nmod||conj":606,"advcl||conj":586,"nsubj||conj":521,"acl||conj":425,"obj||conj":287,"amod||conj":278,"compound||conj":184,"xcomp||conj":169,"obl||conj":153,"dep||conj":64,"det||conj":51,"iobj||conj":31},"3":{"punct":31170,"conj":17608,"aux":16034,"fixed":2776,"dep":1443,"case":1120,"appos":956,"flat":595,"advmod":311,"cop":268},"4":{"ROOT":23010}}�cfg��neg_key�
senter/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6525f344bfbf7236886310578daed1208bb6f99812a57e81458e32aaab85875
3
  size 197089
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c3f42c9a0165f02e19a21174df4d1fb4fda8008e61ab09e9485a78c6598f170
3
  size 197089
tagger/cfg CHANGED
@@ -1,5 +1,6 @@
1
  {
2
  "labels":[
 
3
  "ecs",
4
  "etm",
5
  "f",
 
1
  {
2
  "labels":[
3
+ "_SP",
4
  "ecs",
5
  "etm",
6
  "f",
tagger/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94c035f00ea44c6c909c3f77aaa9801a70a784af2457d86d3b3d392c2ee7f978
3
- size 766742
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb9add975832b2f42c520904a8ef1b8f17fefda77cff5e53f197cbaa744c96cb
3
+ size 767130
tok2vec/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83c5ee1acdaf72dfbb0fc5861a065ba79225ba36aa0170a7b7f00530db11339b
3
  size 6139229
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c2af6b96552cc0686ca14bfa05aa734924d67a05b905791138ed3fffa8df118
3
  size 6139229
vocab/strings.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6abded2cba340f092fd47b90dd5d4c640487c814e5ce9d10fe330c0366393429
3
- size 10114332
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f04ab95fb3a6ca98589705130e870b3bbac7239bb1d3367a3993bccf6f3d5212
3
+ size 10081761