firqaaa committed on
Commit
b1b1df9
1 Parent(s): b8ea162

Update spaCy pipeline

Browse files
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ id_core_news_sm-any-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
37
+ parser/model filter=lfs diff=lfs merge=lfs -text
38
+ tok2vec/model filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - spacy
4
+ - token-classification
5
+ language:
6
+ - id
7
+ model-index:
8
+ - name: id_core_news_sm
9
+ results:
10
+ - task:
11
+ name: TAG
12
+ type: token-classification
13
+ metrics:
14
+ - name: TAG (XPOS) Accuracy
15
+ type: accuracy
16
+ value: 0.9051536414
17
+ - task:
18
+ name: POS
19
+ type: token-classification
20
+ metrics:
21
+ - name: POS (UPOS) Accuracy
22
+ type: accuracy
23
+ value: 0.9125297415
24
+ - task:
25
+ name: MORPH
26
+ type: token-classification
27
+ metrics:
28
+ - name: Morph (UFeats) Accuracy
29
+ type: accuracy
30
+ value: 0.9296115526
31
+ - task:
32
+ name: LEMMA
33
+ type: token-classification
34
+ metrics:
35
+ - name: Lemma Accuracy
36
+ type: accuracy
37
+ value: 0.9369920335
38
+ - task:
39
+ name: UNLABELED_DEPENDENCIES
40
+ type: token-classification
41
+ metrics:
42
+ - name: Unlabeled Attachment Score (UAS)
43
+ type: f_score
44
+ value: 0.7753785754
45
+ - task:
46
+ name: LABELED_DEPENDENCIES
47
+ type: token-classification
48
+ metrics:
49
+ - name: Labeled Attachment Score (LAS)
50
+ type: f_score
51
+ value: 0.6871555348
52
+ - task:
53
+ name: SENTS
54
+ type: token-classification
55
+ metrics:
56
+ - name: Sentences F-Score
57
+ type: f_score
58
+ value: 0.857881137
59
+ ---
60
+ | Feature | Description |
61
+ | --- | --- |
62
+ | **Name** | `id_core_news_sm` |
63
+ | **Version** | `0.0.0` |
64
+ | **spaCy** | `>=3.7.2,<3.8.0` |
65
+ | **Default Pipeline** | `tok2vec`, `tagger`, `morphologizer`, `trainable_lemmatizer`, `parser` |
66
+ | **Components** | `tok2vec`, `tagger`, `morphologizer`, `trainable_lemmatizer`, `parser` |
67
+ | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
68
+ | **Sources** | n/a |
69
+ | **License** | n/a |
70
+ | **Author** | n/a |
71
+
72
+ ### Label Scheme
73
+
74
+ <details>
75
+
76
+ <summary>View label scheme (166 labels for 3 components)</summary>
77
+
78
+ | Component | Labels |
79
+ | --- | --- |
80
+ | **`tagger`** | `APP`, `ASP`, `ASP+PS3`, `ASS`, `B--`, `B--+PS3`, `CC-`, `CCONJ`, `CD-`, `CO-`, `D--`, `D--+PS3`, `F--`, `F--+PS2`, `G--`, `G--+PS3`, `H--`, `I--`, `M--`, `M--+PS3`, `NOUN`, `NPD`, `NSD`, `NSD+PS3`, `NSF`, `NSM`, `NUM`, `O--`, `PP1`, `PP2`, `PP3`, `PROPN`, `PS1`, `PS1+VSA`, `PS2`, `PS3`, `R--`, `R--+PS3`, `S--`, `SYM`, `T--`, `VERB`, `VPA`, `VSA`, `VSA+PS2`, `VSA+PS3`, `VSP`, `W--`, `X--`, `Z--` |
81
+ | **`morphologizer`** | `POS=PROPN`, `POS=AUX`, `Definite=Ind\|POS=DET\|PronType=Art`, `Number=Sing\|POS=NOUN`, `POS=PRON\|PronType=Rel`, `Mood=Ind\|POS=VERB\|Voice=Pass`, `POS=ADP`, `POS=PUNCT`, `POS=NOUN`, `POS=ADV`, `POS=CCONJ`, `POS=SCONJ`, `Mood=Ind\|POS=VERB\|Voice=Act`, `POS=VERB`, `POS=DET\|PronType=Tot`, `Number=Sing\|POS=PRON\|Person=3\|PronType=Prs`, `Number=Plur\|POS=PRON\|Person=3\|PronType=Prs`, `POS=PRON\|PronType=Prs\|Reflex=Yes`, `POS=DET\|PronType=Dem`, `NumType=Card\|POS=NUM`, `POS=ADJ`, `Number=Plur\|POS=DET\|PronType=Ind`, `NumType=Card\|POS=NUM\|PronType=Tot`, `POS=PART\|Polarity=Neg`, `POS=PRON\|PronType=Int`, `NumType=Ord\|POS=ADJ`, `POS=PART`, `POS=PRON\|PronType=Dem`, `POS=DET\|PronType=Ind`, `Number=Plur\|POS=NOUN`, `Number=Sing\|POS=PRON\|Person=1\|Polite=Form\|PronType=Prs`, `POS=ADV\|PronType=Int`, `Clusivity=In\|Number=Plur\|POS=PRON\|Person=1\|PronType=Prs`, `Definite=Def\|POS=DET\|PronType=Art`, `POS=SYM`, `Degree=Sup\|POS=ADJ`, `POS=INTJ`, `Number=Sing\|POS=PRON\|Person=2\|Polite=Infm\|PronType=Prs`, `POS=ADV\|PronType=Ind`, `Number=Sing\|POS=PRON\|Person=3\|Polite=Form\|PronType=Prs`, `Number=Sing\|POS=PRON\|Person=1\|Polite=Infm\|PronType=Prs`, `Number=Sing\|POS=PRON\|PronType=Ind`, `POS=VERB\|Voice=Act`, `POS=DET\|PronType=Emp`, `POS=VERB\|Voice=Pass`, `POS=ADV\|PronType=Dem`, `POS=NOUN\|Typo=Yes`, `POS=ADP\|Typo=Yes`, `Number=Plur\|POS=PRON\|PronType=Ind`, `POS=VERB\|Typo=Yes\|Voice=Pass`, `POS=X`, `POS=PRON\|PronType=Tot`, `POS=SCONJ\|Typo=Yes`, `Number=Plur\|POS=PRON\|Person=2\|Polite=Infm\|PronType=Prs`, `NumType=Card\|POS=NUM\|Typo=Yes`, `Clusivity=Ex\|Number=Plur\|POS=PRON\|Person=1\|PronType=Prs`, `Number=Sing\|POS=PRON\|Person=2\|Polite=Form\|PronType=Prs`, `Foreign=Yes\|POS=X`, `POS=ADV\|PronType=Rel`, `Mood=Imp\|POS=VERB\|Voice=Act`, `Number=Sing\|POS=NOUN\|Typo=Yes`, `POS=PROPN\|Typo=Yes`, `POS=DET`, `Number=Sing\|POS=DET\|PronType=Ind`, `POS=DET\|PronType=Ind\|Typo=Yes`, `Abbr=Yes\|POS=DET\|PronType=Dem`, 
`POS=PRON\|PronType=Ind`, `POS=VERB\|Typo=Yes`, `Abbr=Yes\|POS=PROPN`, `Abbr=Yes\|POS=PRON\|PronType=Rel`, `Number=Plur\|POS=PRON\|PronType=Int`, `Abbr=Yes\|POS=PART\|Polarity=Neg`, `POS=ADV\|PronType=Tot`, `Abbr=Yes\|POS=ADV`, `POS=ADV\|Typo=Yes`, `POS=X\|Typo=Yes`, `Number=Sing\|POS=PRON\|Person=2\|PronType=Prs`, `POS=ADV\|PronType=Int\|Typo=Yes`, `NumType=Ord\|POS=ADJ\|Typo=Yes` |
82
+ | **`parser`** | `ROOT`, `acl`, `acl:relcl`, `advcl`, `advmod`, `advmod:emph`, `amod`, `appos`, `aux`, `case`, `case:adv`, `cc`, `ccomp`, `compound`, `conj`, `cop`, `dep`, `det`, `fixed`, `flat`, `flat:foreign`, `flat:name`, `mark`, `nmod`, `nmod:lmod`, `nmod:poss`, `nmod:tmod`, `nsubj`, `nsubj:pass`, `nummod`, `obj`, `obl`, `obl:agent`, `obl:tmod`, `parataxis`, `punct`, `xcomp` |
83
+
84
+ </details>
85
+
86
+ ### Accuracy
87
+
88
+ | Type | Score |
89
+ | --- | --- |
90
+ | `TAG_ACC` | 90.52 |
91
+ | `POS_ACC` | 91.25 |
92
+ | `MORPH_ACC` | 92.96 |
93
+ | `LEMMA_ACC` | 93.70 |
94
+ | `DEP_UAS` | 77.54 |
95
+ | `DEP_LAS` | 68.72 |
96
+ | `SENTS_P` | 82.72 |
97
+ | `SENTS_R` | 89.09 |
98
+ | `SENTS_F` | 85.79 |
99
+ | `TOK2VEC_LOSS` | 7567.43 |
100
+ | `TAGGER_LOSS` | 736.14 |
101
+ | `MORPHOLOGIZER_LOSS` | 1556.89 |
102
+ | `TRAINABLE_LEMMATIZER_LOSS` | 350.34 |
103
+ | `PARSER_LOSS` | 10378.58 |
config.cfg ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [paths]
2
+ train = "./id_gsd-ud-train.spacy"
3
+ dev = "./id_gsd-ud-dev.spacy"
4
+ vectors = null
5
+ init_tok2vec = null
6
+
7
+ [system]
8
+ gpu_allocator = null
9
+ seed = 0
10
+
11
+ [nlp]
12
+ lang = "id"
13
+ pipeline = ["tok2vec","tagger","morphologizer","trainable_lemmatizer","parser"]
14
+ batch_size = 1000
15
+ disabled = []
16
+ before_creation = null
17
+ after_creation = null
18
+ after_pipeline_creation = null
19
+ tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
20
+ vectors = {"@vectors":"spacy.Vectors.v1"}
21
+
22
+ [components]
23
+
24
+ [components.morphologizer]
25
+ factory = "morphologizer"
26
+ extend = false
27
+ label_smoothing = 0.05
28
+ overwrite = true
29
+ scorer = {"@scorers":"spacy.morphologizer_scorer.v1"}
30
+
31
+ [components.morphologizer.model]
32
+ @architectures = "spacy.Tagger.v2"
33
+ nO = null
34
+ normalize = false
35
+
36
+ [components.morphologizer.model.tok2vec]
37
+ @architectures = "spacy.Tok2VecListener.v1"
38
+ width = ${components.tok2vec.model.encode.width}
39
+ upstream = "*"
40
+
41
+ [components.parser]
42
+ factory = "parser"
43
+ learn_tokens = false
44
+ min_action_freq = 30
45
+ moves = null
46
+ scorer = {"@scorers":"spacy.parser_scorer.v1"}
47
+ update_with_oracle_cut_size = 100
48
+
49
+ [components.parser.model]
50
+ @architectures = "spacy.TransitionBasedParser.v2"
51
+ state_type = "parser"
52
+ extra_state_tokens = false
53
+ hidden_width = 128
54
+ maxout_pieces = 3
55
+ use_upper = true
56
+ nO = null
57
+
58
+ [components.parser.model.tok2vec]
59
+ @architectures = "spacy.Tok2VecListener.v1"
60
+ width = ${components.tok2vec.model.encode.width}
61
+ upstream = "*"
62
+
63
+ [components.tagger]
64
+ factory = "tagger"
65
+ label_smoothing = 0.05
66
+ neg_prefix = "!"
67
+ overwrite = false
68
+ scorer = {"@scorers":"spacy.tagger_scorer.v1"}
69
+
70
+ [components.tagger.model]
71
+ @architectures = "spacy.Tagger.v2"
72
+ nO = null
73
+ normalize = false
74
+
75
+ [components.tagger.model.tok2vec]
76
+ @architectures = "spacy.Tok2VecListener.v1"
77
+ width = ${components.tok2vec.model.encode.width}
78
+ upstream = "*"
79
+
80
+ [components.tok2vec]
81
+ factory = "tok2vec"
82
+
83
+ [components.tok2vec.model]
84
+ @architectures = "spacy.Tok2Vec.v2"
85
+
86
+ [components.tok2vec.model.embed]
87
+ @architectures = "spacy.MultiHashEmbed.v2"
88
+ width = ${components.tok2vec.model.encode.width}
89
+ attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
90
+ rows = [5000,1000,2500,2500]
91
+ include_static_vectors = true
92
+
93
+ [components.tok2vec.model.encode]
94
+ @architectures = "spacy.MaxoutWindowEncoder.v2"
95
+ width = 256
96
+ depth = 8
97
+ window_size = 1
98
+ maxout_pieces = 3
99
+
100
+ [components.trainable_lemmatizer]
101
+ factory = "trainable_lemmatizer"
102
+ backoff = "orth"
103
+ min_tree_freq = 3
104
+ overwrite = false
105
+ scorer = {"@scorers":"spacy.lemmatizer_scorer.v1"}
106
+ top_k = 1
107
+
108
+ [components.trainable_lemmatizer.model]
109
+ @architectures = "spacy.Tagger.v2"
110
+ nO = null
111
+ normalize = false
112
+
113
+ [components.trainable_lemmatizer.model.tok2vec]
114
+ @architectures = "spacy.Tok2VecListener.v1"
115
+ width = ${components.tok2vec.model.encode.width}
116
+ upstream = "*"
117
+
118
+ [corpora]
119
+
120
+ [corpora.dev]
121
+ @readers = "spacy.Corpus.v1"
122
+ path = ${paths.dev}
123
+ max_length = 0
124
+ gold_preproc = false
125
+ limit = 0
126
+ augmenter = null
127
+
128
+ [corpora.train]
129
+ @readers = "spacy.Corpus.v1"
130
+ path = ${paths.train}
131
+ max_length = 0
132
+ gold_preproc = false
133
+ limit = 0
134
+ augmenter = null
135
+
136
+ [training]
137
+ dev_corpus = "corpora.dev"
138
+ train_corpus = "corpora.train"
139
+ seed = ${system.seed}
140
+ gpu_allocator = ${system.gpu_allocator}
141
+ dropout = 0.1
142
+ accumulate_gradient = 1
143
+ patience = 1600
144
+ max_epochs = 0
145
+ max_steps = 20000
146
+ eval_frequency = 200
147
+ frozen_components = []
148
+ annotating_components = []
149
+ before_to_disk = null
150
+ before_update = null
151
+
152
+ [training.batcher]
153
+ @batchers = "spacy.batch_by_words.v1"
154
+ discard_oversize = false
155
+ tolerance = 0.2
156
+ get_length = null
157
+
158
+ [training.batcher.size]
159
+ @schedules = "compounding.v1"
160
+ start = 100
161
+ stop = 1000
162
+ compound = 1.001
163
+ t = 0.0
164
+
165
+ [training.logger]
166
+ @loggers = "spacy.ConsoleLogger.v1"
167
+ progress_bar = false
168
+
169
+ [training.optimizer]
170
+ @optimizers = "Adam.v1"
171
+ beta1 = 0.9
172
+ beta2 = 0.999
173
+ L2_is_weight_decay = true
174
+ L2 = 0.01
175
+ grad_clip = 1.0
176
+ use_averages = false
177
+ eps = 0.00000001
178
+ learn_rate = 0.001
179
+
180
+ [training.score_weights]
181
+ tag_acc = 0.26
182
+ pos_acc = 0.12
183
+ morph_acc = 0.12
184
+ morph_per_feat = null
185
+ lemma_acc = 0.26
186
+ dep_uas = 0.12
187
+ dep_las = 0.12
188
+ dep_las_per_type = null
189
+ sents_p = null
190
+ sents_r = null
191
+ sents_f = 0.0
192
+
193
+ [pretraining]
194
+
195
+ [initialize]
196
+ vectors = ${paths.vectors}
197
+ init_tok2vec = ${paths.init_tok2vec}
198
+ vocab_data = null
199
+ lookups = null
200
+ before_init = null
201
+ after_init = null
202
+
203
+ [initialize.components]
204
+
205
+ [initialize.tokenizer]
id_core_news_sm-any-py3-none-any.whl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a5e1c6460adf7bbaa1cbf52f3ef60fbd73a088fff0128bebefe8f7cde3158c5
3
+ size 34069443
meta.json ADDED
@@ -0,0 +1,515 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "lang":"id",
3
+ "name":"core_news_sm",
4
+ "version":"0.0.0",
5
+ "description":"",
6
+ "author":"",
7
+ "email":"",
8
+ "url":"",
9
+ "license":"",
10
+ "spacy_version":">=3.7.2,<3.8.0",
11
+ "spacy_git_version":"a89eae928",
12
+ "vectors":{
13
+ "width":0,
14
+ "vectors":0,
15
+ "keys":0,
16
+ "name":null
17
+ },
18
+ "labels":{
19
+ "tok2vec":[
20
+
21
+ ],
22
+ "tagger":[
23
+ "APP",
24
+ "ASP",
25
+ "ASP+PS3",
26
+ "ASS",
27
+ "B--",
28
+ "B--+PS3",
29
+ "CC-",
30
+ "CCONJ",
31
+ "CD-",
32
+ "CO-",
33
+ "D--",
34
+ "D--+PS3",
35
+ "F--",
36
+ "F--+PS2",
37
+ "G--",
38
+ "G--+PS3",
39
+ "H--",
40
+ "I--",
41
+ "M--",
42
+ "M--+PS3",
43
+ "NOUN",
44
+ "NPD",
45
+ "NSD",
46
+ "NSD+PS3",
47
+ "NSF",
48
+ "NSM",
49
+ "NUM",
50
+ "O--",
51
+ "PP1",
52
+ "PP2",
53
+ "PP3",
54
+ "PROPN",
55
+ "PS1",
56
+ "PS1+VSA",
57
+ "PS2",
58
+ "PS3",
59
+ "R--",
60
+ "R--+PS3",
61
+ "S--",
62
+ "SYM",
63
+ "T--",
64
+ "VERB",
65
+ "VPA",
66
+ "VSA",
67
+ "VSA+PS2",
68
+ "VSA+PS3",
69
+ "VSP",
70
+ "W--",
71
+ "X--",
72
+ "Z--"
73
+ ],
74
+ "morphologizer":[
75
+ "POS=PROPN",
76
+ "POS=AUX",
77
+ "Definite=Ind|POS=DET|PronType=Art",
78
+ "Number=Sing|POS=NOUN",
79
+ "POS=PRON|PronType=Rel",
80
+ "Mood=Ind|POS=VERB|Voice=Pass",
81
+ "POS=ADP",
82
+ "POS=PUNCT",
83
+ "POS=NOUN",
84
+ "POS=ADV",
85
+ "POS=CCONJ",
86
+ "POS=SCONJ",
87
+ "Mood=Ind|POS=VERB|Voice=Act",
88
+ "POS=VERB",
89
+ "POS=DET|PronType=Tot",
90
+ "Number=Sing|POS=PRON|Person=3|PronType=Prs",
91
+ "Number=Plur|POS=PRON|Person=3|PronType=Prs",
92
+ "POS=PRON|PronType=Prs|Reflex=Yes",
93
+ "POS=DET|PronType=Dem",
94
+ "NumType=Card|POS=NUM",
95
+ "POS=ADJ",
96
+ "Number=Plur|POS=DET|PronType=Ind",
97
+ "NumType=Card|POS=NUM|PronType=Tot",
98
+ "POS=PART|Polarity=Neg",
99
+ "POS=PRON|PronType=Int",
100
+ "NumType=Ord|POS=ADJ",
101
+ "POS=PART",
102
+ "POS=PRON|PronType=Dem",
103
+ "POS=DET|PronType=Ind",
104
+ "Number=Plur|POS=NOUN",
105
+ "Number=Sing|POS=PRON|Person=1|Polite=Form|PronType=Prs",
106
+ "POS=ADV|PronType=Int",
107
+ "Clusivity=In|Number=Plur|POS=PRON|Person=1|PronType=Prs",
108
+ "Definite=Def|POS=DET|PronType=Art",
109
+ "POS=SYM",
110
+ "Degree=Sup|POS=ADJ",
111
+ "POS=INTJ",
112
+ "Number=Sing|POS=PRON|Person=2|Polite=Infm|PronType=Prs",
113
+ "POS=ADV|PronType=Ind",
114
+ "Number=Sing|POS=PRON|Person=3|Polite=Form|PronType=Prs",
115
+ "Number=Sing|POS=PRON|Person=1|Polite=Infm|PronType=Prs",
116
+ "Number=Sing|POS=PRON|PronType=Ind",
117
+ "POS=VERB|Voice=Act",
118
+ "POS=DET|PronType=Emp",
119
+ "POS=VERB|Voice=Pass",
120
+ "POS=ADV|PronType=Dem",
121
+ "POS=NOUN|Typo=Yes",
122
+ "POS=ADP|Typo=Yes",
123
+ "Number=Plur|POS=PRON|PronType=Ind",
124
+ "POS=VERB|Typo=Yes|Voice=Pass",
125
+ "POS=X",
126
+ "POS=PRON|PronType=Tot",
127
+ "POS=SCONJ|Typo=Yes",
128
+ "Number=Plur|POS=PRON|Person=2|Polite=Infm|PronType=Prs",
129
+ "NumType=Card|POS=NUM|Typo=Yes",
130
+ "Clusivity=Ex|Number=Plur|POS=PRON|Person=1|PronType=Prs",
131
+ "Number=Sing|POS=PRON|Person=2|Polite=Form|PronType=Prs",
132
+ "Foreign=Yes|POS=X",
133
+ "POS=ADV|PronType=Rel",
134
+ "Mood=Imp|POS=VERB|Voice=Act",
135
+ "Number=Sing|POS=NOUN|Typo=Yes",
136
+ "POS=PROPN|Typo=Yes",
137
+ "POS=DET",
138
+ "Number=Sing|POS=DET|PronType=Ind",
139
+ "POS=DET|PronType=Ind|Typo=Yes",
140
+ "Abbr=Yes|POS=DET|PronType=Dem",
141
+ "POS=PRON|PronType=Ind",
142
+ "POS=VERB|Typo=Yes",
143
+ "Abbr=Yes|POS=PROPN",
144
+ "Abbr=Yes|POS=PRON|PronType=Rel",
145
+ "Number=Plur|POS=PRON|PronType=Int",
146
+ "Abbr=Yes|POS=PART|Polarity=Neg",
147
+ "POS=ADV|PronType=Tot",
148
+ "Abbr=Yes|POS=ADV",
149
+ "POS=ADV|Typo=Yes",
150
+ "POS=X|Typo=Yes",
151
+ "Number=Sing|POS=PRON|Person=2|PronType=Prs",
152
+ "POS=ADV|PronType=Int|Typo=Yes",
153
+ "NumType=Ord|POS=ADJ|Typo=Yes"
154
+ ],
155
+ "parser":[
156
+ "ROOT",
157
+ "acl",
158
+ "acl:relcl",
159
+ "advcl",
160
+ "advmod",
161
+ "advmod:emph",
162
+ "amod",
163
+ "appos",
164
+ "aux",
165
+ "case",
166
+ "case:adv",
167
+ "cc",
168
+ "ccomp",
169
+ "compound",
170
+ "conj",
171
+ "cop",
172
+ "dep",
173
+ "det",
174
+ "fixed",
175
+ "flat",
176
+ "flat:foreign",
177
+ "flat:name",
178
+ "mark",
179
+ "nmod",
180
+ "nmod:lmod",
181
+ "nmod:poss",
182
+ "nmod:tmod",
183
+ "nsubj",
184
+ "nsubj:pass",
185
+ "nummod",
186
+ "obj",
187
+ "obl",
188
+ "obl:agent",
189
+ "obl:tmod",
190
+ "parataxis",
191
+ "punct",
192
+ "xcomp"
193
+ ]
194
+ },
195
+ "pipeline":[
196
+ "tok2vec",
197
+ "tagger",
198
+ "morphologizer",
199
+ "trainable_lemmatizer",
200
+ "parser"
201
+ ],
202
+ "components":[
203
+ "tok2vec",
204
+ "tagger",
205
+ "morphologizer",
206
+ "trainable_lemmatizer",
207
+ "parser"
208
+ ],
209
+ "disabled":[
210
+
211
+ ],
212
+ "performance":{
213
+ "tag_acc":0.9051536414,
214
+ "pos_acc":0.9125297415,
215
+ "morph_acc":0.9296115526,
216
+ "morph_per_feat":{
217
+ "Number":{
218
+ "p":0.9799159271,
219
+ "r":0.8237141735,
220
+ "f":0.8950511945
221
+ },
222
+ "Mood":{
223
+ "p":0.997098646,
224
+ "r":0.9246636771,
225
+ "f":0.959516054
226
+ },
227
+ "Voice":{
228
+ "p":0.9942084942,
229
+ "r":0.9221128021,
230
+ "f":0.9568044589
231
+ },
232
+ "PronType":{
233
+ "p":0.9915662651,
234
+ "r":0.7786187323,
235
+ "f":0.8722840488
236
+ },
237
+ "Polarity":{
238
+ "p":1.0,
239
+ "r":0.862745098,
240
+ "f":0.9263157895
241
+ },
242
+ "Person":{
243
+ "p":1.0,
244
+ "r":0.4039735099,
245
+ "f":0.5754716981
246
+ },
247
+ "NumType":{
248
+ "p":0.9952606635,
249
+ "r":0.9480812641,
250
+ "f":0.9710982659
251
+ },
252
+ "Typo":{
253
+ "p":1.0,
254
+ "r":0.4,
255
+ "f":0.5714285714
256
+ },
257
+ "Definite":{
258
+ "p":0.9838709677,
259
+ "r":0.7922077922,
260
+ "f":0.8776978417
261
+ },
262
+ "Polite":{
263
+ "p":1.0,
264
+ "r":0.71875,
265
+ "f":0.8363636364
266
+ },
267
+ "Reflex":{
268
+ "p":1.0,
269
+ "r":0.5,
270
+ "f":0.6666666667
271
+ },
272
+ "Degree":{
273
+ "p":0.9375,
274
+ "r":0.8823529412,
275
+ "f":0.9090909091
276
+ },
277
+ "Foreign":{
278
+ "p":1.0,
279
+ "r":0.0625,
280
+ "f":0.1176470588
281
+ },
282
+ "Clusivity":{
283
+ "p":1.0,
284
+ "r":1.0,
285
+ "f":1.0
286
+ },
287
+ "Abbr":{
288
+ "p":1.0,
289
+ "r":0.2,
290
+ "f":0.3333333333
291
+ }
292
+ },
293
+ "lemma_acc":0.9369920335,
294
+ "dep_uas":0.7753785754,
295
+ "dep_las":0.6871555348,
296
+ "dep_las_per_type":{
297
+ "nsubj":{
298
+ "p":0.7731092437,
299
+ "r":0.7459459459,
300
+ "f":0.7592847318
301
+ },
302
+ "compound":{
303
+ "p":0.6962676963,
304
+ "r":0.6874205845,
305
+ "f":0.6918158568
306
+ },
307
+ "root":{
308
+ "p":0.7585616438,
309
+ "r":0.7924865832,
310
+ "f":0.7751531059
311
+ },
312
+ "obj":{
313
+ "p":0.7978142077,
314
+ "r":0.7630662021,
315
+ "f":0.7800534283
316
+ },
317
+ "case":{
318
+ "p":0.9049295775,
319
+ "r":0.8877374784,
320
+ "f":0.8962510898
321
+ },
322
+ "obl":{
323
+ "p":0.6753246753,
324
+ "r":0.6409861325,
325
+ "f":0.6577075099
326
+ },
327
+ "amod":{
328
+ "p":0.6421568627,
329
+ "r":0.568329718,
330
+ "f":0.6029919448
331
+ },
332
+ "conj":{
333
+ "p":0.5394736842,
334
+ "r":0.5354477612,
335
+ "f":0.5374531835
336
+ },
337
+ "cc":{
338
+ "p":0.8705882353,
339
+ "r":0.8433048433,
340
+ "f":0.8567293777
341
+ },
342
+ "acl:relcl":{
343
+ "p":0.7086614173,
344
+ "r":0.6428571429,
345
+ "f":0.6741573034
346
+ },
347
+ "flat:name":{
348
+ "p":0.7908571429,
349
+ "r":0.8218527316,
350
+ "f":0.8060570763
351
+ },
352
+ "advmod":{
353
+ "p":0.7546174142,
354
+ "r":0.7132169576,
355
+ "f":0.7333333333
356
+ },
357
+ "nmod":{
358
+ "p":0.6468085106,
359
+ "r":0.5435041716,
360
+ "f":0.5906735751
361
+ },
362
+ "nsubj:pass":{
363
+ "p":0.698630137,
364
+ "r":0.7083333333,
365
+ "f":0.7034482759
366
+ },
367
+ "det":{
368
+ "p":0.8745980707,
369
+ "r":0.7661971831,
370
+ "f":0.8168168168
371
+ },
372
+ "aux":{
373
+ "p":0.9064748201,
374
+ "r":0.9402985075,
375
+ "f":0.9230769231
376
+ },
377
+ "nmod:poss":{
378
+ "p":0.5714285714,
379
+ "r":0.0727272727,
380
+ "f":0.1290322581
381
+ },
382
+ "dep":{
383
+ "p":0.0967741935,
384
+ "r":0.0535714286,
385
+ "f":0.0689655172
386
+ },
387
+ "mark":{
388
+ "p":0.8391959799,
389
+ "r":0.7076271186,
390
+ "f":0.767816092
391
+ },
392
+ "cop":{
393
+ "p":0.9607843137,
394
+ "r":0.9423076923,
395
+ "f":0.9514563107
396
+ },
397
+ "acl":{
398
+ "p":0.2297297297,
399
+ "r":0.3035714286,
400
+ "f":0.2615384615
401
+ },
402
+ "nummod":{
403
+ "p":0.7834224599,
404
+ "r":0.7855227882,
405
+ "f":0.7844712182
406
+ },
407
+ "appos":{
408
+ "p":0.5615384615,
409
+ "r":0.553030303,
410
+ "f":0.5572519084
411
+ },
412
+ "xcomp":{
413
+ "p":0.3630573248,
414
+ "r":0.456,
415
+ "f":0.4042553191
416
+ },
417
+ "ccomp":{
418
+ "p":0.4,
419
+ "r":0.0408163265,
420
+ "f":0.0740740741
421
+ },
422
+ "obl:tmod":{
423
+ "p":0.6551724138,
424
+ "r":0.6229508197,
425
+ "f":0.6386554622
426
+ },
427
+ "advcl":{
428
+ "p":0.2831858407,
429
+ "r":0.2269503546,
430
+ "f":0.2519685039
431
+ },
432
+ "advmod:emph":{
433
+ "p":0.6666666667,
434
+ "r":0.0434782609,
435
+ "f":0.0816326531
436
+ },
437
+ "case:adv":{
438
+ "p":0.6666666667,
439
+ "r":0.4615384615,
440
+ "f":0.5454545455
441
+ },
442
+ "obl:agent":{
443
+ "p":0.0,
444
+ "r":0.0,
445
+ "f":0.0
446
+ },
447
+ "flat":{
448
+ "p":0.5185185185,
449
+ "r":0.3255813953,
450
+ "f":0.4
451
+ },
452
+ "parataxis":{
453
+ "p":0.25,
454
+ "r":0.1020408163,
455
+ "f":0.1449275362
456
+ },
457
+ "nmod:lmod":{
458
+ "p":0.8333333333,
459
+ "r":0.3846153846,
460
+ "f":0.5263157895
461
+ },
462
+ "flat:foreign":{
463
+ "p":0.0,
464
+ "r":0.0,
465
+ "f":0.0
466
+ },
467
+ "nmod:tmod":{
468
+ "p":0.4,
469
+ "r":0.3636363636,
470
+ "f":0.380952381
471
+ },
472
+ "iobj":{
473
+ "p":0.0,
474
+ "r":0.0,
475
+ "f":0.0
476
+ },
477
+ "csubj":{
478
+ "p":0.0,
479
+ "r":0.0,
480
+ "f":0.0
481
+ },
482
+ "fixed":{
483
+ "p":0.5555555556,
484
+ "r":0.3846153846,
485
+ "f":0.4545454545
486
+ },
487
+ "discourse":{
488
+ "p":0.0,
489
+ "r":0.0,
490
+ "f":0.0
491
+ },
492
+ "cc:preconj":{
493
+ "p":0.0,
494
+ "r":0.0,
495
+ "f":0.0
496
+ },
497
+ "compound:a":{
498
+ "p":0.0,
499
+ "r":0.0,
500
+ "f":0.0
501
+ }
502
+ },
503
+ "sents_p":0.8272425249,
504
+ "sents_r":0.8908765653,
505
+ "sents_f":0.857881137,
506
+ "tok2vec_loss":7567.4337583379,
507
+ "tagger_loss":736.1438090745,
508
+ "morphologizer_loss":1556.8932885677,
509
+ "trainable_lemmatizer_loss":350.339323577,
510
+ "parser_loss":10378.5765946195
511
+ },
512
+ "requirements":[
513
+
514
+ ]
515
+ }
morphologizer/cfg ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "extend":false,
3
+ "label_smoothing":0.05,
4
+ "labels_morph":{
5
+ "POS=PROPN":"",
6
+ "POS=AUX":"",
7
+ "Definite=Ind|POS=DET|PronType=Art":"Definite=Ind|PronType=Art",
8
+ "Number=Sing|POS=NOUN":"Number=Sing",
9
+ "POS=PRON|PronType=Rel":"PronType=Rel",
10
+ "Mood=Ind|POS=VERB|Voice=Pass":"Mood=Ind|Voice=Pass",
11
+ "POS=ADP":"",
12
+ "POS=PUNCT":"",
13
+ "POS=NOUN":"",
14
+ "POS=ADV":"",
15
+ "POS=CCONJ":"",
16
+ "POS=SCONJ":"",
17
+ "Mood=Ind|POS=VERB|Voice=Act":"Mood=Ind|Voice=Act",
18
+ "POS=VERB":"",
19
+ "POS=DET|PronType=Tot":"PronType=Tot",
20
+ "Number=Sing|POS=PRON|Person=3|PronType=Prs":"Number=Sing|Person=3|PronType=Prs",
21
+ "Number=Plur|POS=PRON|Person=3|PronType=Prs":"Number=Plur|Person=3|PronType=Prs",
22
+ "POS=PRON|PronType=Prs|Reflex=Yes":"PronType=Prs|Reflex=Yes",
23
+ "POS=DET|PronType=Dem":"PronType=Dem",
24
+ "NumType=Card|POS=NUM":"NumType=Card",
25
+ "POS=ADJ":"",
26
+ "Number=Plur|POS=DET|PronType=Ind":"Number=Plur|PronType=Ind",
27
+ "NumType=Card|POS=NUM|PronType=Tot":"NumType=Card|PronType=Tot",
28
+ "POS=PART|Polarity=Neg":"Polarity=Neg",
29
+ "POS=PRON|PronType=Int":"PronType=Int",
30
+ "NumType=Ord|POS=ADJ":"NumType=Ord",
31
+ "POS=PART":"",
32
+ "POS=PRON|PronType=Dem":"PronType=Dem",
33
+ "POS=DET|PronType=Ind":"PronType=Ind",
34
+ "Number=Plur|POS=NOUN":"Number=Plur",
35
+ "Number=Sing|POS=PRON|Person=1|Polite=Form|PronType=Prs":"Number=Sing|Person=1|Polite=Form|PronType=Prs",
36
+ "POS=ADV|PronType=Int":"PronType=Int",
37
+ "Clusivity=In|Number=Plur|POS=PRON|Person=1|PronType=Prs":"Clusivity=In|Number=Plur|Person=1|PronType=Prs",
38
+ "Definite=Def|POS=DET|PronType=Art":"Definite=Def|PronType=Art",
39
+ "POS=SYM":"",
40
+ "Degree=Sup|POS=ADJ":"Degree=Sup",
41
+ "POS=INTJ":"",
42
+ "Number=Sing|POS=PRON|Person=2|Polite=Infm|PronType=Prs":"Number=Sing|Person=2|Polite=Infm|PronType=Prs",
43
+ "POS=ADV|PronType=Ind":"PronType=Ind",
44
+ "Number=Sing|POS=PRON|Person=3|Polite=Form|PronType=Prs":"Number=Sing|Person=3|Polite=Form|PronType=Prs",
45
+ "Number=Sing|POS=PRON|Person=1|Polite=Infm|PronType=Prs":"Number=Sing|Person=1|Polite=Infm|PronType=Prs",
46
+ "Number=Sing|POS=PRON|PronType=Ind":"Number=Sing|PronType=Ind",
47
+ "POS=VERB|Voice=Act":"Voice=Act",
48
+ "POS=DET|PronType=Emp":"PronType=Emp",
49
+ "POS=VERB|Voice=Pass":"Voice=Pass",
50
+ "POS=ADV|PronType=Dem":"PronType=Dem",
51
+ "POS=NOUN|Typo=Yes":"Typo=Yes",
52
+ "POS=ADP|Typo=Yes":"Typo=Yes",
53
+ "Number=Plur|POS=PRON|PronType=Ind":"Number=Plur|PronType=Ind",
54
+ "POS=VERB|Typo=Yes|Voice=Pass":"Typo=Yes|Voice=Pass",
55
+ "POS=X":"",
56
+ "POS=PRON|PronType=Tot":"PronType=Tot",
57
+ "POS=SCONJ|Typo=Yes":"Typo=Yes",
58
+ "Number=Plur|POS=PRON|Person=2|Polite=Infm|PronType=Prs":"Number=Plur|Person=2|Polite=Infm|PronType=Prs",
59
+ "NumType=Card|POS=NUM|Typo=Yes":"NumType=Card|Typo=Yes",
60
+ "Clusivity=Ex|Number=Plur|POS=PRON|Person=1|PronType=Prs":"Clusivity=Ex|Number=Plur|Person=1|PronType=Prs",
61
+ "Number=Sing|POS=PRON|Person=2|Polite=Form|PronType=Prs":"Number=Sing|Person=2|Polite=Form|PronType=Prs",
62
+ "Foreign=Yes|POS=X":"Foreign=Yes",
63
+ "POS=ADV|PronType=Rel":"PronType=Rel",
64
+ "Mood=Imp|POS=VERB|Voice=Act":"Mood=Imp|Voice=Act",
65
+ "Number=Sing|POS=NOUN|Typo=Yes":"Number=Sing|Typo=Yes",
66
+ "POS=PROPN|Typo=Yes":"Typo=Yes",
67
+ "POS=DET":"",
68
+ "Number=Sing|POS=DET|PronType=Ind":"Number=Sing|PronType=Ind",
69
+ "POS=DET|PronType=Ind|Typo=Yes":"PronType=Ind|Typo=Yes",
70
+ "Abbr=Yes|POS=DET|PronType=Dem":"Abbr=Yes|PronType=Dem",
71
+ "POS=PRON|PronType=Ind":"PronType=Ind",
72
+ "POS=VERB|Typo=Yes":"Typo=Yes",
73
+ "Abbr=Yes|POS=PROPN":"Abbr=Yes",
74
+ "Abbr=Yes|POS=PRON|PronType=Rel":"Abbr=Yes|PronType=Rel",
75
+ "Number=Plur|POS=PRON|PronType=Int":"Number=Plur|PronType=Int",
76
+ "Abbr=Yes|POS=PART|Polarity=Neg":"Abbr=Yes|Polarity=Neg",
77
+ "POS=ADV|PronType=Tot":"PronType=Tot",
78
+ "Abbr=Yes|POS=ADV":"Abbr=Yes",
79
+ "POS=ADV|Typo=Yes":"Typo=Yes",
80
+ "POS=X|Typo=Yes":"Typo=Yes",
81
+ "Number=Sing|POS=PRON|Person=2|PronType=Prs":"Number=Sing|Person=2|PronType=Prs",
82
+ "POS=ADV|PronType=Int|Typo=Yes":"PronType=Int|Typo=Yes",
83
+ "NumType=Ord|POS=ADJ|Typo=Yes":"NumType=Ord|Typo=Yes"
84
+ },
85
+ "labels_pos":{
86
+ "POS=PROPN":96,
87
+ "POS=AUX":87,
88
+ "Definite=Ind|POS=DET|PronType=Art":90,
89
+ "Number=Sing|POS=NOUN":92,
90
+ "POS=PRON|PronType=Rel":95,
91
+ "Mood=Ind|POS=VERB|Voice=Pass":100,
92
+ "POS=ADP":85,
93
+ "POS=PUNCT":97,
94
+ "POS=NOUN":92,
95
+ "POS=ADV":86,
96
+ "POS=CCONJ":89,
97
+ "POS=SCONJ":98,
98
+ "Mood=Ind|POS=VERB|Voice=Act":100,
99
+ "POS=VERB":100,
100
+ "POS=DET|PronType=Tot":90,
101
+ "Number=Sing|POS=PRON|Person=3|PronType=Prs":95,
102
+ "Number=Plur|POS=PRON|Person=3|PronType=Prs":95,
103
+ "POS=PRON|PronType=Prs|Reflex=Yes":95,
104
+ "POS=DET|PronType=Dem":90,
105
+ "NumType=Card|POS=NUM":93,
106
+ "POS=ADJ":84,
107
+ "Number=Plur|POS=DET|PronType=Ind":90,
108
+ "NumType=Card|POS=NUM|PronType=Tot":93,
109
+ "POS=PART|Polarity=Neg":94,
110
+ "POS=PRON|PronType=Int":95,
111
+ "NumType=Ord|POS=ADJ":84,
112
+ "POS=PART":94,
113
+ "POS=PRON|PronType=Dem":95,
114
+ "POS=DET|PronType=Ind":90,
115
+ "Number=Plur|POS=NOUN":92,
116
+ "Number=Sing|POS=PRON|Person=1|Polite=Form|PronType=Prs":95,
117
+ "POS=ADV|PronType=Int":86,
118
+ "Clusivity=In|Number=Plur|POS=PRON|Person=1|PronType=Prs":95,
119
+ "Definite=Def|POS=DET|PronType=Art":90,
120
+ "POS=SYM":99,
121
+ "Degree=Sup|POS=ADJ":84,
122
+ "POS=INTJ":91,
123
+ "Number=Sing|POS=PRON|Person=2|Polite=Infm|PronType=Prs":95,
124
+ "POS=ADV|PronType=Ind":86,
125
+ "Number=Sing|POS=PRON|Person=3|Polite=Form|PronType=Prs":95,
126
+ "Number=Sing|POS=PRON|Person=1|Polite=Infm|PronType=Prs":95,
127
+ "Number=Sing|POS=PRON|PronType=Ind":95,
128
+ "POS=VERB|Voice=Act":100,
129
+ "POS=DET|PronType=Emp":90,
130
+ "POS=VERB|Voice=Pass":100,
131
+ "POS=ADV|PronType=Dem":86,
132
+ "POS=NOUN|Typo=Yes":92,
133
+ "POS=ADP|Typo=Yes":85,
134
+ "Number=Plur|POS=PRON|PronType=Ind":95,
135
+ "POS=VERB|Typo=Yes|Voice=Pass":100,
136
+ "POS=X":101,
137
+ "POS=PRON|PronType=Tot":95,
138
+ "POS=SCONJ|Typo=Yes":98,
139
+ "Number=Plur|POS=PRON|Person=2|Polite=Infm|PronType=Prs":95,
140
+ "NumType=Card|POS=NUM|Typo=Yes":93,
141
+ "Clusivity=Ex|Number=Plur|POS=PRON|Person=1|PronType=Prs":95,
142
+ "Number=Sing|POS=PRON|Person=2|Polite=Form|PronType=Prs":95,
143
+ "Foreign=Yes|POS=X":101,
144
+ "POS=ADV|PronType=Rel":86,
145
+ "Mood=Imp|POS=VERB|Voice=Act":100,
146
+ "Number=Sing|POS=NOUN|Typo=Yes":92,
147
+ "POS=PROPN|Typo=Yes":96,
148
+ "POS=DET":90,
149
+ "Number=Sing|POS=DET|PronType=Ind":90,
150
+ "POS=DET|PronType=Ind|Typo=Yes":90,
151
+ "Abbr=Yes|POS=DET|PronType=Dem":90,
152
+ "POS=PRON|PronType=Ind":95,
153
+ "POS=VERB|Typo=Yes":100,
154
+ "Abbr=Yes|POS=PROPN":96,
155
+ "Abbr=Yes|POS=PRON|PronType=Rel":95,
156
+ "Number=Plur|POS=PRON|PronType=Int":95,
157
+ "Abbr=Yes|POS=PART|Polarity=Neg":94,
158
+ "POS=ADV|PronType=Tot":86,
159
+ "Abbr=Yes|POS=ADV":86,
160
+ "POS=ADV|Typo=Yes":86,
161
+ "POS=X|Typo=Yes":101,
162
+ "Number=Sing|POS=PRON|Person=2|PronType=Prs":95,
163
+ "POS=ADV|PronType=Int|Typo=Yes":86,
164
+ "NumType=Ord|POS=ADJ|Typo=Yes":84
165
+ },
166
+ "overwrite":true
167
+ }
morphologizer/model ADDED
Binary file (81.7 kB). View file
 
parser/cfg ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "moves":null,
3
+ "update_with_oracle_cut_size":100,
4
+ "multitasks":[
5
+
6
+ ],
7
+ "min_action_freq":30,
8
+ "learn_tokens":false,
9
+ "beam_width":1,
10
+ "beam_density":0.0,
11
+ "beam_update_prob":0.0,
12
+ "incorrect_spans_key":null
13
+ }
parser/model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16da2082353eec368fedc44e473009c51f5ee03be764493d78146bf2977b6645
3
+ size 1750016
parser/moves ADDED
@@ -0,0 +1 @@
 
 
1
+ ��moves�{"0":{"":36809},"1":{"":56313},"2":{"case":8690,"punct":6049,"nsubj":5534,"advmod":2999,"cc":2690,"nsubj:pass":1726,"mark":1658,"nummod":1525,"det":1313,"aux":921,"obl":852,"cop":845,"amod":398,"obj":251,"advcl":250,"nmod":217,"obl:tmod":211,"compound":124,"xcomp":112,"case:adv":72,"advmod:emph":57,"acl":51,"parataxis":42,"dep":41,"nmod:lmod":37},"3":{"punct":7872,"flat:name":6453,"nmod":6282,"compound":5703,"obj":4488,"obl":3927,"conj":3876,"amod":2949,"acl:relcl":2164,"appos":2119,"det":1663,"nummod":1602,"nmod:poss":1139,"xcomp":1014,"acl":923,"advcl":798,"advmod":400,"dep":359,"ccomp":336,"obl:tmod":319,"flat":305,"advmod:emph":292,"parataxis":274,"fixed":237,"case":174,"cc":134,"nmod:tmod":83,"nsubj":74,"mark":70,"obl:agent":45,"flat:foreign":41,"nsubj:pass":40},"4":{"ROOT":4482}}�cfg��neg_key�
tagger/cfg ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "label_smoothing":0.05,
3
+ "labels":[
4
+ "APP",
5
+ "ASP",
6
+ "ASP+PS3",
7
+ "ASS",
8
+ "B--",
9
+ "B--+PS3",
10
+ "CC-",
11
+ "CCONJ",
12
+ "CD-",
13
+ "CO-",
14
+ "D--",
15
+ "D--+PS3",
16
+ "F--",
17
+ "F--+PS2",
18
+ "G--",
19
+ "G--+PS3",
20
+ "H--",
21
+ "I--",
22
+ "M--",
23
+ "M--+PS3",
24
+ "NOUN",
25
+ "NPD",
26
+ "NSD",
27
+ "NSD+PS3",
28
+ "NSF",
29
+ "NSM",
30
+ "NUM",
31
+ "O--",
32
+ "PP1",
33
+ "PP2",
34
+ "PP3",
35
+ "PROPN",
36
+ "PS1",
37
+ "PS1+VSA",
38
+ "PS2",
39
+ "PS3",
40
+ "R--",
41
+ "R--+PS3",
42
+ "S--",
43
+ "SYM",
44
+ "T--",
45
+ "VERB",
46
+ "VPA",
47
+ "VSA",
48
+ "VSA+PS2",
49
+ "VSA+PS3",
50
+ "VSP",
51
+ "W--",
52
+ "X--",
53
+ "Z--"
54
+ ],
55
+ "neg_prefix":"!",
56
+ "overwrite":false
57
+ }
tagger/model ADDED
Binary file (51.8 kB). View file
 
tok2vec/cfg ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+
3
+ }
tok2vec/model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b0c198567f39ac6146d7716317c079ec7977eeac835e0e6d3fb926c4076d8e6
3
+ size 34126801
tokenizer ADDED
The diff for this file is too large to render. See raw diff
 
trainable_lemmatizer/cfg ADDED
@@ -0,0 +1,337 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "labels":[
3
+ 1,
4
+ 2,
5
+ 4,
6
+ 6,
7
+ 9,
8
+ 11,
9
+ 13,
10
+ 15,
11
+ 17,
12
+ 19,
13
+ 21,
14
+ 22,
15
+ 24,
16
+ 26,
17
+ 28,
18
+ 31,
19
+ 33,
20
+ 36,
21
+ 38,
22
+ 39,
23
+ 40,
24
+ 42,
25
+ 44,
26
+ 46,
27
+ 47,
28
+ 49,
29
+ 51,
30
+ 53,
31
+ 55,
32
+ 57,
33
+ 59,
34
+ 61,
35
+ 63,
36
+ 65,
37
+ 67,
38
+ 68,
39
+ 70,
40
+ 71,
41
+ 73,
42
+ 75,
43
+ 77,
44
+ 64,
45
+ 79,
46
+ 80,
47
+ 83,
48
+ 84,
49
+ 85,
50
+ 87,
51
+ 89,
52
+ 91,
53
+ 93,
54
+ 95,
55
+ 97,
56
+ 98,
57
+ 100,
58
+ 101,
59
+ 102,
60
+ 105,
61
+ 107,
62
+ 108,
63
+ 110,
64
+ 112,
65
+ 114,
66
+ 115,
67
+ 117,
68
+ 118,
69
+ 121,
70
+ 123,
71
+ 124,
72
+ 125,
73
+ 127,
74
+ 128,
75
+ 129,
76
+ 131,
77
+ 18,
78
+ 133,
79
+ 135,
80
+ 137,
81
+ 138,
82
+ 139,
83
+ 141,
84
+ 142,
85
+ 144,
86
+ 145,
87
+ 147,
88
+ 149,
89
+ 151,
90
+ 152,
91
+ 153,
92
+ 154,
93
+ 156,
94
+ 157,
95
+ 159,
96
+ 161,
97
+ 162,
98
+ 164,
99
+ 165,
100
+ 167,
101
+ 168,
102
+ 170,
103
+ 172,
104
+ 174,
105
+ 176,
106
+ 178,
107
+ 179,
108
+ 182,
109
+ 183,
110
+ 185,
111
+ 187,
112
+ 188,
113
+ 190,
114
+ 191,
115
+ 193,
116
+ 194,
117
+ 195,
118
+ 58,
119
+ 197,
120
+ 199,
121
+ 200,
122
+ 201,
123
+ 202,
124
+ 204,
125
+ 206,
126
+ 208,
127
+ 210,
128
+ 211,
129
+ 212,
130
+ 213,
131
+ 214,
132
+ 37,
133
+ 216,
134
+ 217,
135
+ 220,
136
+ 221,
137
+ 76,
138
+ 223,
139
+ 224,
140
+ 225,
141
+ 226,
142
+ 227,
143
+ 228,
144
+ 229,
145
+ 231,
146
+ 234,
147
+ 235,
148
+ 237,
149
+ 239,
150
+ 240,
151
+ 241,
152
+ 243,
153
+ 245,
154
+ 246,
155
+ 248,
156
+ 249,
157
+ 251,
158
+ 253,
159
+ 255,
160
+ 257,
161
+ 259,
162
+ 262,
163
+ 263,
164
+ 264,
165
+ 266,
166
+ 267,
167
+ 268,
168
+ 270,
169
+ 271,
170
+ 56,
171
+ 273,
172
+ 274,
173
+ 275,
174
+ 276,
175
+ 278,
176
+ 279,
177
+ 280,
178
+ 282,
179
+ 283,
180
+ 284,
181
+ 286,
182
+ 287,
183
+ 288,
184
+ 289,
185
+ 291,
186
+ 293,
187
+ 295,
188
+ 297,
189
+ 298,
190
+ 299,
191
+ 16,
192
+ 300,
193
+ 301,
194
+ 302,
195
+ 304,
196
+ 305,
197
+ 307,
198
+ 309,
199
+ 109,
200
+ 310,
201
+ 311,
202
+ 313,
203
+ 315,
204
+ 317,
205
+ 318,
206
+ 66,
207
+ 319,
208
+ 62,
209
+ 321,
210
+ 322,
211
+ 323,
212
+ 324,
213
+ 326,
214
+ 328,
215
+ 330,
216
+ 331,
217
+ 333,
218
+ 334,
219
+ 60,
220
+ 50,
221
+ 336,
222
+ 337,
223
+ 338,
224
+ 339,
225
+ 340,
226
+ 341,
227
+ 342,
228
+ 343,
229
+ 345,
230
+ 346,
231
+ 209,
232
+ 348,
233
+ 349,
234
+ 350,
235
+ 351,
236
+ 352,
237
+ 354,
238
+ 356,
239
+ 358,
240
+ 359,
241
+ 360,
242
+ 361,
243
+ 0,
244
+ 362,
245
+ 363,
246
+ 365,
247
+ 367,
248
+ 369,
249
+ 371,
250
+ 372,
251
+ 373,
252
+ 375,
253
+ 377,
254
+ 378,
255
+ 381,
256
+ 382,
257
+ 69,
258
+ 383,
259
+ 386,
260
+ 387,
261
+ 389,
262
+ 390,
263
+ 391,
264
+ 392,
265
+ 393,
266
+ 394,
267
+ 396,
268
+ 397,
269
+ 399,
270
+ 400,
271
+ 401,
272
+ 402,
273
+ 403,
274
+ 404,
275
+ 405,
276
+ 407,
277
+ 408,
278
+ 409,
279
+ 411,
280
+ 413,
281
+ 414,
282
+ 416,
283
+ 418,
284
+ 419,
285
+ 420,
286
+ 421,
287
+ 422,
288
+ 423,
289
+ 424,
290
+ 425,
291
+ 426,
292
+ 427,
293
+ 429,
294
+ 431,
295
+ 432,
296
+ 435,
297
+ 436,
298
+ 437,
299
+ 438,
300
+ 439,
301
+ 440,
302
+ 441,
303
+ 443,
304
+ 444,
305
+ 446,
306
+ 448,
307
+ 449,
308
+ 450,
309
+ 452,
310
+ 453,
311
+ 455,
312
+ 457,
313
+ 459,
314
+ 460,
315
+ 461,
316
+ 462,
317
+ 463,
318
+ 464,
319
+ 466,
320
+ 468,
321
+ 92,
322
+ 469,
323
+ 470,
324
+ 472,
325
+ 474,
326
+ 475,
327
+ 476,
328
+ 477,
329
+ 478,
330
+ 480,
331
+ 482,
332
+ 483,
333
+ 484,
334
+ 485,
335
+ 486
336
+ ]
337
+ }
trainable_lemmatizer/model ADDED
Binary file (343 kB). View file
 
trainable_lemmatizer/trees ADDED
Binary file (85.4 kB). View file
 
vocab/key2row ADDED
@@ -0,0 +1 @@
 
 
1
+
vocab/lookups.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76be8b528d0075f7aae98d6fa57a6d3c83ae480a8469e668d7b0af968995ac71
3
+ size 1
vocab/strings.json ADDED
The diff for this file is too large to render. See raw diff
 
vocab/vectors ADDED
Binary file (128 Bytes). View file
 
vocab/vectors.cfg ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "mode":"default"
3
+ }