iproskurina
commited on
Commit
•
59d18c0
1
Parent(s):
d387cbe
Update spaCy pipeline
Browse files- en_grammar_checker-any-py3-none-any.whl +2 -2
- meta.json +17 -17
- training/articles/config.cfg +6 -4
- training/articles/meta.json +6 -4
- training/articles/spancat/cfg +2 -1
- training/articles/spancat/model +2 -2
- training/articles/tokenizer +2 -2
- training/articles/transformer/model +2 -2
- training/articles/vocab/strings.json +2 -2
- training/grammar_major/config.cfg +10 -8
- training/grammar_major/meta.json +6 -4
- training/grammar_major/spancat/cfg +2 -1
- training/grammar_major/spancat/model +2 -2
- training/grammar_major/tokenizer +2 -2
- training/grammar_major/transformer/model +2 -2
- training/grammar_major/vocab/strings.json +2 -2
- training/grammar_minor/config.cfg +8 -8
- training/grammar_minor/meta.json +4 -6
- training/grammar_minor/spancat/cfg +1 -2
- training/grammar_minor/spancat/model +2 -2
- training/grammar_minor/tokenizer +2 -2
- training/grammar_minor/transformer/model +2 -2
- training/grammar_minor/vocab/strings.json +2 -2
- training/punctuation/config.cfg +4 -3
- training/punctuation/meta.json +3 -3
- training/punctuation/spancat/model +1 -1
- training/punctuation/tokenizer +2 -2
- training/punctuation/transformer/model +2 -2
- training/punctuation/vocab/strings.json +2 -2
- training/spelling/config.cfg +6 -5
- training/spelling/meta.json +5 -5
- training/spelling/spancat/cfg +2 -2
- training/spelling/spancat/model +1 -1
- training/spelling/tokenizer +2 -2
- training/spelling/transformer/model +2 -2
- training/spelling/vocab/strings.json +2 -2
- training/vocabulary/config.cfg +9 -7
- training/vocabulary/meta.json +7 -5
- training/vocabulary/spancat/cfg +3 -2
- training/vocabulary/spancat/model +2 -2
- training/vocabulary/tokenizer +2 -2
- training/vocabulary/transformer/model +2 -2
- training/vocabulary/vocab/strings.json +2 -2
en_grammar_checker-any-py3-none-any.whl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:022e2aa7c5fe1b54f173deb7dd82b65f03bd5cc28900a0a0caadfebb6a377338
|
3 |
+
size 2441455340
|
meta.json
CHANGED
@@ -7,16 +7,21 @@
|
|
7 |
"email":"",
|
8 |
"url":"",
|
9 |
"license":"CC BY-SA 3.0",
|
10 |
-
"spacy_version":">=3.5.0",
|
11 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
"vectors":{
|
13 |
"width":0,
|
14 |
"vectors":0,
|
15 |
"keys":0,
|
16 |
"name":null
|
17 |
-
},
|
18 |
-
"labels":{
|
19 |
-
|
20 |
},
|
21 |
"pipeline":[
|
22 |
"punctuation",
|
@@ -34,18 +39,9 @@
|
|
34 |
"grammar_minor",
|
35 |
"vocabulary"
|
36 |
],
|
37 |
-
"
|
38 |
|
39 |
-
|
40 |
-
"parent_package":"spacy",
|
41 |
-
"requirements":[
|
42 |
-
"spacy-transformers>=1.0.0"
|
43 |
-
],
|
44 |
-
"sources":[
|
45 |
-
{
|
46 |
-
"license":"MIT"
|
47 |
-
}
|
48 |
-
],
|
49 |
"performance":{
|
50 |
"spans_articles_p":0.8274481176,
|
51 |
"spans_articles_r":0.8197202306,
|
@@ -149,5 +145,9 @@
|
|
149 |
}
|
150 |
}
|
151 |
},
|
152 |
-
"speed":5613.7287946692
|
|
|
|
|
|
|
|
|
153 |
}
|
|
|
7 |
"email":"",
|
8 |
"url":"",
|
9 |
"license":"CC BY-SA 3.0",
|
10 |
+
"spacy_version":">=3.5.0,<3.6.0",
|
11 |
+
"parent_package":"spacy",
|
12 |
+
"requirements":[
|
13 |
+
"spacy-transformers>=1.0.0"
|
14 |
+
],
|
15 |
+
"sources":[
|
16 |
+
{
|
17 |
+
"license":"MIT"
|
18 |
+
}
|
19 |
+
],
|
20 |
"vectors":{
|
21 |
"width":0,
|
22 |
"vectors":0,
|
23 |
"keys":0,
|
24 |
"name":null
|
|
|
|
|
|
|
25 |
},
|
26 |
"pipeline":[
|
27 |
"punctuation",
|
|
|
39 |
"grammar_minor",
|
40 |
"vocabulary"
|
41 |
],
|
42 |
+
"labels":{
|
43 |
|
44 |
+
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
"performance":{
|
46 |
"spans_articles_p":0.8274481176,
|
47 |
"spans_articles_r":0.8197202306,
|
|
|
145 |
}
|
146 |
}
|
147 |
},
|
148 |
+
"speed":5613.7287946692,
|
149 |
+
"spacy_git_version":"61dfdd9fb",
|
150 |
+
"disabled":[
|
151 |
+
|
152 |
+
]
|
153 |
}
|
training/articles/config.cfg
CHANGED
@@ -97,12 +97,13 @@ frozen_components = []
|
|
97 |
seed = 0
|
98 |
gpu_allocator = "pytorch"
|
99 |
dropout = 0.1
|
100 |
-
patience =
|
101 |
max_epochs = 0
|
102 |
max_steps = 20000
|
103 |
-
eval_frequency =
|
104 |
annotating_components = []
|
105 |
before_to_disk = null
|
|
|
106 |
|
107 |
[training.batcher]
|
108 |
@batchers = "spacy.batch_by_padded.v1"
|
@@ -113,7 +114,7 @@ get_length = null
|
|
113 |
|
114 |
[training.logger]
|
115 |
@loggers = "spacy.WandbLogger.v3"
|
116 |
-
project_name = "
|
117 |
remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
|
118 |
model_log_interval = 1000
|
119 |
log_dataset_dir = null
|
@@ -140,7 +141,8 @@ initial_rate = 0.00005
|
|
140 |
spans_sc_f = 0.5
|
141 |
spans_sc_p = 0.0
|
142 |
spans_sc_r = 0.0
|
143 |
-
spans_Articles_f = 0.
|
|
|
144 |
|
145 |
[pretraining]
|
146 |
|
|
|
97 |
seed = 0
|
98 |
gpu_allocator = "pytorch"
|
99 |
dropout = 0.1
|
100 |
+
patience = 3000
|
101 |
max_epochs = 0
|
102 |
max_steps = 20000
|
103 |
+
eval_frequency = 500
|
104 |
annotating_components = []
|
105 |
before_to_disk = null
|
106 |
+
before_update = null
|
107 |
|
108 |
[training.batcher]
|
109 |
@batchers = "spacy.batch_by_padded.v1"
|
|
|
114 |
|
115 |
[training.logger]
|
116 |
@loggers = "spacy.WandbLogger.v3"
|
117 |
+
project_name = "grammar_checker"
|
118 |
remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
|
119 |
model_log_interval = 1000
|
120 |
log_dataset_dir = null
|
|
|
141 |
spans_sc_f = 0.5
|
142 |
spans_sc_p = 0.0
|
143 |
spans_sc_r = 0.0
|
144 |
+
spans_Articles_f = 0.01
|
145 |
+
spans_Determiners_f = 0.49
|
146 |
|
147 |
[pretraining]
|
148 |
|
training/articles/meta.json
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
"lang":"en",
|
3 |
"name":"pipeline",
|
4 |
"version":"0.0.0",
|
5 |
-
"spacy_version":">=3.
|
6 |
"description":"",
|
7 |
"author":"",
|
8 |
"email":"",
|
@@ -21,7 +21,8 @@
|
|
21 |
|
22 |
],
|
23 |
"spancat":[
|
24 |
-
"Articles"
|
|
|
25 |
]
|
26 |
},
|
27 |
"pipeline":[
|
@@ -40,7 +41,8 @@
|
|
40 |
"spans_sc_p":0.0,
|
41 |
"spans_sc_r":0.0,
|
42 |
"spans_Articles_f":0.0,
|
43 |
-
"
|
44 |
-
"
|
|
|
45 |
}
|
46 |
}
|
|
|
2 |
"lang":"en",
|
3 |
"name":"pipeline",
|
4 |
"version":"0.0.0",
|
5 |
+
"spacy_version":">=3.5.0,<3.6.0",
|
6 |
"description":"",
|
7 |
"author":"",
|
8 |
"email":"",
|
|
|
21 |
|
22 |
],
|
23 |
"spancat":[
|
24 |
+
"Articles",
|
25 |
+
"Determiners"
|
26 |
]
|
27 |
},
|
28 |
"pipeline":[
|
|
|
41 |
"spans_sc_p":0.0,
|
42 |
"spans_sc_r":0.0,
|
43 |
"spans_Articles_f":0.0,
|
44 |
+
"spans_Determiners_f":0.0,
|
45 |
+
"transformer_loss":105.063624234,
|
46 |
+
"spancat_loss":5765.2548046919
|
47 |
}
|
48 |
}
|
training/articles/spancat/cfg
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
{
|
2 |
"labels":[
|
3 |
-
"Articles"
|
|
|
4 |
],
|
5 |
"spans_key":"articles",
|
6 |
"threshold":0.5,
|
|
|
1 |
{
|
2 |
"labels":[
|
3 |
+
"Articles",
|
4 |
+
"Determiners"
|
5 |
],
|
6 |
"spans_key":"articles",
|
7 |
"threshold":0.5,
|
training/articles/spancat/model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c35e698ee5d227a921ebfc90e99e502db197e5c2897b6c574f14a32e8630821
|
3 |
+
size 4724007
|
training/articles/tokenizer
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b014e8bba4958b120af2d0c1c63eabb7c00379f2bacaf10df7c5325efd2ea467
|
3 |
+
size 77066
|
training/articles/transformer/model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ffd19d8ecc91a52ab4494d5ed5dce4469a3ae220895b98c5b63705fadac330b
|
3 |
+
size 434208137
|
training/articles/vocab/strings.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:121520107d12556bee02dbe233c8b08eab86d33fd2839eaf7505679d63227ad9
|
3 |
+
size 289348
|
training/grammar_major/config.cfg
CHANGED
@@ -97,12 +97,13 @@ frozen_components = []
|
|
97 |
seed = 0
|
98 |
gpu_allocator = "pytorch"
|
99 |
dropout = 0.1
|
100 |
-
patience =
|
101 |
max_epochs = 0
|
102 |
max_steps = 20000
|
103 |
-
eval_frequency =
|
104 |
annotating_components = []
|
105 |
before_to_disk = null
|
|
|
106 |
|
107 |
[training.batcher]
|
108 |
@batchers = "spacy.batch_by_padded.v1"
|
@@ -113,7 +114,7 @@ get_length = null
|
|
113 |
|
114 |
[training.logger]
|
115 |
@loggers = "spacy.WandbLogger.v3"
|
116 |
-
project_name = "
|
117 |
remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
|
118 |
model_log_interval = 1000
|
119 |
log_dataset_dir = null
|
@@ -137,13 +138,14 @@ total_steps = 20000
|
|
137 |
initial_rate = 0.00005
|
138 |
|
139 |
[training.score_weights]
|
140 |
-
spans_sc_f = 0.
|
141 |
spans_sc_p = 0.0
|
142 |
spans_sc_r = 0.0
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
|
|
147 |
|
148 |
[pretraining]
|
149 |
|
|
|
97 |
seed = 0
|
98 |
gpu_allocator = "pytorch"
|
99 |
dropout = 0.1
|
100 |
+
patience = 3000
|
101 |
max_epochs = 0
|
102 |
max_steps = 20000
|
103 |
+
eval_frequency = 500
|
104 |
annotating_components = []
|
105 |
before_to_disk = null
|
106 |
+
before_update = null
|
107 |
|
108 |
[training.batcher]
|
109 |
@batchers = "spacy.batch_by_padded.v1"
|
|
|
114 |
|
115 |
[training.logger]
|
116 |
@loggers = "spacy.WandbLogger.v3"
|
117 |
+
project_name = "grammar_checker"
|
118 |
remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
|
119 |
model_log_interval = 1000
|
120 |
log_dataset_dir = null
|
|
|
138 |
initial_rate = 0.00005
|
139 |
|
140 |
[training.score_weights]
|
141 |
+
spans_sc_f = 0.51
|
142 |
spans_sc_p = 0.0
|
143 |
spans_sc_r = 0.0
|
144 |
+
spans_Absence_comp_sent_f = 0.15
|
145 |
+
spans_Agreement_errors_f = 0.09
|
146 |
+
spans_Prepositions_f = 0.08
|
147 |
+
spans_Redundant_comp_f = 0.1
|
148 |
+
spans_Tense_choice_f = 0.07
|
149 |
|
150 |
[pretraining]
|
151 |
|
training/grammar_major/meta.json
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
"lang":"en",
|
3 |
"name":"pipeline",
|
4 |
"version":"0.0.0",
|
5 |
-
"spacy_version":">=3.
|
6 |
"description":"",
|
7 |
"author":"",
|
8 |
"email":"",
|
@@ -21,8 +21,9 @@
|
|
21 |
|
22 |
],
|
23 |
"spancat":[
|
24 |
-
"
|
25 |
"Prepositions",
|
|
|
26 |
"Tense_choice",
|
27 |
"Redundant_comp"
|
28 |
]
|
@@ -42,11 +43,12 @@
|
|
42 |
"spans_sc_f":0.0,
|
43 |
"spans_sc_p":0.0,
|
44 |
"spans_sc_r":0.0,
|
|
|
45 |
"spans_Agreement_errors_f":0.0,
|
46 |
"spans_Prepositions_f":0.0,
|
47 |
"spans_Redundant_comp_f":0.0,
|
48 |
"spans_Tense_choice_f":0.0,
|
49 |
-
"transformer_loss":
|
50 |
-
"spancat_loss":
|
51 |
}
|
52 |
}
|
|
|
2 |
"lang":"en",
|
3 |
"name":"pipeline",
|
4 |
"version":"0.0.0",
|
5 |
+
"spacy_version":">=3.5.0,<3.6.0",
|
6 |
"description":"",
|
7 |
"author":"",
|
8 |
"email":"",
|
|
|
21 |
|
22 |
],
|
23 |
"spancat":[
|
24 |
+
"Absence_comp_sent",
|
25 |
"Prepositions",
|
26 |
+
"Agreement_errors",
|
27 |
"Tense_choice",
|
28 |
"Redundant_comp"
|
29 |
]
|
|
|
43 |
"spans_sc_f":0.0,
|
44 |
"spans_sc_p":0.0,
|
45 |
"spans_sc_r":0.0,
|
46 |
+
"spans_Absence_comp_sent_f":0.0,
|
47 |
"spans_Agreement_errors_f":0.0,
|
48 |
"spans_Prepositions_f":0.0,
|
49 |
"spans_Redundant_comp_f":0.0,
|
50 |
"spans_Tense_choice_f":0.0,
|
51 |
+
"transformer_loss":216.1842367096,
|
52 |
+
"spancat_loss":10959.7070766776
|
53 |
}
|
54 |
}
|
training/grammar_major/spancat/cfg
CHANGED
@@ -1,7 +1,8 @@
|
|
1 |
{
|
2 |
"labels":[
|
3 |
-
"
|
4 |
"Prepositions",
|
|
|
5 |
"Tense_choice",
|
6 |
"Redundant_comp"
|
7 |
],
|
|
|
1 |
{
|
2 |
"labels":[
|
3 |
+
"Absence_comp_sent",
|
4 |
"Prepositions",
|
5 |
+
"Agreement_errors",
|
6 |
"Tense_choice",
|
7 |
"Redundant_comp"
|
8 |
],
|
training/grammar_major/spancat/model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cca1ce63e726d8e2d50e58a0ed7306d96a3f65a97192d5dd296ccc57dd65b685
|
3 |
+
size 4725555
|
training/grammar_major/tokenizer
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b014e8bba4958b120af2d0c1c63eabb7c00379f2bacaf10df7c5325efd2ea467
|
3 |
+
size 77066
|
training/grammar_major/transformer/model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:09516ade4845a1fd132ee39328e46af1fe8c75178a3b7d87f717a9586e62a23d
|
3 |
+
size 434208137
|
training/grammar_major/vocab/strings.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c8f06faf2807cf7c9eba22f9a969617b204b21fa9181a0392919a6ecd8a24f5b
|
3 |
+
size 182743
|
training/grammar_minor/config.cfg
CHANGED
@@ -97,12 +97,13 @@ frozen_components = []
|
|
97 |
seed = 0
|
98 |
gpu_allocator = "pytorch"
|
99 |
dropout = 0.1
|
100 |
-
patience =
|
101 |
max_epochs = 0
|
102 |
max_steps = 20000
|
103 |
-
eval_frequency =
|
104 |
annotating_components = []
|
105 |
before_to_disk = null
|
|
|
106 |
|
107 |
[training.batcher]
|
108 |
@batchers = "spacy.batch_by_padded.v1"
|
@@ -113,7 +114,7 @@ get_length = null
|
|
113 |
|
114 |
[training.logger]
|
115 |
@loggers = "spacy.WandbLogger.v3"
|
116 |
-
project_name = "
|
117 |
remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
|
118 |
model_log_interval = 1000
|
119 |
log_dataset_dir = null
|
@@ -140,11 +141,10 @@ initial_rate = 0.00005
|
|
140 |
spans_sc_f = 0.5
|
141 |
spans_sc_p = 0.0
|
142 |
spans_sc_r = 0.0
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
spans_Word_order_f = 0.07
|
148 |
|
149 |
[pretraining]
|
150 |
|
|
|
97 |
seed = 0
|
98 |
gpu_allocator = "pytorch"
|
99 |
dropout = 0.1
|
100 |
+
patience = 3000
|
101 |
max_epochs = 0
|
102 |
max_steps = 20000
|
103 |
+
eval_frequency = 500
|
104 |
annotating_components = []
|
105 |
before_to_disk = null
|
106 |
+
before_update = null
|
107 |
|
108 |
[training.batcher]
|
109 |
@batchers = "spacy.batch_by_padded.v1"
|
|
|
114 |
|
115 |
[training.logger]
|
116 |
@loggers = "spacy.WandbLogger.v3"
|
117 |
+
project_name = "grammar_checker"
|
118 |
remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
|
119 |
model_log_interval = 1000
|
120 |
log_dataset_dir = null
|
|
|
141 |
spans_sc_f = 0.5
|
142 |
spans_sc_p = 0.0
|
143 |
spans_sc_r = 0.0
|
144 |
+
spans_Noun_number_f = 0.02
|
145 |
+
spans_Numerals_f = 0.19
|
146 |
+
spans_Verb_pattern_f = 0.19
|
147 |
+
spans_Word_order_f = 0.1
|
|
|
148 |
|
149 |
[pretraining]
|
150 |
|
training/grammar_minor/meta.json
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
"lang":"en",
|
3 |
"name":"pipeline",
|
4 |
"version":"0.0.0",
|
5 |
-
"spacy_version":">=3.
|
6 |
"description":"",
|
7 |
"author":"",
|
8 |
"email":"",
|
@@ -21,10 +21,9 @@
|
|
21 |
|
22 |
],
|
23 |
"spancat":[
|
|
|
24 |
"Verb_pattern",
|
25 |
-
"Determiners",
|
26 |
"Numerals",
|
27 |
-
"Word_order",
|
28 |
"Noun_number"
|
29 |
]
|
30 |
},
|
@@ -43,12 +42,11 @@
|
|
43 |
"spans_sc_f":0.0,
|
44 |
"spans_sc_p":0.0,
|
45 |
"spans_sc_r":0.0,
|
46 |
-
"spans_Determiners_f":0.0,
|
47 |
"spans_Noun_number_f":0.0,
|
48 |
"spans_Numerals_f":0.0,
|
49 |
"spans_Verb_pattern_f":0.0,
|
50 |
"spans_Word_order_f":0.0,
|
51 |
-
"transformer_loss":
|
52 |
-
"spancat_loss":
|
53 |
}
|
54 |
}
|
|
|
2 |
"lang":"en",
|
3 |
"name":"pipeline",
|
4 |
"version":"0.0.0",
|
5 |
+
"spacy_version":">=3.5.0,<3.6.0",
|
6 |
"description":"",
|
7 |
"author":"",
|
8 |
"email":"",
|
|
|
21 |
|
22 |
],
|
23 |
"spancat":[
|
24 |
+
"Word_order",
|
25 |
"Verb_pattern",
|
|
|
26 |
"Numerals",
|
|
|
27 |
"Noun_number"
|
28 |
]
|
29 |
},
|
|
|
42 |
"spans_sc_f":0.0,
|
43 |
"spans_sc_p":0.0,
|
44 |
"spans_sc_r":0.0,
|
|
|
45 |
"spans_Noun_number_f":0.0,
|
46 |
"spans_Numerals_f":0.0,
|
47 |
"spans_Verb_pattern_f":0.0,
|
48 |
"spans_Word_order_f":0.0,
|
49 |
+
"transformer_loss":99.9441129311,
|
50 |
+
"spancat_loss":3890.6939574572
|
51 |
}
|
52 |
}
|
training/grammar_minor/spancat/cfg
CHANGED
@@ -1,9 +1,8 @@
|
|
1 |
{
|
2 |
"labels":[
|
|
|
3 |
"Verb_pattern",
|
4 |
-
"Determiners",
|
5 |
"Numerals",
|
6 |
-
"Word_order",
|
7 |
"Noun_number"
|
8 |
],
|
9 |
"spans_key":"grammar_minor",
|
|
|
1 |
{
|
2 |
"labels":[
|
3 |
+
"Word_order",
|
4 |
"Verb_pattern",
|
|
|
5 |
"Numerals",
|
|
|
6 |
"Noun_number"
|
7 |
],
|
8 |
"spans_key":"grammar_minor",
|
training/grammar_minor/spancat/model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d61ee1924aa0506de865309bed18c6971257a33d78a8818bceadf0bf40302019
|
3 |
+
size 4725039
|
training/grammar_minor/tokenizer
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b014e8bba4958b120af2d0c1c63eabb7c00379f2bacaf10df7c5325efd2ea467
|
3 |
+
size 77066
|
training/grammar_minor/transformer/model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d42e4d76dc45e2eb8b60a150e5e0445a1ed86933a2f45007babeefa73b934ed
|
3 |
+
size 434208137
|
training/grammar_minor/vocab/strings.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8fa56d47ed9e1b90027c9fe2cc5bf277964169dfa5d986559a698c63fa81ae72
|
3 |
+
size 198085
|
training/punctuation/config.cfg
CHANGED
@@ -97,12 +97,13 @@ frozen_components = []
|
|
97 |
seed = 0
|
98 |
gpu_allocator = "pytorch"
|
99 |
dropout = 0.1
|
100 |
-
patience =
|
101 |
max_epochs = 0
|
102 |
max_steps = 20000
|
103 |
-
eval_frequency =
|
104 |
annotating_components = []
|
105 |
before_to_disk = null
|
|
|
106 |
|
107 |
[training.batcher]
|
108 |
@batchers = "spacy.batch_by_padded.v1"
|
@@ -113,7 +114,7 @@ get_length = null
|
|
113 |
|
114 |
[training.logger]
|
115 |
@loggers = "spacy.WandbLogger.v3"
|
116 |
-
project_name = "
|
117 |
remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
|
118 |
model_log_interval = 1000
|
119 |
log_dataset_dir = null
|
|
|
97 |
seed = 0
|
98 |
gpu_allocator = "pytorch"
|
99 |
dropout = 0.1
|
100 |
+
patience = 3000
|
101 |
max_epochs = 0
|
102 |
max_steps = 20000
|
103 |
+
eval_frequency = 500
|
104 |
annotating_components = []
|
105 |
before_to_disk = null
|
106 |
+
before_update = null
|
107 |
|
108 |
[training.batcher]
|
109 |
@batchers = "spacy.batch_by_padded.v1"
|
|
|
114 |
|
115 |
[training.logger]
|
116 |
@loggers = "spacy.WandbLogger.v3"
|
117 |
+
project_name = "grammar_checker"
|
118 |
remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
|
119 |
model_log_interval = 1000
|
120 |
log_dataset_dir = null
|
training/punctuation/meta.json
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
"lang":"en",
|
3 |
"name":"pipeline",
|
4 |
"version":"0.0.0",
|
5 |
-
"spacy_version":">=3.
|
6 |
"description":"",
|
7 |
"author":"",
|
8 |
"email":"",
|
@@ -40,7 +40,7 @@
|
|
40 |
"spans_sc_p":0.0,
|
41 |
"spans_sc_r":0.0,
|
42 |
"spans_Punctuation_f":0.0,
|
43 |
-
"transformer_loss":
|
44 |
-
"spancat_loss":
|
45 |
}
|
46 |
}
|
|
|
2 |
"lang":"en",
|
3 |
"name":"pipeline",
|
4 |
"version":"0.0.0",
|
5 |
+
"spacy_version":">=3.5.0,<3.6.0",
|
6 |
"description":"",
|
7 |
"author":"",
|
8 |
"email":"",
|
|
|
40 |
"spans_sc_p":0.0,
|
41 |
"spans_sc_r":0.0,
|
42 |
"spans_Punctuation_f":0.0,
|
43 |
+
"transformer_loss":172.0570048221,
|
44 |
+
"spancat_loss":3528.349909951
|
45 |
}
|
46 |
}
|
training/punctuation/spancat/model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4723491
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:03b50aed75d78f66bc433cddd3fe327f35b211ff17db30a5ce07f2c2663db003
|
3 |
size 4723491
|
training/punctuation/tokenizer
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b014e8bba4958b120af2d0c1c63eabb7c00379f2bacaf10df7c5325efd2ea467
|
3 |
+
size 77066
|
training/punctuation/transformer/model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:de2907121fc5ce12a161537cd083df61e6c2e6d584a49e5171cac51c2ced0b32
|
3 |
+
size 434208137
|
training/punctuation/vocab/strings.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c8cf3162ed4f20a4c130de403ffbbb31eceb774b687e37e2a6d2309d2234a761
|
3 |
+
size 139157
|
training/spelling/config.cfg
CHANGED
@@ -97,12 +97,13 @@ frozen_components = []
|
|
97 |
seed = 0
|
98 |
gpu_allocator = "pytorch"
|
99 |
dropout = 0.1
|
100 |
-
patience =
|
101 |
max_epochs = 0
|
102 |
max_steps = 20000
|
103 |
-
eval_frequency =
|
104 |
annotating_components = []
|
105 |
before_to_disk = null
|
|
|
106 |
|
107 |
[training.batcher]
|
108 |
@batchers = "spacy.batch_by_padded.v1"
|
@@ -113,7 +114,7 @@ get_length = null
|
|
113 |
|
114 |
[training.logger]
|
115 |
@loggers = "spacy.WandbLogger.v3"
|
116 |
-
project_name = "
|
117 |
remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
|
118 |
model_log_interval = 1000
|
119 |
log_dataset_dir = null
|
@@ -140,8 +141,8 @@ initial_rate = 0.00005
|
|
140 |
spans_sc_f = 0.5
|
141 |
spans_sc_p = 0.0
|
142 |
spans_sc_r = 0.0
|
143 |
-
spans_Capitalisation_f = 0.
|
144 |
-
spans_Spelling_f = 0.
|
145 |
|
146 |
[pretraining]
|
147 |
|
|
|
97 |
seed = 0
|
98 |
gpu_allocator = "pytorch"
|
99 |
dropout = 0.1
|
100 |
+
patience = 3000
|
101 |
max_epochs = 0
|
102 |
max_steps = 20000
|
103 |
+
eval_frequency = 500
|
104 |
annotating_components = []
|
105 |
before_to_disk = null
|
106 |
+
before_update = null
|
107 |
|
108 |
[training.batcher]
|
109 |
@batchers = "spacy.batch_by_padded.v1"
|
|
|
114 |
|
115 |
[training.logger]
|
116 |
@loggers = "spacy.WandbLogger.v3"
|
117 |
+
project_name = "grammar_checker"
|
118 |
remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
|
119 |
model_log_interval = 1000
|
120 |
log_dataset_dir = null
|
|
|
141 |
spans_sc_f = 0.5
|
142 |
spans_sc_p = 0.0
|
143 |
spans_sc_r = 0.0
|
144 |
+
spans_Capitalisation_f = 0.46
|
145 |
+
spans_Spelling_f = 0.04
|
146 |
|
147 |
[pretraining]
|
148 |
|
training/spelling/meta.json
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
"lang":"en",
|
3 |
"name":"pipeline",
|
4 |
"version":"0.0.0",
|
5 |
-
"spacy_version":">=3.
|
6 |
"description":"",
|
7 |
"author":"",
|
8 |
"email":"",
|
@@ -21,8 +21,8 @@
|
|
21 |
|
22 |
],
|
23 |
"spancat":[
|
24 |
-
"
|
25 |
-
"
|
26 |
]
|
27 |
},
|
28 |
"pipeline":[
|
@@ -42,7 +42,7 @@
|
|
42 |
"spans_sc_r":0.0,
|
43 |
"spans_Capitalisation_f":0.0,
|
44 |
"spans_Spelling_f":0.0,
|
45 |
-
"transformer_loss":
|
46 |
-
"spancat_loss":
|
47 |
}
|
48 |
}
|
|
|
2 |
"lang":"en",
|
3 |
"name":"pipeline",
|
4 |
"version":"0.0.0",
|
5 |
+
"spacy_version":">=3.5.0,<3.6.0",
|
6 |
"description":"",
|
7 |
"author":"",
|
8 |
"email":"",
|
|
|
21 |
|
22 |
],
|
23 |
"spancat":[
|
24 |
+
"Capitalisation",
|
25 |
+
"Spelling"
|
26 |
]
|
27 |
},
|
28 |
"pipeline":[
|
|
|
42 |
"spans_sc_r":0.0,
|
43 |
"spans_Capitalisation_f":0.0,
|
44 |
"spans_Spelling_f":0.0,
|
45 |
+
"transformer_loss":66.6583146576,
|
46 |
+
"spancat_loss":3508.9703819749
|
47 |
}
|
48 |
}
|
training/spelling/spancat/cfg
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
"labels":[
|
3 |
-
"
|
4 |
-
"
|
5 |
],
|
6 |
"spans_key":"spelling",
|
7 |
"threshold":0.5,
|
|
|
1 |
{
|
2 |
"labels":[
|
3 |
+
"Capitalisation",
|
4 |
+
"Spelling"
|
5 |
],
|
6 |
"spans_key":"spelling",
|
7 |
"threshold":0.5,
|
training/spelling/spancat/model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4724007
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9f921a2fffab88db8651cf3858d1acac3c73c26e5014a1fc55da21bb39ac72fe
|
3 |
size 4724007
|
training/spelling/tokenizer
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b014e8bba4958b120af2d0c1c63eabb7c00379f2bacaf10df7c5325efd2ea467
|
3 |
+
size 77066
|
training/spelling/transformer/model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f7d615362cf525b2373d9237d580f6b0f34e79b63c25e47be2d4b075de43cfe4
|
3 |
+
size 434208137
|
training/spelling/vocab/strings.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:789839bc7c391d42d0ee973fbde3d4fefe329020e0410d3341bd023ef3953dcc
|
3 |
+
size 597048
|
training/vocabulary/config.cfg
CHANGED
@@ -97,12 +97,13 @@ frozen_components = []
|
|
97 |
seed = 0
|
98 |
gpu_allocator = "pytorch"
|
99 |
dropout = 0.1
|
100 |
-
patience =
|
101 |
max_epochs = 0
|
102 |
max_steps = 20000
|
103 |
-
eval_frequency =
|
104 |
annotating_components = []
|
105 |
before_to_disk = null
|
|
|
106 |
|
107 |
[training.batcher]
|
108 |
@batchers = "spacy.batch_by_padded.v1"
|
@@ -113,7 +114,7 @@ get_length = null
|
|
113 |
|
114 |
[training.logger]
|
115 |
@loggers = "spacy.WandbLogger.v3"
|
116 |
-
project_name = "
|
117 |
remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
|
118 |
model_log_interval = 1000
|
119 |
log_dataset_dir = null
|
@@ -140,10 +141,11 @@ initial_rate = 0.00005
|
|
140 |
spans_sc_f = 0.5
|
141 |
spans_sc_p = 0.0
|
142 |
spans_sc_r = 0.0
|
143 |
-
spans_Category_confusion_f = 0.
|
144 |
-
spans_Formational_affixes_f = 0.
|
145 |
-
|
146 |
-
|
|
|
147 |
|
148 |
[pretraining]
|
149 |
|
|
|
97 |
seed = 0
|
98 |
gpu_allocator = "pytorch"
|
99 |
dropout = 0.1
|
100 |
+
patience = 3000
|
101 |
max_epochs = 0
|
102 |
max_steps = 20000
|
103 |
+
eval_frequency = 500
|
104 |
annotating_components = []
|
105 |
before_to_disk = null
|
106 |
+
before_update = null
|
107 |
|
108 |
[training.batcher]
|
109 |
@batchers = "spacy.batch_by_padded.v1"
|
|
|
114 |
|
115 |
[training.logger]
|
116 |
@loggers = "spacy.WandbLogger.v3"
|
117 |
+
project_name = "grammar_checker"
|
118 |
remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
|
119 |
model_log_interval = 1000
|
120 |
log_dataset_dir = null
|
|
|
141 |
spans_sc_f = 0.5
|
142 |
spans_sc_p = 0.0
|
143 |
spans_sc_r = 0.0
|
144 |
+
spans_Category_confusion_f = 0.09
|
145 |
+
spans_Formational_affixes_f = 0.18
|
146 |
+
spans_Word_choice_f = 0.12
|
147 |
+
spans_lex_item_choice_f = 0.02
|
148 |
+
spans_lex_part_choice_f = 0.1
|
149 |
|
150 |
[pretraining]
|
151 |
|
training/vocabulary/meta.json
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
"lang":"en",
|
3 |
"name":"pipeline",
|
4 |
"version":"0.0.0",
|
5 |
-
"spacy_version":">=3.
|
6 |
"description":"",
|
7 |
"author":"",
|
8 |
"email":"",
|
@@ -21,10 +21,11 @@
|
|
21 |
|
22 |
],
|
23 |
"spancat":[
|
24 |
-
"lex_part_choice",
|
25 |
"Category_confusion",
|
|
|
|
|
26 |
"Formational_affixes",
|
27 |
-
"
|
28 |
]
|
29 |
},
|
30 |
"pipeline":[
|
@@ -44,9 +45,10 @@
|
|
44 |
"spans_sc_r":0.0,
|
45 |
"spans_Category_confusion_f":0.0,
|
46 |
"spans_Formational_affixes_f":0.0,
|
|
|
47 |
"spans_lex_item_choice_f":0.0,
|
48 |
"spans_lex_part_choice_f":0.0,
|
49 |
-
"transformer_loss":
|
50 |
-
"spancat_loss":
|
51 |
}
|
52 |
}
|
|
|
2 |
"lang":"en",
|
3 |
"name":"pipeline",
|
4 |
"version":"0.0.0",
|
5 |
+
"spacy_version":">=3.5.0,<3.6.0",
|
6 |
"description":"",
|
7 |
"author":"",
|
8 |
"email":"",
|
|
|
21 |
|
22 |
],
|
23 |
"spancat":[
|
|
|
24 |
"Category_confusion",
|
25 |
+
"lex_item_choice",
|
26 |
+
"lex_part_choice",
|
27 |
"Formational_affixes",
|
28 |
+
"Word_choice"
|
29 |
]
|
30 |
},
|
31 |
"pipeline":[
|
|
|
45 |
"spans_sc_r":0.0,
|
46 |
"spans_Category_confusion_f":0.0,
|
47 |
"spans_Formational_affixes_f":0.0,
|
48 |
+
"spans_Word_choice_f":0.0,
|
49 |
"spans_lex_item_choice_f":0.0,
|
50 |
"spans_lex_part_choice_f":0.0,
|
51 |
+
"transformer_loss":173.7833097329,
|
52 |
+
"spancat_loss":12657.1661671125
|
53 |
}
|
54 |
}
|
training/vocabulary/spancat/cfg
CHANGED
@@ -1,9 +1,10 @@
|
|
1 |
{
|
2 |
"labels":[
|
3 |
-
"lex_part_choice",
|
4 |
"Category_confusion",
|
|
|
|
|
5 |
"Formational_affixes",
|
6 |
-
"
|
7 |
],
|
8 |
"spans_key":"vocabulary",
|
9 |
"threshold":0.5,
|
|
|
1 |
{
|
2 |
"labels":[
|
|
|
3 |
"Category_confusion",
|
4 |
+
"lex_item_choice",
|
5 |
+
"lex_part_choice",
|
6 |
"Formational_affixes",
|
7 |
+
"Word_choice"
|
8 |
],
|
9 |
"spans_key":"vocabulary",
|
10 |
"threshold":0.5,
|
training/vocabulary/spancat/model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d56402bec71548a712f888f0db3d34597457fdfff031ad827ccbce7bbb42d079
|
3 |
+
size 4725555
|
training/vocabulary/tokenizer
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b014e8bba4958b120af2d0c1c63eabb7c00379f2bacaf10df7c5325efd2ea467
|
3 |
+
size 77066
|
training/vocabulary/transformer/model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f273e80465b0b3f1fcdc6df201ca82089a8c5578832c4f216281d883398e3d3
|
3 |
+
size 434208137
|
training/vocabulary/vocab/strings.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:68ef8ebb8714b2871e7f5a109d6ce10f255ddb33230b3b0f4ffeae67e53ae7f9
|
3 |
+
size 199757
|