iproskurina committed on
Commit
b238f43
·
1 Parent(s): ac78dad

Update spaCy pipeline

Browse files
.ipynb_checkpoints/meta-checkpoint.json ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "lang":"en",
3
+ "name":"grammar_checker",
4
+ "version":"1.0.1",
5
+ "description":"Essay Grammar Checker",
6
+ "author":"Irina Proskurina",
7
+ "email":"",
8
+ "url":"",
9
+ "license":"CC BY-SA 3.0",
10
+ "spacy_version":">=3.4.4,<3.5.0",
11
+ "parent_package":"spacy",
12
+ "requirements":[
13
+ "spacy-transformers>=1.0.0,<1.1.0"
14
+ ],
15
+ "sources":[
16
+ {
17
+ "license":"MIT"
18
+ }
19
+ ],
20
+ "vectors":{
21
+ "width":0,
22
+ "vectors":0,
23
+ "keys":0,
24
+ "name":null
25
+ },
26
+ "pipeline":
27
+ "errors",
28
+ "components":"errors",
29
+ "labels":{
30
+
31
+ },
32
+ "performance":{
33
+ "spans_errors_p":0.7937892339,
34
+ "spans_errors_r":0.4476503759,
35
+ "spans_errors_f":0.5724644939,
36
+ "spans_errors_per_type":{
37
+ "Numerals":{
38
+ "p":0.7313328681,
39
+ "r":0.577092511,
40
+ "f":0.6451215759
41
+ },
42
+ "lex_item_choice":{
43
+ "p":0.7750791975,
44
+ "r":0.1950571353,
45
+ "f":0.3116772824
46
+ },
47
+ "Articles":{
48
+ "p":0.785046729,
49
+ "r":0.4552258065,
50
+ "f":0.5762822607
51
+ },
52
+ "Punctuation":{
53
+ "p":0.6955835962,
54
+ "r":0.2376077586,
55
+ "f":0.3542168675
56
+ },
57
+ "Prepositions":{
58
+ "p":0.8163471241,
59
+ "r":0.3766294227,
60
+ "f":0.5154507805
61
+ },
62
+ "Formational_affixes":{
63
+ "p":0.7269700333,
64
+ "r":0.6031307551,
65
+ "f":0.6592853548
66
+ },
67
+ "Agreement_errors":{
68
+ "p":0.7909018356,
69
+ "r":0.5164147994,
70
+ "f":0.6248423707
71
+ },
72
+ "Capitalisation":{
73
+ "p":0.8034148593,
74
+ "r":0.7899274047,
75
+ "f":0.7966140471
76
+ },
77
+ "Noun_number":{
78
+ "p":0.8251445087,
79
+ "r":0.5558079169,
80
+ "f":0.6642109345
81
+ },
82
+ "Tense_choice":{
83
+ "p":0.7827648115,
84
+ "r":0.5369458128,
85
+ "f":0.6369612856
86
+ },
87
+ "Spelling":{
88
+ "p":0.886746988,
89
+ "r":0.4357608052,
90
+ "f":0.5843588726
91
+ }
92
+ }
93
+ },
94
+ "speed":2779.5295317788,
95
+ "spacy_git_version":"61dfdd9fb",
96
+ "disabled":[
97
+
98
+ ]
99
+ }
README.md CHANGED
@@ -1,34 +1,21 @@
1
  ---
2
  tags:
3
- - Token Classification
4
- - spacy
5
- - SpanCategorizer
6
- - grammar_checker
7
- - essay_checker
8
-
9
  language:
10
  - en
11
  license: cc-by-sa-3.0
 
 
 
12
  ---
13
-
14
- # Essay Grammar Checker
15
-
16
- Essay Grammar Checker trained on [Russian Error-Annotated Learner English Corpus](https://realec.org).
17
-
18
- ## Training information
19
- The checker consists of 6 pipelines each trained on specific error types.
20
- Error Categories used for pipeline mapping:
21
-
22
- ```
23
- "spelling":{"Spelling", "Capitalisation"},
24
- "punctuation": {"Punctuation"},
25
- "articles": {"Articles"},
26
- "vocabulary": {"lex_item_choice", "lex_part_choice",
27
- 'Category_confusion','Formational_affixes'},
28
- "grammar_major": {'Tense_choice','Prepositions','Agreement_errors', 'Redundant_comp'},
29
- "grammar_minor": {'Word_order','Noun_number', 'Numerals','Verb_pattern', 'Determiners'}
30
- ```
31
-
32
- [Detailed information](https://github.com/upunaprosk/grammar_checker)
33
-
34
- [Example usage in Colab](https://github.com/upunaprosk/grammar_checker/blob/master/grammar_checker_example_usage.ipynb)
 
1
  ---
2
  tags:
3
+ - spacy
 
 
 
 
 
4
  language:
5
  - en
6
  license: cc-by-sa-3.0
7
+ model-index:
8
+ - name: en_grammar_checker
9
+ results: []
10
  ---
11
+ | Feature | Description |
12
+ | --- | --- |
13
+ | **Name** | `en_grammar_checker` |
14
+ | **Version** | `1.0.1` |
15
+ | **spaCy** | `>=3.4.4,<3.5.0` |
16
+ | **Default Pipeline** | `errors` |
17
+ | **Components** | `errors` |
18
+ | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
19
+ | **Sources** | n/a |
20
+ | **License** | n/a |
21
+ | **Author** | [n/a]() |
 
 
 
 
 
 
 
 
 
 
 
config.cfg CHANGED
@@ -10,7 +10,7 @@ gpu_allocator = null
10
 
11
  [nlp]
12
  lang = "en"
13
- pipeline = ["punctuation","spelling","articles","grammar_major","grammar_minor","vocabulary"]
14
  disabled = []
15
  before_creation = null
16
  after_creation = null
@@ -20,23 +20,8 @@ tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
20
 
21
  [components]
22
 
23
- [components.articles]
24
- factory = "articles"
25
-
26
- [components.grammar_major]
27
- factory = "grammar_major"
28
-
29
- [components.grammar_minor]
30
- factory = "grammar_minor"
31
-
32
- [components.punctuation]
33
- factory = "punctuation"
34
-
35
- [components.spelling]
36
- factory = "spelling"
37
-
38
- [components.vocabulary]
39
- factory = "vocabulary"
40
 
41
  [corpora]
42
 
 
10
 
11
  [nlp]
12
  lang = "en"
13
+ pipeline = ["errors"]
14
  disabled = []
15
  before_creation = null
16
  after_creation = null
 
20
 
21
  [components]
22
 
23
+ [components.errors]
24
+ factory = "errors"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  [corpora]
27
 
custom_factories.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spacy
2
+ from spacy.language import Language
3
+ from pathlib import Path
4
+ from spacy.util import get_model_meta
5
+
6
+
7
+ model_path = Path(__file__).parent
8
+ meta = get_model_meta(model_path)
9
+ data_dir = f"{meta['lang']}_{meta['name']}-{meta['version']}"
10
+ components_path = model_path / data_dir / "training"
11
+
12
+ @Language.component("errors")
13
+ def errors(doc):
14
+ nlp_vocabulary = spacy.load(components_path)
15
+ return nlp_vocabulary(doc)
en_grammar_checker-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0a808972cae10f9b445a2435a70ee3664f8433086d3b3862febe7679cfb321e
3
- size 2441454414
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f240edb3c8aff37ea6bcb74b636a06920d77bde9dfff47dace902c0fc61e2063
3
+ size 27098
meta.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "lang":"en",
3
  "name":"grammar_checker",
4
- "version":"1.0.0",
5
  "description":"Essay Grammar Checker",
6
  "author":"Irina Proskurina",
7
  "email":"",
8
  "url":"",
9
  "license":"CC BY-SA 3.0",
10
- "spacy_version":">=3.3.0,<3.4.0",
11
  "parent_package":"spacy",
12
  "requirements":[
13
  "spacy-transformers>=1.0.0,<1.1.0"
@@ -24,128 +24,77 @@
24
  "name":null
25
  },
26
  "pipeline":[
27
- "punctuation",
28
- "spelling",
29
- "articles",
30
- "grammar_major",
31
- "grammar_minor",
32
- "vocabulary"
33
  ],
34
  "components":[
35
- "punctuation",
36
- "spelling",
37
- "articles",
38
- "grammar_major",
39
- "grammar_minor",
40
- "vocabulary"
41
  ],
42
  "labels":{
43
 
44
  },
45
  "performance":{
46
- "spans_articles_p":0.8274481176,
47
- "spans_articles_r":0.8197202306,
48
- "spans_articles_f":0.823566046,
49
- "spans_punctuation_p":0.8723066455,
50
- "spans_punctuation_r":0.6734372264,
51
- "spans_punctuation_f":0.7600790514,
52
- "spans_spelling_p":0.9037660231,
53
- "spans_spelling_r":0.9159588014,
54
- "spans_spelling_f":0.9098215644,
55
- "spans_spelling_per_type":{
56
- "Spelling":{
57
- "p":0.9044968253,
58
- "r":0.9227970539,
59
- "f":0.9135553018
60
- },
61
- "Capitalisation":{
62
- "p":0.8959762655,
63
- "r":0.8483146067,
64
- "f":0.8714942736
65
- }
66
- },
67
- "spans_vocabulary_p":0.8326700274,
68
- "spans_vocabulary_r":0.4697166234,
69
- "spans_vocabulary_f":0.6006188,
70
- "spans_vocabulary_per_type":{
71
- "lex_part_choice":{
72
- "p":0.8783783784,
73
- "r":0.0741444867,
74
- "f":0.1367461431
75
  },
76
  "lex_item_choice":{
77
- "p":0.8219627873,
78
- "r":0.5290802348,
79
- "f":0.6437755977
80
  },
81
- "Category_confusion":{
82
- "p":0.84472708,
83
- "r":0.5153572583,
84
- "f":0.6401606426
85
  },
86
- "Formational_affixes":{
87
- "p":0.8892307692,
88
- "r":0.5608020699,
89
- "f":0.6878222927
90
- }
91
- },
92
- "spans_grammar_minor_p":0.8690835299,
93
- "spans_grammar_minor_r":0.7119557892,
94
- "spans_grammar_minor_f":0.7827117807,
95
- "spans_grammar_minor_per_type":{
96
- "Verb_pattern":{
97
- "p":0.7399617591,
98
- "r":0.2491951062,
99
- "f":0.3728323699
100
- },
101
- "Noun_number":{
102
- "p":0.8889171489,
103
- "r":0.9032758286,
104
- "f":0.8960389694
105
  },
106
- "Word_order":{
107
- "p":0.8179384203,
108
- "r":0.2189179506,
109
- "f":0.3453928773
110
  },
111
- "Numerals":{
112
- "p":0.7062761506,
113
- "r":0.602426838,
114
- "f":0.6502311248
115
  },
116
- "Determiners":{
117
- "p":0.5853658537,
118
- "r":0.0231884058,
119
- "f":0.0446096654
120
- }
121
- },
122
- "spans_grammar_major_p":0.8694734373,
123
- "spans_grammar_major_r":0.6123730074,
124
- "spans_grammar_major_f":0.7186197538,
125
- "spans_grammar_major_per_type":{
126
  "Agreement_errors":{
127
- "p":0.8953090397,
128
- "r":0.7739296902,
129
- "f":0.8302062914
130
  },
131
- "Prepositions":{
132
- "p":0.881741712,
133
- "r":0.5715202053,
134
- "f":0.6935201401
135
  },
136
- "Redundant_comp":{
137
- "p":0.8028656126,
138
- "r":0.314191802,
139
- "f":0.4516397999
140
  },
141
  "Tense_choice":{
142
- "p":0.86231523,
143
- "r":0.7354107649,
144
- "f":0.7938231022
 
 
 
 
 
145
  }
146
  }
147
  },
148
- "speed":5613.7287946692,
149
  "spacy_git_version":"61dfdd9fb",
150
  "disabled":[
151
 
 
1
  {
2
  "lang":"en",
3
  "name":"grammar_checker",
4
+ "version":"1.0.1",
5
  "description":"Essay Grammar Checker",
6
  "author":"Irina Proskurina",
7
  "email":"",
8
  "url":"",
9
  "license":"CC BY-SA 3.0",
10
+ "spacy_version":">=3.4.4,<3.5.0",
11
  "parent_package":"spacy",
12
  "requirements":[
13
  "spacy-transformers>=1.0.0,<1.1.0"
 
24
  "name":null
25
  },
26
  "pipeline":[
27
+ "errors"
 
 
 
 
 
28
  ],
29
  "components":[
30
+ "errors"
 
 
 
 
 
31
  ],
32
  "labels":{
33
 
34
  },
35
  "performance":{
36
+ "spans_errors_p":0.7937892339,
37
+ "spans_errors_r":0.4476503759,
38
+ "spans_errors_f":0.5724644939,
39
+ "spans_errors_per_type":{
40
+ "Numerals":{
41
+ "p":0.7313328681,
42
+ "r":0.577092511,
43
+ "f":0.6451215759
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  },
45
  "lex_item_choice":{
46
+ "p":0.7750791975,
47
+ "r":0.1950571353,
48
+ "f":0.3116772824
49
  },
50
+ "Articles":{
51
+ "p":0.785046729,
52
+ "r":0.4552258065,
53
+ "f":0.5762822607
54
  },
55
+ "Punctuation":{
56
+ "p":0.6955835962,
57
+ "r":0.2376077586,
58
+ "f":0.3542168675
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  },
60
+ "Prepositions":{
61
+ "p":0.8163471241,
62
+ "r":0.3766294227,
63
+ "f":0.5154507805
64
  },
65
+ "Formational_affixes":{
66
+ "p":0.7269700333,
67
+ "r":0.6031307551,
68
+ "f":0.6592853548
69
  },
 
 
 
 
 
 
 
 
 
 
70
  "Agreement_errors":{
71
+ "p":0.7909018356,
72
+ "r":0.5164147994,
73
+ "f":0.6248423707
74
  },
75
+ "Capitalisation":{
76
+ "p":0.8034148593,
77
+ "r":0.7899274047,
78
+ "f":0.7966140471
79
  },
80
+ "Noun_number":{
81
+ "p":0.8251445087,
82
+ "r":0.5558079169,
83
+ "f":0.6642109345
84
  },
85
  "Tense_choice":{
86
+ "p":0.7827648115,
87
+ "r":0.5369458128,
88
+ "f":0.6369612856
89
+ },
90
+ "Spelling":{
91
+ "p":0.886746988,
92
+ "r":0.4357608052,
93
+ "f":0.5843588726
94
  }
95
  }
96
  },
97
+ "speed":2779.5295317788,
98
  "spacy_git_version":"61dfdd9fb",
99
  "disabled":[
100
 
tokenizer CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0f3f5297dc14c5fd94488174149bd87b5f24dff28e6be993eb3ea7a92e53417
3
- size 78126
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24ad4ddf9a27837484c7fe2ae8a454167cfc9604ef2e7740a2e74a39cc2c1bc3
3
+ size 76990
vocab/strings.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6833af47d0c2d495d15d4781b4a39d7066075dfde086d647e2e3f079777f64fd
3
- size 13062
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0341677b1e682df9d40e4e944d83860fbff48c547e2251da6885f2bc6a3fa29
3
+ size 12938