iproskurina committed on
Commit
59d18c0
1 Parent(s): d387cbe

Update spaCy pipeline

Browse files
Files changed (43)
  1. en_grammar_checker-any-py3-none-any.whl +2 -2
  2. meta.json +17 -17
  3. training/articles/config.cfg +6 -4
  4. training/articles/meta.json +6 -4
  5. training/articles/spancat/cfg +2 -1
  6. training/articles/spancat/model +2 -2
  7. training/articles/tokenizer +2 -2
  8. training/articles/transformer/model +2 -2
  9. training/articles/vocab/strings.json +2 -2
  10. training/grammar_major/config.cfg +10 -8
  11. training/grammar_major/meta.json +6 -4
  12. training/grammar_major/spancat/cfg +2 -1
  13. training/grammar_major/spancat/model +2 -2
  14. training/grammar_major/tokenizer +2 -2
  15. training/grammar_major/transformer/model +2 -2
  16. training/grammar_major/vocab/strings.json +2 -2
  17. training/grammar_minor/config.cfg +8 -8
  18. training/grammar_minor/meta.json +4 -6
  19. training/grammar_minor/spancat/cfg +1 -2
  20. training/grammar_minor/spancat/model +2 -2
  21. training/grammar_minor/tokenizer +2 -2
  22. training/grammar_minor/transformer/model +2 -2
  23. training/grammar_minor/vocab/strings.json +2 -2
  24. training/punctuation/config.cfg +4 -3
  25. training/punctuation/meta.json +3 -3
  26. training/punctuation/spancat/model +1 -1
  27. training/punctuation/tokenizer +2 -2
  28. training/punctuation/transformer/model +2 -2
  29. training/punctuation/vocab/strings.json +2 -2
  30. training/spelling/config.cfg +6 -5
  31. training/spelling/meta.json +5 -5
  32. training/spelling/spancat/cfg +2 -2
  33. training/spelling/spancat/model +1 -1
  34. training/spelling/tokenizer +2 -2
  35. training/spelling/transformer/model +2 -2
  36. training/spelling/vocab/strings.json +2 -2
  37. training/vocabulary/config.cfg +9 -7
  38. training/vocabulary/meta.json +7 -5
  39. training/vocabulary/spancat/cfg +3 -2
  40. training/vocabulary/spancat/model +2 -2
  41. training/vocabulary/tokenizer +2 -2
  42. training/vocabulary/transformer/model +2 -2
  43. training/vocabulary/vocab/strings.json +2 -2
en_grammar_checker-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6445e092baab35c56feffddb9f55318961754755bf94c76c8ccfbd8eff3adf04
3
- size 27023
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:022e2aa7c5fe1b54f173deb7dd82b65f03bd5cc28900a0a0caadfebb6a377338
3
+ size 2441455340
meta.json CHANGED
@@ -7,16 +7,21 @@
7
  "email":"",
8
  "url":"",
9
  "license":"CC BY-SA 3.0",
10
- "spacy_version":">=3.5.0",
11
- "spacy_git_version":"61dfdd9fb",
 
 
 
 
 
 
 
 
12
  "vectors":{
13
  "width":0,
14
  "vectors":0,
15
  "keys":0,
16
  "name":null
17
- },
18
- "labels":{
19
-
20
  },
21
  "pipeline":[
22
  "punctuation",
@@ -34,18 +39,9 @@
34
  "grammar_minor",
35
  "vocabulary"
36
  ],
37
- "disabled":[
38
 
39
- ],
40
- "parent_package":"spacy",
41
- "requirements":[
42
- "spacy-transformers>=1.0.0"
43
- ],
44
- "sources":[
45
- {
46
- "license":"MIT"
47
- }
48
- ],
49
  "performance":{
50
  "spans_articles_p":0.8274481176,
51
  "spans_articles_r":0.8197202306,
@@ -149,5 +145,9 @@
149
  }
150
  }
151
  },
152
- "speed":5613.7287946692
 
 
 
 
153
  }
 
7
  "email":"",
8
  "url":"",
9
  "license":"CC BY-SA 3.0",
10
+ "spacy_version":">=3.5.0,<3.6.0",
11
+ "parent_package":"spacy",
12
+ "requirements":[
13
+ "spacy-transformers>=1.0.0"
14
+ ],
15
+ "sources":[
16
+ {
17
+ "license":"MIT"
18
+ }
19
+ ],
20
  "vectors":{
21
  "width":0,
22
  "vectors":0,
23
  "keys":0,
24
  "name":null
 
 
 
25
  },
26
  "pipeline":[
27
  "punctuation",
 
39
  "grammar_minor",
40
  "vocabulary"
41
  ],
42
+ "labels":{
43
 
44
+ },
 
 
 
 
 
 
 
 
 
45
  "performance":{
46
  "spans_articles_p":0.8274481176,
47
  "spans_articles_r":0.8197202306,
 
145
  }
146
  }
147
  },
148
+ "speed":5613.7287946692,
149
+ "spacy_git_version":"61dfdd9fb",
150
+ "disabled":[
151
+
152
+ ]
153
  }
training/articles/config.cfg CHANGED
@@ -97,12 +97,13 @@ frozen_components = []
97
  seed = 0
98
  gpu_allocator = "pytorch"
99
  dropout = 0.1
100
- patience = 1600
101
  max_epochs = 0
102
  max_steps = 20000
103
- eval_frequency = 200
104
  annotating_components = []
105
  before_to_disk = null
 
106
 
107
  [training.batcher]
108
  @batchers = "spacy.batch_by_padded.v1"
@@ -113,7 +114,7 @@ get_length = null
113
 
114
  [training.logger]
115
  @loggers = "spacy.WandbLogger.v3"
116
- project_name = "adwiser"
117
  remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
118
  model_log_interval = 1000
119
  log_dataset_dir = null
@@ -140,7 +141,8 @@ initial_rate = 0.00005
140
  spans_sc_f = 0.5
141
  spans_sc_p = 0.0
142
  spans_sc_r = 0.0
143
- spans_Articles_f = 0.5
 
144
 
145
  [pretraining]
146
 
 
97
  seed = 0
98
  gpu_allocator = "pytorch"
99
  dropout = 0.1
100
+ patience = 3000
101
  max_epochs = 0
102
  max_steps = 20000
103
+ eval_frequency = 500
104
  annotating_components = []
105
  before_to_disk = null
106
+ before_update = null
107
 
108
  [training.batcher]
109
  @batchers = "spacy.batch_by_padded.v1"
 
114
 
115
  [training.logger]
116
  @loggers = "spacy.WandbLogger.v3"
117
+ project_name = "grammar_checker"
118
  remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
119
  model_log_interval = 1000
120
  log_dataset_dir = null
 
141
  spans_sc_f = 0.5
142
  spans_sc_p = 0.0
143
  spans_sc_r = 0.0
144
+ spans_Articles_f = 0.01
145
+ spans_Determiners_f = 0.49
146
 
147
  [pretraining]
148
 
training/articles/meta.json CHANGED
@@ -2,7 +2,7 @@
2
  "lang":"en",
3
  "name":"pipeline",
4
  "version":"0.0.0",
5
- "spacy_version":">=3.3.0,<3.4.0",
6
  "description":"",
7
  "author":"",
8
  "email":"",
@@ -21,7 +21,8 @@
21
 
22
  ],
23
  "spancat":[
24
- "Articles"
 
25
  ]
26
  },
27
  "pipeline":[
@@ -40,7 +41,8 @@
40
  "spans_sc_p":0.0,
41
  "spans_sc_r":0.0,
42
  "spans_Articles_f":0.0,
43
- "transformer_loss":100.8329219741,
44
- "spancat_loss":2964.9278720927
 
45
  }
46
  }
 
2
  "lang":"en",
3
  "name":"pipeline",
4
  "version":"0.0.0",
5
+ "spacy_version":">=3.5.0,<3.6.0",
6
  "description":"",
7
  "author":"",
8
  "email":"",
 
21
 
22
  ],
23
  "spancat":[
24
+ "Articles",
25
+ "Determiners"
26
  ]
27
  },
28
  "pipeline":[
 
41
  "spans_sc_p":0.0,
42
  "spans_sc_r":0.0,
43
  "spans_Articles_f":0.0,
44
+ "spans_Determiners_f":0.0,
45
+ "transformer_loss":105.063624234,
46
+ "spancat_loss":5765.2548046919
47
  }
48
  }
training/articles/spancat/cfg CHANGED
@@ -1,6 +1,7 @@
1
  {
2
  "labels":[
3
- "Articles"
 
4
  ],
5
  "spans_key":"articles",
6
  "threshold":0.5,
 
1
  {
2
  "labels":[
3
+ "Articles",
4
+ "Determiners"
5
  ],
6
  "spans_key":"articles",
7
  "threshold":0.5,
training/articles/spancat/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5bcd34ab7d5ff9adb0214059514deb98fb9c131f29d0a0a7a4c520fcafd3e875
3
- size 4723491
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c35e698ee5d227a921ebfc90e99e502db197e5c2897b6c574f14a32e8630821
3
+ size 4724007
training/articles/tokenizer CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0f3f5297dc14c5fd94488174149bd87b5f24dff28e6be993eb3ea7a92e53417
3
- size 78126
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b014e8bba4958b120af2d0c1c63eabb7c00379f2bacaf10df7c5325efd2ea467
3
+ size 77066
training/articles/transformer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2036ce8a5df6924d046d42dd67dc03f670b4d1d25c2d8073de95822165e9a5ed
3
- size 434208023
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ffd19d8ecc91a52ab4494d5ed5dce4469a3ae220895b98c5b63705fadac330b
3
+ size 434208137
training/articles/vocab/strings.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6c403d7808ec8eeeafe465538d7df7f45c1202362b29c9c627a7ed8302dc210
3
- size 287029
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:121520107d12556bee02dbe233c8b08eab86d33fd2839eaf7505679d63227ad9
3
+ size 289348
training/grammar_major/config.cfg CHANGED
@@ -97,12 +97,13 @@ frozen_components = []
97
  seed = 0
98
  gpu_allocator = "pytorch"
99
  dropout = 0.1
100
- patience = 1600
101
  max_epochs = 0
102
  max_steps = 20000
103
- eval_frequency = 200
104
  annotating_components = []
105
  before_to_disk = null
 
106
 
107
  [training.batcher]
108
  @batchers = "spacy.batch_by_padded.v1"
@@ -113,7 +114,7 @@ get_length = null
113
 
114
  [training.logger]
115
  @loggers = "spacy.WandbLogger.v3"
116
- project_name = "adwiser"
117
  remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
118
  model_log_interval = 1000
119
  log_dataset_dir = null
@@ -137,13 +138,14 @@ total_steps = 20000
137
  initial_rate = 0.00005
138
 
139
  [training.score_weights]
140
- spans_sc_f = 0.5
141
  spans_sc_p = 0.0
142
  spans_sc_r = 0.0
143
- spans_Agreement_errors_f = 0.13
144
- spans_Prepositions_f = 0.12
145
- spans_Redundant_comp_f = 0.14
146
- spans_Tense_choice_f = 0.11
 
147
 
148
  [pretraining]
149
 
 
97
  seed = 0
98
  gpu_allocator = "pytorch"
99
  dropout = 0.1
100
+ patience = 3000
101
  max_epochs = 0
102
  max_steps = 20000
103
+ eval_frequency = 500
104
  annotating_components = []
105
  before_to_disk = null
106
+ before_update = null
107
 
108
  [training.batcher]
109
  @batchers = "spacy.batch_by_padded.v1"
 
114
 
115
  [training.logger]
116
  @loggers = "spacy.WandbLogger.v3"
117
+ project_name = "grammar_checker"
118
  remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
119
  model_log_interval = 1000
120
  log_dataset_dir = null
 
138
  initial_rate = 0.00005
139
 
140
  [training.score_weights]
141
+ spans_sc_f = 0.51
142
  spans_sc_p = 0.0
143
  spans_sc_r = 0.0
144
+ spans_Absence_comp_sent_f = 0.15
145
+ spans_Agreement_errors_f = 0.09
146
+ spans_Prepositions_f = 0.08
147
+ spans_Redundant_comp_f = 0.1
148
+ spans_Tense_choice_f = 0.07
149
 
150
  [pretraining]
151
 
training/grammar_major/meta.json CHANGED
@@ -2,7 +2,7 @@
2
  "lang":"en",
3
  "name":"pipeline",
4
  "version":"0.0.0",
5
- "spacy_version":">=3.3.0,<3.4.0",
6
  "description":"",
7
  "author":"",
8
  "email":"",
@@ -21,8 +21,9 @@
21
 
22
  ],
23
  "spancat":[
24
- "Agreement_errors",
25
  "Prepositions",
 
26
  "Tense_choice",
27
  "Redundant_comp"
28
  ]
@@ -42,11 +43,12 @@
42
  "spans_sc_f":0.0,
43
  "spans_sc_p":0.0,
44
  "spans_sc_r":0.0,
 
45
  "spans_Agreement_errors_f":0.0,
46
  "spans_Prepositions_f":0.0,
47
  "spans_Redundant_comp_f":0.0,
48
  "spans_Tense_choice_f":0.0,
49
- "transformer_loss":155.2905472092,
50
- "spancat_loss":4539.9674681051
51
  }
52
  }
 
2
  "lang":"en",
3
  "name":"pipeline",
4
  "version":"0.0.0",
5
+ "spacy_version":">=3.5.0,<3.6.0",
6
  "description":"",
7
  "author":"",
8
  "email":"",
 
21
 
22
  ],
23
  "spancat":[
24
+ "Absence_comp_sent",
25
  "Prepositions",
26
+ "Agreement_errors",
27
  "Tense_choice",
28
  "Redundant_comp"
29
  ]
 
43
  "spans_sc_f":0.0,
44
  "spans_sc_p":0.0,
45
  "spans_sc_r":0.0,
46
+ "spans_Absence_comp_sent_f":0.0,
47
  "spans_Agreement_errors_f":0.0,
48
  "spans_Prepositions_f":0.0,
49
  "spans_Redundant_comp_f":0.0,
50
  "spans_Tense_choice_f":0.0,
51
+ "transformer_loss":216.1842367096,
52
+ "spancat_loss":10959.7070766776
53
  }
54
  }
training/grammar_major/spancat/cfg CHANGED
@@ -1,7 +1,8 @@
1
  {
2
  "labels":[
3
- "Agreement_errors",
4
  "Prepositions",
 
5
  "Tense_choice",
6
  "Redundant_comp"
7
  ],
 
1
  {
2
  "labels":[
3
+ "Absence_comp_sent",
4
  "Prepositions",
5
+ "Agreement_errors",
6
  "Tense_choice",
7
  "Redundant_comp"
8
  ],
training/grammar_major/spancat/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e9fbf9b48ca97b5fe06e25c549763cea2b182321819a81a2ea9b61eede20cfa
3
- size 4725039
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cca1ce63e726d8e2d50e58a0ed7306d96a3f65a97192d5dd296ccc57dd65b685
3
+ size 4725555
training/grammar_major/tokenizer CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0f3f5297dc14c5fd94488174149bd87b5f24dff28e6be993eb3ea7a92e53417
3
- size 78126
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b014e8bba4958b120af2d0c1c63eabb7c00379f2bacaf10df7c5325efd2ea467
3
+ size 77066
training/grammar_major/transformer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:745dee6d568bdaf5acde6d3cb6fd6cccfed39741771b1f36ea1d94968d43b2ff
3
- size 434208023
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09516ade4845a1fd132ee39328e46af1fe8c75178a3b7d87f717a9586e62a23d
3
+ size 434208137
training/grammar_major/vocab/strings.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ef59bf11f594ff3dbb5802fd852d888513130a338f18d441d0738d16cfd1d9f
3
- size 173293
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8f06faf2807cf7c9eba22f9a969617b204b21fa9181a0392919a6ecd8a24f5b
3
+ size 182743
training/grammar_minor/config.cfg CHANGED
@@ -97,12 +97,13 @@ frozen_components = []
97
  seed = 0
98
  gpu_allocator = "pytorch"
99
  dropout = 0.1
100
- patience = 1600
101
  max_epochs = 0
102
  max_steps = 20000
103
- eval_frequency = 200
104
  annotating_components = []
105
  before_to_disk = null
 
106
 
107
  [training.batcher]
108
  @batchers = "spacy.batch_by_padded.v1"
@@ -113,7 +114,7 @@ get_length = null
113
 
114
  [training.logger]
115
  @loggers = "spacy.WandbLogger.v3"
116
- project_name = "adwiser"
117
  remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
118
  model_log_interval = 1000
119
  log_dataset_dir = null
@@ -140,11 +141,10 @@ initial_rate = 0.00005
140
  spans_sc_f = 0.5
141
  spans_sc_p = 0.0
142
  spans_sc_r = 0.0
143
- spans_Determiners_f = 0.18
144
- spans_Noun_number_f = 0.01
145
- spans_Numerals_f = 0.13
146
- spans_Verb_pattern_f = 0.12
147
- spans_Word_order_f = 0.07
148
 
149
  [pretraining]
150
 
 
97
  seed = 0
98
  gpu_allocator = "pytorch"
99
  dropout = 0.1
100
+ patience = 3000
101
  max_epochs = 0
102
  max_steps = 20000
103
+ eval_frequency = 500
104
  annotating_components = []
105
  before_to_disk = null
106
+ before_update = null
107
 
108
  [training.batcher]
109
  @batchers = "spacy.batch_by_padded.v1"
 
114
 
115
  [training.logger]
116
  @loggers = "spacy.WandbLogger.v3"
117
+ project_name = "grammar_checker"
118
  remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
119
  model_log_interval = 1000
120
  log_dataset_dir = null
 
141
  spans_sc_f = 0.5
142
  spans_sc_p = 0.0
143
  spans_sc_r = 0.0
144
+ spans_Noun_number_f = 0.02
145
+ spans_Numerals_f = 0.19
146
+ spans_Verb_pattern_f = 0.19
147
+ spans_Word_order_f = 0.1
 
148
 
149
  [pretraining]
150
 
training/grammar_minor/meta.json CHANGED
@@ -2,7 +2,7 @@
2
  "lang":"en",
3
  "name":"pipeline",
4
  "version":"0.0.0",
5
- "spacy_version":">=3.3.0,<3.4.0",
6
  "description":"",
7
  "author":"",
8
  "email":"",
@@ -21,10 +21,9 @@
21
 
22
  ],
23
  "spancat":[
 
24
  "Verb_pattern",
25
- "Determiners",
26
  "Numerals",
27
- "Word_order",
28
  "Noun_number"
29
  ]
30
  },
@@ -43,12 +42,11 @@
43
  "spans_sc_f":0.0,
44
  "spans_sc_p":0.0,
45
  "spans_sc_r":0.0,
46
- "spans_Determiners_f":0.0,
47
  "spans_Noun_number_f":0.0,
48
  "spans_Numerals_f":0.0,
49
  "spans_Verb_pattern_f":0.0,
50
  "spans_Word_order_f":0.0,
51
- "transformer_loss":74.2849839284,
52
- "spancat_loss":3224.212622695
53
  }
54
  }
 
2
  "lang":"en",
3
  "name":"pipeline",
4
  "version":"0.0.0",
5
+ "spacy_version":">=3.5.0,<3.6.0",
6
  "description":"",
7
  "author":"",
8
  "email":"",
 
21
 
22
  ],
23
  "spancat":[
24
+ "Word_order",
25
  "Verb_pattern",
 
26
  "Numerals",
 
27
  "Noun_number"
28
  ]
29
  },
 
42
  "spans_sc_f":0.0,
43
  "spans_sc_p":0.0,
44
  "spans_sc_r":0.0,
 
45
  "spans_Noun_number_f":0.0,
46
  "spans_Numerals_f":0.0,
47
  "spans_Verb_pattern_f":0.0,
48
  "spans_Word_order_f":0.0,
49
+ "transformer_loss":99.9441129311,
50
+ "spancat_loss":3890.6939574572
51
  }
52
  }
training/grammar_minor/spancat/cfg CHANGED
@@ -1,9 +1,8 @@
1
  {
2
  "labels":[
 
3
  "Verb_pattern",
4
- "Determiners",
5
  "Numerals",
6
- "Word_order",
7
  "Noun_number"
8
  ],
9
  "spans_key":"grammar_minor",
 
1
  {
2
  "labels":[
3
+ "Word_order",
4
  "Verb_pattern",
 
5
  "Numerals",
 
6
  "Noun_number"
7
  ],
8
  "spans_key":"grammar_minor",
training/grammar_minor/spancat/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e0283722a11d5a844cd733d76a12574c63e598f946c2d5843a6f81d80ec9dbe
3
- size 4725555
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d61ee1924aa0506de865309bed18c6971257a33d78a8818bceadf0bf40302019
3
+ size 4725039
training/grammar_minor/tokenizer CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0f3f5297dc14c5fd94488174149bd87b5f24dff28e6be993eb3ea7a92e53417
3
- size 78126
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b014e8bba4958b120af2d0c1c63eabb7c00379f2bacaf10df7c5325efd2ea467
3
+ size 77066
training/grammar_minor/transformer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a214501cb8a98fc0d7934072baa83b46d144b0f255f60afd65c190c2ed876d61
3
- size 434208023
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d42e4d76dc45e2eb8b60a150e5e0445a1ed86933a2f45007babeefa73b934ed
3
+ size 434208137
training/grammar_minor/vocab/strings.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:169ca15549fd804567ae7d8222e75f320265558fb39b5b90add91046eb7a3723
3
- size 201573
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fa56d47ed9e1b90027c9fe2cc5bf277964169dfa5d986559a698c63fa81ae72
3
+ size 198085
training/punctuation/config.cfg CHANGED
@@ -97,12 +97,13 @@ frozen_components = []
97
  seed = 0
98
  gpu_allocator = "pytorch"
99
  dropout = 0.1
100
- patience = 1600
101
  max_epochs = 0
102
  max_steps = 20000
103
- eval_frequency = 200
104
  annotating_components = []
105
  before_to_disk = null
 
106
 
107
  [training.batcher]
108
  @batchers = "spacy.batch_by_padded.v1"
@@ -113,7 +114,7 @@ get_length = null
113
 
114
  [training.logger]
115
  @loggers = "spacy.WandbLogger.v3"
116
- project_name = "adwiser"
117
  remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
118
  model_log_interval = 1000
119
  log_dataset_dir = null
 
97
  seed = 0
98
  gpu_allocator = "pytorch"
99
  dropout = 0.1
100
+ patience = 3000
101
  max_epochs = 0
102
  max_steps = 20000
103
+ eval_frequency = 500
104
  annotating_components = []
105
  before_to_disk = null
106
+ before_update = null
107
 
108
  [training.batcher]
109
  @batchers = "spacy.batch_by_padded.v1"
 
114
 
115
  [training.logger]
116
  @loggers = "spacy.WandbLogger.v3"
117
+ project_name = "grammar_checker"
118
  remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
119
  model_log_interval = 1000
120
  log_dataset_dir = null
training/punctuation/meta.json CHANGED
@@ -2,7 +2,7 @@
2
  "lang":"en",
3
  "name":"pipeline",
4
  "version":"0.0.0",
5
- "spacy_version":">=3.3.0,<3.4.0",
6
  "description":"",
7
  "author":"",
8
  "email":"",
@@ -40,7 +40,7 @@
40
  "spans_sc_p":0.0,
41
  "spans_sc_r":0.0,
42
  "spans_Punctuation_f":0.0,
43
- "transformer_loss":155.9655457946,
44
- "spancat_loss":2200.9864988542
45
  }
46
  }
 
2
  "lang":"en",
3
  "name":"pipeline",
4
  "version":"0.0.0",
5
+ "spacy_version":">=3.5.0,<3.6.0",
6
  "description":"",
7
  "author":"",
8
  "email":"",
 
40
  "spans_sc_p":0.0,
41
  "spans_sc_r":0.0,
42
  "spans_Punctuation_f":0.0,
43
+ "transformer_loss":172.0570048221,
44
+ "spancat_loss":3528.349909951
45
  }
46
  }
training/punctuation/spancat/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80d8d230b00ce3dfdf04de2e52fc4d14b2ac8e98e0c785e889c5e795940c06b2
3
  size 4723491
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03b50aed75d78f66bc433cddd3fe327f35b211ff17db30a5ce07f2c2663db003
3
  size 4723491
training/punctuation/tokenizer CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0f3f5297dc14c5fd94488174149bd87b5f24dff28e6be993eb3ea7a92e53417
3
- size 78126
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b014e8bba4958b120af2d0c1c63eabb7c00379f2bacaf10df7c5325efd2ea467
3
+ size 77066
training/punctuation/transformer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a3cab4830ec3d78cf951043924a896eb0146f4647f4adf3160ac035127f6fd6
3
- size 434208023
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de2907121fc5ce12a161537cd083df61e6c2e6d584a49e5171cac51c2ced0b32
3
+ size 434208137
training/punctuation/vocab/strings.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26013a15de37a4180447eed7bd1ab0359f9481e2ed063d9c0c5f6201ba164d30
3
- size 139281
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8cf3162ed4f20a4c130de403ffbbb31eceb774b687e37e2a6d2309d2234a761
3
+ size 139157
training/spelling/config.cfg CHANGED
@@ -97,12 +97,13 @@ frozen_components = []
97
  seed = 0
98
  gpu_allocator = "pytorch"
99
  dropout = 0.1
100
- patience = 1600
101
  max_epochs = 0
102
  max_steps = 20000
103
- eval_frequency = 200
104
  annotating_components = []
105
  before_to_disk = null
 
106
 
107
  [training.batcher]
108
  @batchers = "spacy.batch_by_padded.v1"
@@ -113,7 +114,7 @@ get_length = null
113
 
114
  [training.logger]
115
  @loggers = "spacy.WandbLogger.v3"
116
- project_name = "adwiser"
117
  remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
118
  model_log_interval = 1000
119
  log_dataset_dir = null
@@ -140,8 +141,8 @@ initial_rate = 0.00005
140
  spans_sc_f = 0.5
141
  spans_sc_p = 0.0
142
  spans_sc_r = 0.0
143
- spans_Capitalisation_f = 0.45
144
- spans_Spelling_f = 0.05
145
 
146
  [pretraining]
147
 
 
97
  seed = 0
98
  gpu_allocator = "pytorch"
99
  dropout = 0.1
100
+ patience = 3000
101
  max_epochs = 0
102
  max_steps = 20000
103
+ eval_frequency = 500
104
  annotating_components = []
105
  before_to_disk = null
106
+ before_update = null
107
 
108
  [training.batcher]
109
  @batchers = "spacy.batch_by_padded.v1"
 
114
 
115
  [training.logger]
116
  @loggers = "spacy.WandbLogger.v3"
117
+ project_name = "grammar_checker"
118
  remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
119
  model_log_interval = 1000
120
  log_dataset_dir = null
 
141
  spans_sc_f = 0.5
142
  spans_sc_p = 0.0
143
  spans_sc_r = 0.0
144
+ spans_Capitalisation_f = 0.46
145
+ spans_Spelling_f = 0.04
146
 
147
  [pretraining]
148
 
training/spelling/meta.json CHANGED
@@ -2,7 +2,7 @@
2
  "lang":"en",
3
  "name":"pipeline",
4
  "version":"0.0.0",
5
- "spacy_version":">=3.3.0,<3.4.0",
6
  "description":"",
7
  "author":"",
8
  "email":"",
@@ -21,8 +21,8 @@
21
 
22
  ],
23
  "spancat":[
24
- "Spelling",
25
- "Capitalisation"
26
  ]
27
  },
28
  "pipeline":[
@@ -42,7 +42,7 @@
42
  "spans_sc_r":0.0,
43
  "spans_Capitalisation_f":0.0,
44
  "spans_Spelling_f":0.0,
45
- "transformer_loss":44.0993431434,
46
- "spancat_loss":2243.7531562905
47
  }
48
  }
 
2
  "lang":"en",
3
  "name":"pipeline",
4
  "version":"0.0.0",
5
+ "spacy_version":">=3.5.0,<3.6.0",
6
  "description":"",
7
  "author":"",
8
  "email":"",
 
21
 
22
  ],
23
  "spancat":[
24
+ "Capitalisation",
25
+ "Spelling"
26
  ]
27
  },
28
  "pipeline":[
 
42
  "spans_sc_r":0.0,
43
  "spans_Capitalisation_f":0.0,
44
  "spans_Spelling_f":0.0,
45
+ "transformer_loss":66.6583146576,
46
+ "spancat_loss":3508.9703819749
47
  }
48
  }
training/spelling/spancat/cfg CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "labels":[
3
- "Spelling",
4
- "Capitalisation"
5
  ],
6
  "spans_key":"spelling",
7
  "threshold":0.5,
 
1
  {
2
  "labels":[
3
+ "Capitalisation",
4
+ "Spelling"
5
  ],
6
  "spans_key":"spelling",
7
  "threshold":0.5,
training/spelling/spancat/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a115c30432686880e39e77d6d3d1a862d44334c8711dba4113575f85baa9d1a
3
  size 4724007
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f921a2fffab88db8651cf3858d1acac3c73c26e5014a1fc55da21bb39ac72fe
3
  size 4724007
training/spelling/tokenizer CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0f3f5297dc14c5fd94488174149bd87b5f24dff28e6be993eb3ea7a92e53417
3
- size 78126
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b014e8bba4958b120af2d0c1c63eabb7c00379f2bacaf10df7c5325efd2ea467
3
+ size 77066
training/spelling/transformer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92ad13e7ffe5523eb6e644ff92cfb78a909ff71756d3c80cff1e7124d1559835
3
- size 434208023
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7d615362cf525b2373d9237d580f6b0f34e79b63c25e47be2d4b075de43cfe4
3
+ size 434208137
training/spelling/vocab/strings.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac0eded0abfa2f25a418b9fd9b9164a068608ecd16746d0b6595b53e508eff7b
3
- size 597160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:789839bc7c391d42d0ee973fbde3d4fefe329020e0410d3341bd023ef3953dcc
3
+ size 597048
training/vocabulary/config.cfg CHANGED
@@ -97,12 +97,13 @@ frozen_components = []
97
  seed = 0
98
  gpu_allocator = "pytorch"
99
  dropout = 0.1
100
- patience = 1600
101
  max_epochs = 0
102
  max_steps = 20000
103
- eval_frequency = 200
104
  annotating_components = []
105
  before_to_disk = null
 
106
 
107
  [training.batcher]
108
  @batchers = "spacy.batch_by_padded.v1"
@@ -113,7 +114,7 @@ get_length = null
113
 
114
  [training.logger]
115
  @loggers = "spacy.WandbLogger.v3"
116
- project_name = "adwiser"
117
  remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
118
  model_log_interval = 1000
119
  log_dataset_dir = null
@@ -140,10 +141,11 @@ initial_rate = 0.00005
140
  spans_sc_f = 0.5
141
  spans_sc_p = 0.0
142
  spans_sc_r = 0.0
143
- spans_Category_confusion_f = 0.11
144
- spans_Formational_affixes_f = 0.23
145
- spans_lex_item_choice_f = 0.03
146
- spans_lex_part_choice_f = 0.13
 
147
 
148
  [pretraining]
149
 
 
97
  seed = 0
98
  gpu_allocator = "pytorch"
99
  dropout = 0.1
100
+ patience = 3000
101
  max_epochs = 0
102
  max_steps = 20000
103
+ eval_frequency = 500
104
  annotating_components = []
105
  before_to_disk = null
106
+ before_update = null
107
 
108
  [training.batcher]
109
  @batchers = "spacy.batch_by_padded.v1"
 
114
 
115
  [training.logger]
116
  @loggers = "spacy.WandbLogger.v3"
117
+ project_name = "grammar_checker"
118
  remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
119
  model_log_interval = 1000
120
  log_dataset_dir = null
 
141
  spans_sc_f = 0.5
142
  spans_sc_p = 0.0
143
  spans_sc_r = 0.0
144
+ spans_Category_confusion_f = 0.09
145
+ spans_Formational_affixes_f = 0.18
146
+ spans_Word_choice_f = 0.12
147
+ spans_lex_item_choice_f = 0.02
148
+ spans_lex_part_choice_f = 0.1
149
 
150
  [pretraining]
151
 
training/vocabulary/meta.json CHANGED
@@ -2,7 +2,7 @@
2
  "lang":"en",
3
  "name":"pipeline",
4
  "version":"0.0.0",
5
- "spacy_version":">=3.3.0,<3.4.0",
6
  "description":"",
7
  "author":"",
8
  "email":"",
@@ -21,10 +21,11 @@
21
 
22
  ],
23
  "spancat":[
24
- "lex_part_choice",
25
  "Category_confusion",
 
 
26
  "Formational_affixes",
27
- "lex_item_choice"
28
  ]
29
  },
30
  "pipeline":[
@@ -44,9 +45,10 @@
44
  "spans_sc_r":0.0,
45
  "spans_Category_confusion_f":0.0,
46
  "spans_Formational_affixes_f":0.0,
 
47
  "spans_lex_item_choice_f":0.0,
48
  "spans_lex_part_choice_f":0.0,
49
- "transformer_loss":125.5026042824,
50
- "spancat_loss":5766.8357456917
51
  }
52
  }
 
2
  "lang":"en",
3
  "name":"pipeline",
4
  "version":"0.0.0",
5
+ "spacy_version":">=3.5.0,<3.6.0",
6
  "description":"",
7
  "author":"",
8
  "email":"",
 
21
 
22
  ],
23
  "spancat":[
 
24
  "Category_confusion",
25
+ "lex_item_choice",
26
+ "lex_part_choice",
27
  "Formational_affixes",
28
+ "Word_choice"
29
  ]
30
  },
31
  "pipeline":[
 
45
  "spans_sc_r":0.0,
46
  "spans_Category_confusion_f":0.0,
47
  "spans_Formational_affixes_f":0.0,
48
+ "spans_Word_choice_f":0.0,
49
  "spans_lex_item_choice_f":0.0,
50
  "spans_lex_part_choice_f":0.0,
51
+ "transformer_loss":173.7833097329,
52
+ "spancat_loss":12657.1661671125
53
  }
54
  }
training/vocabulary/spancat/cfg CHANGED
@@ -1,9 +1,10 @@
1
  {
2
  "labels":[
3
- "lex_part_choice",
4
  "Category_confusion",
 
 
5
  "Formational_affixes",
6
- "lex_item_choice"
7
  ],
8
  "spans_key":"vocabulary",
9
  "threshold":0.5,
 
1
  {
2
  "labels":[
 
3
  "Category_confusion",
4
+ "lex_item_choice",
5
+ "lex_part_choice",
6
  "Formational_affixes",
7
+ "Word_choice"
8
  ],
9
  "spans_key":"vocabulary",
10
  "threshold":0.5,
training/vocabulary/spancat/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:957b3c814e94d3c5b233a697e7556a64d7529130f06b649c3e056b5a9511c079
3
- size 4725039
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d56402bec71548a712f888f0db3d34597457fdfff031ad827ccbce7bbb42d079
3
+ size 4725555
training/vocabulary/tokenizer CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0f3f5297dc14c5fd94488174149bd87b5f24dff28e6be993eb3ea7a92e53417
3
- size 78126
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b014e8bba4958b120af2d0c1c63eabb7c00379f2bacaf10df7c5325efd2ea467
3
+ size 77066
training/vocabulary/transformer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1fac564f7a1784543ea4ea9f680cce25f7258ef0df9832544f826506318cd323
3
- size 434208023
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f273e80465b0b3f1fcdc6df201ca82089a8c5578832c4f216281d883398e3d3
3
+ size 434208137
training/vocabulary/vocab/strings.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:820d6e5b6b5855296d3f920b104d62fb7d1cd1edd2b095c10c01b481a1a5f388
3
- size 192211
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68ef8ebb8714b2871e7f5a109d6ce10f255ddb33230b3b0f4ffeae67e53ae7f9
3
+ size 199757