mrqorib commited on
Commit
d40842b
1 Parent(s): c3248d3
.gitattributes CHANGED
@@ -1,6 +1,7 @@
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
 
4
  *.bz2 filter=lfs diff=lfs merge=lfs -text
5
  *.ckpt filter=lfs diff=lfs merge=lfs -text
6
  *.ftz filter=lfs diff=lfs merge=lfs -text
 
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
5
  *.bz2 filter=lfs diff=lfs merge=lfs -text
6
  *.ckpt filter=lfs diff=lfs merge=lfs -text
7
  *.ftz filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,59 @@
1
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  license: gpl-3.0
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ language: en
3
+ tags:
4
+ - greco
5
+ - grammar
6
+ - grammaticality
7
+ - gec
8
+ base_model: microsoft/deberta-v3-large
9
+ datasets: w&i+locness
10
+ model-index:
11
+ - name: GRECO
12
+ results:
13
+ - task:
14
+ type: grammatical-error-correction
15
+ name: Grammatical Error Correction
16
+ dataset:
17
+ type: conll-2014-shared-task-grammatical-error
18
+ name: CoNLL-2014
19
+ split: test
20
+ metrics:
21
+ - type: f0.5
22
+ value: 71.12
23
+ name: F0.5
24
+ source:
25
+ name: NLP-progress
26
+ url: https://nlpprogress.com/english/grammatical_error_correction.html
27
  license: gpl-3.0
28
  ---
29
+
30
+ # GRECO: Grammaticality-scorer for re-ranking corrections
31
+ GRECO is a quality estimation model for grammatical error correction. The model is trained to detect which words are incorrect and whether a word or phrase needs to be inserted after certain words. You can then use the model to get the grammaticality score of a sentence.
32
+
33
+ Please check the [official repository](https://github.com/nusnlp/greco/tree/main) for more implementation details and updates.
34
+
35
+ The model was published in the following paper:
36
+ > System Combination via Quality Estimation for Grammatical Error Correction ([PDF](https://arxiv.org/abs/2310.14947) | [ACL Anthology](https://aclanthology.org/2023.emnlp-main.785/)) <br>
37
+ > [Muhammad Reza Qorib](https://mrqorib.github.io/) and [Hwee Tou Ng](https://www.comp.nus.edu.sg/~nght/) <br>
38
+ > The 2023 Conference on Empirical Methods in Natural Language Processing (EMNLP)
39
+
40
+ ## Citation
41
+ If you find it useful for your work, please cite the paper:
42
+ ```latex
43
+ @inproceedings{qorib-ng-2023-system,
44
+ title = "System Combination via Quality Estimation for Grammatical Error Correction",
45
+ author = "Qorib, Muhammad Reza and
46
+ Ng, Hwee Tou",
47
+ editor = "Bouamor, Houda and
48
+ Pino, Juan and
49
+ Bali, Kalika",
50
+ booktitle = "Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing",
51
+ month = dec,
52
+ year = "2023",
53
+ address = "Singapore",
54
+ publisher = "Association for Computational Linguistics",
55
+ url = "https://aclanthology.org/2023.emnlp-main.785",
56
+ doi = "10.18653/v1/2023.emnlp-main.785",
57
+ pages = "12746--12759",
58
+ }
59
+ ```
added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "deberta-v2",
3
+ "attention_probs_dropout_prob": 0.1,
4
+ "hidden_act": "gelu",
5
+ "hidden_dropout_prob": 0.1,
6
+ "hidden_size": 1024,
7
+ "initializer_range": 0.02,
8
+ "intermediate_size": 4096,
9
+ "max_position_embeddings": 512,
10
+ "relative_attention": true,
11
+ "position_buckets": 256,
12
+ "norm_rel_ebd": "layer_norm",
13
+ "share_att_key": true,
14
+ "pos_att_type": "p2c|c2p",
15
+ "layer_norm_eps": 1e-7,
16
+ "max_relative_positions": -1,
17
+ "position_biased_input": false,
18
+ "num_attention_heads": 16,
19
+ "num_hidden_layers": 24,
20
+ "type_vocab_size": 0,
21
+ "vocab_size": 128100
22
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a11360c12bdfb20660555fe1e4a71e28b4c21e0acd0b4077d8af80a8f096884
3
+ size 1748823771
special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": "[UNK]"
9
+ }
spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
+ size 2464616
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": "[CLS]",
4
+ "cls_token": "[CLS]",
5
+ "do_lower_case": false,
6
+ "eos_token": "[SEP]",
7
+ "mask_token": "[MASK]",
8
+ "name_or_path": "microsoft/deberta-v3-large",
9
+ "pad_token": "[PAD]",
10
+ "sep_token": "[SEP]",
11
+ "sp_model_kwargs": {},
12
+ "special_tokens_map_file": null,
13
+ "split_by_punct": false,
14
+ "tokenizer_class": "DebertaV2Tokenizer",
15
+ "unk_token": "[UNK]",
16
+ "vocab_type": "spm"
17
+ }