johnjets committed on
Commit
882d84f
1 Parent(s): a9eabfe
__pycache__/gramformerjohn.cpython-311.pyc ADDED
Binary file (5.67 kB). View file
 
app.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+
4
+ # In[1]:
5
+
6
+
7
+ from gramformerjohn import Gramformer
8
+ import gradio as gr
9
+
10
+ import spacy
11
+
12
+
13
+
14
+
15
+ # In[2]:
16
+
17
+
18
+ gf = Gramformer(models = 1, use_gpu = False)
19
+
20
+
21
+ # In[3]:
22
+
23
+
24
+ name = "how are you"
25
+
26
+
27
+ # In[13]:
28
+
29
+ # In[5]:
30
+
31
+
32
def levenstein_score(correct_output, sentences):
    """Return the normalised Levenshtein similarity of two strings.

    The score is ``(longest_length - edit_distance) / longest_length``:
    1.0 means the strings are identical, lower values mean more edits are
    needed to turn one into the other.

    Args:
        correct_output: The corrected text.
        sentences: The text it is compared against.

    Returns:
        The similarity as a float; 1.0 when both strings are empty.
    """
    max_wrong = max(len(correct_output), len(sentences))
    if max_wrong == 0:
        # Two empty strings are identical; without this guard the division
        # below raises ZeroDivisionError.
        return 1.0
    # `distance` is the module-level name imported from the Levenshtein package.
    actual_wrong = distance(correct_output, sentences)
    return (max_wrong - actual_wrong) / max_wrong
36
+
37
+
38
+ # In[28]:
39
+
40
+
41
+ import gradio as gr
42
+ import textstat
43
+
44
+ from Levenshtein import distance
45
def correct_sentence(sentences):
    """Correct the submitted text and build the four values shown in the UI.

    Args:
        sentences: Raw text from the input box.

    Returns:
        A 4-tuple ``(heading, fluency_score, flesch_score, highlighted)``:
        the literal heading ``'Output'``, the Levenshtein similarity between
        the corrected and the submitted text as a percentage rounded to two
        decimals, the Flesch reading-ease score of the submitted text, and
        the markup produced by ``gf.highlight``. The two scores are ``'-'``
        when there is nothing to score.
    """
    # Strip before the emptiness check so whitespace-only input is rejected
    # here instead of being sent to the model.
    sentences = (sentences or "").strip()
    if not sentences:
        return 'Output', '-', '-', "Please Input Text."

    corrected = gf.correct(sentences)
    if not corrected:
        # gf.correct returns None when no model is loaded; an empty result
        # would otherwise leave correct_output undefined.
        return 'Output', '-', '-', "No correction available."

    # correct() is called with its default of one candidate, so the returned
    # collection holds a single sentence.
    correct_output = next(iter(corrected))

    fluency = round(levenstein_score(correct_output, sentences) * 100, 2)
    reading_ease = textstat.flesch_reading_ease(sentences)
    # The corrected text is passed as `orig`, so the highlight strikes
    # through the submitted wording and keeps the corrected wording.
    return 'Output', fluency, reading_ease, gf.highlight(correct_output, sentences)
53
+
54
# One text input feeding correct_sentence; its four return values map onto
# the four output components in order.
demo = gr.Interface(
    fn=correct_sentence,
    inputs=gr.Textbox(label="Input", lines=2, placeholder="Text Here..."),
    outputs=[
        gr.Markdown("Output"),
        gr.Textbox(label="Grammar Fluency Score"),
        gr.Textbox(label="Flesch Reading Score"),
        gr.Markdown(),
    ],
    allow_flagging="never",
)

# share=True also requests a public gradio.live URL in addition to the local one.
demo.launch(share=True)
62
+
63
+
64
+ # In[ ]:
65
+
66
+
67
+
68
+
69
+
70
+ # In[ ]:
71
+
72
+
73
+
74
+
75
+
76
+ # In[ ]:
77
+
78
+
79
+
80
+
gramformerjohn.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class Gramformer:
    """Grammar correction with a seq2seq model plus ERRANT-based edit extraction.

    ``correct`` generates corrected candidates for a sentence, ``get_edits``
    lists the token-level edits between two sentences, and ``highlight``
    renders those edits as inline ``<s>...</s>`` markup.
    """

    def __init__(self, models=1, use_gpu=False):
        """Load the ERRANT annotator and, for ``models == 1``, the correction model.

        Args:
            models: ``1`` loads the grammar-correction tokenizer and model.
                ``2`` is reserved and currently loads nothing.
            use_gpu: Place the model on ``cuda:0`` when true, else on the CPU.
        """
        # These imports are local to the constructor, so importing this module
        # does not import them.
        from transformers import AutoTokenizer
        from transformers import AutoModelForSeq2SeqLM
        import errant
        import en_core_web_sm

        nlp = en_core_web_sm.load()
        self.annotator = errant.load('en', nlp)

        self.device = "cuda:0" if use_gpu else "cpu"
        correction_model_tag = "prithivida/grammar_error_correcter_v1"
        # Stays False unless a model is actually loaded below; correct()
        # checks it before touching the tokenizer/model attributes.
        self.model_loaded = False

        if models == 1:
            self.correction_tokenizer = AutoTokenizer.from_pretrained(
                correction_model_tag, use_auth_token=False)
            self.correction_model = AutoModelForSeq2SeqLM.from_pretrained(
                correction_model_tag, use_auth_token=False)
            self.correction_model = self.correction_model.to(self.device)
            self.model_loaded = True
            print("[Gramformer] Grammar error correct/highlight model loaded..")
        elif models == 2:
            # TODO: second model type is not implemented yet.
            print("TO BE IMPLEMENTED!!!")

    def correct(self, input_sentence, max_candidates=1):
        """Generate corrected versions of ``input_sentence``.

        Args:
            input_sentence: The text to correct.
            max_candidates: Number of sequences requested from the model.

        Returns:
            A set of decoded, whitespace-stripped candidate strings
            (duplicates collapse), or ``None`` when no model is loaded.
        """
        if not self.model_loaded:
            print("Model is not loaded")
            return None

        # The input is sent to the model with a "gec: " prefix.
        prompt = "gec: " + input_sentence
        input_ids = self.correction_tokenizer.encode(prompt, return_tensors='pt')
        input_ids = input_ids.to(self.device)

        # NOTE(review): do_sample=True means generation samples, so output
        # may vary between calls for the same input; confirm this is wanted.
        preds = self.correction_model.generate(
            input_ids,
            do_sample=True,
            max_length=128,
            num_beams=7,
            early_stopping=True,
            num_return_sequences=max_candidates)

        return {
            self.correction_tokenizer.decode(pred, skip_special_tokens=True).strip()
            for pred in preds
        }

    def highlight(self, orig, cor):
        """Render the edits between ``orig`` and ``cor`` as inline markup.

        ``orig`` is split on whitespace and each edited token is replaced by
        ``<s>{cor-side text}</s> {orig-side text}``: the wording taken from
        ``cor`` is wrapped in ``<s>`` tags and the wording from ``orig``
        follows it unmarked.

        Args:
            orig: Sentence whose tokens form the backbone of the output.
            cor: Sentence the edits lead to; its differing wording is the
                part wrapped in ``<s>...</s>``.

        Returns:
            The annotated sentence as a single space-joined string.
        """
        edits = self._get_edits(orig, cor)
        orig_tokens = orig.split()

        # Indexes of tokens already folded into a multi-token edit's markup.
        ignore_indexes = set()

        for edit in edits:
            _edit_type, edit_str_start, edit_spos, edit_epos, edit_str_end = edit[:5]

            # For an edit spanning several orig tokens the whole span text is
            # written at the first index, so the remaining ones are dropped.
            ignore_indexes.update(range(edit_spos + 1, edit_epos))

            if edit_str_start == "":
                # Pure insertion: there is no orig token to replace, so the
                # markup is attached to a neighbouring token.
                if not orig_tokens:
                    orig_tokens.append("<s>" + edit_str_end + "</s>")
                    continue
                if edit_spos - 1 >= 0:
                    edit_spos -= 1
                elif edit_spos + 1 < len(orig_tokens):
                    edit_spos += 1
                # Otherwise the sentence has a single token: stay on index 0
                # rather than reading past the end of the list.
                st = "<s>" + edit_str_end + "</s> " + orig_tokens[edit_spos]
            elif edit_str_end == "":
                # Text exists only on the orig side: empty strike-through.
                st = "<s></s> " + edit_str_start
            else:
                st = "<s>" + edit_str_end + "</s> " + edit_str_start
            orig_tokens[edit_spos] = st

        # Delete from the back so earlier indexes stay valid.
        for i in sorted(ignore_indexes, reverse=True):
            del orig_tokens[i]

        return " ".join(orig_tokens)

    def detect(self, input_sentence):
        """Placeholder for error detection; not implemented, returns ``None``."""
        # TO BE IMPLEMENTED
        pass

    def _get_edits(self, orig, cor):
        """Return the classified ERRANT edits that turn ``orig`` into ``cor``.

        Returns:
            A list of tuples ``(type, o_str, o_start, o_end, c_str, c_start,
            c_end)``, where ``type`` is the ERRANT edit type with its first
            two characters removed. The list is empty when there are no edits.
        """
        orig = self.annotator.parse(orig)
        cor = self.annotator.parse(cor)
        alignment = self.annotator.align(orig, cor)
        edits = self.annotator.merge(alignment)

        edit_annotations = []
        for e in edits:
            e = self.annotator.classify(e)
            edit_annotations.append(
                (e.type[2:], e.o_str, e.o_start, e.o_end, e.c_str, e.c_start, e.c_end))
        return edit_annotations

    def get_edits(self, orig, cor):
        """Public wrapper around ``_get_edits``; same arguments and result."""
        return self._get_edits(orig, cor)
grammardemo.ipynb ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {
7
+ "colab": {
8
+ "base_uri": "https://localhost:8080/",
9
+ "height": 373
10
+ },
11
+ "id": "0i11Ui2Gj0Em",
12
+ "outputId": "c1f1adad-a6e3-4fa0-da79-887d00eb39d8"
13
+ },
14
+ "outputs": [
15
+ {
16
+ "name": "stderr",
17
+ "output_type": "stream",
18
+ "text": [
19
+ "/usr/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
20
+ " from .autonotebook import tqdm as notebook_tqdm\n"
21
+ ]
22
+ }
23
+ ],
24
+ "source": [
25
+ "from gramformerjohn import Gramformer\n",
26
+ "import gradio as gr\n",
27
+ "\n",
28
+ "import spacy\n",
29
+ "\n",
30
+ "\n"
31
+ ]
32
+ },
33
+ {
34
+ "cell_type": "code",
35
+ "execution_count": 2,
36
+ "metadata": {
37
+ "colab": {
38
+ "base_uri": "https://localhost:8080/"
39
+ },
40
+ "id": "XKBBiv8Uk37_",
41
+ "outputId": "54c1c44e-1eb2-497a-945f-83fe11ee4773"
42
+ },
43
+ "outputs": [
44
+ {
45
+ "name": "stderr",
46
+ "output_type": "stream",
47
+ "text": [
48
+ "/home/john/.local/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:1714: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers.\n",
49
+ " warnings.warn(\n",
50
+ "/home/john/.local/lib/python3.11/site-packages/transformers/modeling_utils.py:2193: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers.\n",
51
+ " warnings.warn(\n"
52
+ ]
53
+ },
54
+ {
55
+ "name": "stdout",
56
+ "output_type": "stream",
57
+ "text": [
58
+ "[Gramformer] Grammar error correct/highlight model loaded..\n"
59
+ ]
60
+ }
61
+ ],
62
+ "source": [
63
+ "\n",
64
+ "gf = Gramformer(models = 1, use_gpu = False)\n"
65
+ ]
66
+ },
67
+ {
68
+ "cell_type": "code",
69
+ "execution_count": 3,
70
+ "metadata": {
71
+ "colab": {
72
+ "base_uri": "https://localhost:8080/"
73
+ },
74
+ "id": "k3ZdYre-piXe",
75
+ "outputId": "012e9bf7-e714-44fd-89ac-ecaf87a71af1"
76
+ },
77
+ "outputs": [],
78
+ "source": [
79
+ "name = \"how are you\"\n"
80
+ ]
81
+ },
82
+ {
83
+ "cell_type": "code",
84
+ "execution_count": 13,
85
+ "metadata": {
86
+ "id": "riR4iMHH7FCf"
87
+ },
88
+ "outputs": [],
89
+ "source": [
90
+ "from readability import Readability\n",
91
+ "import textstat\n",
92
+ "def reading_score(sentences):\n",
93
+ " return Readability(sentences).flesch()"
94
+ ]
95
+ },
96
+ {
97
+ "cell_type": "markdown",
98
+ "metadata": {
99
+ "id": "zCUtKCJVwzcI"
100
+ },
101
+ "source": []
102
+ },
103
+ {
104
+ "cell_type": "code",
105
+ "execution_count": 5,
106
+ "metadata": {
107
+ "id": "yjcDd1cMxNw5"
108
+ },
109
+ "outputs": [],
110
+ "source": [
111
+ "def levenstein_score(correct_output, sentences):\n",
112
+ " max_wrong = max(len(correct_output), len(sentences))\n",
113
+ " actual_wrong = distance(correct_output, sentences)\n",
114
+ " return (max_wrong - actual_wrong)/max_wrong\n"
115
+ ]
116
+ },
117
+ {
118
+ "cell_type": "code",
119
+ "execution_count": 28,
120
+ "metadata": {
121
+ "colab": {
122
+ "base_uri": "https://localhost:8080/",
123
+ "height": 373
124
+ },
125
+ "id": "O1c4swCfk9tH",
126
+ "outputId": "7ec07a6c-d3de-4758-ec00-48ff29084b37"
127
+ },
128
+ "outputs": [
129
+ {
130
+ "name": "stdout",
131
+ "output_type": "stream",
132
+ "text": [
133
+ "Running on local URL: http://127.0.0.1:7889\n",
134
+ "Running on public URL: https://8a44ed60ed4fd74dcc.gradio.live\n",
135
+ "\n",
136
+ "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n"
137
+ ]
138
+ },
139
+ {
140
+ "data": {
141
+ "text/html": [
142
+ "<div><iframe src=\"https://8a44ed60ed4fd74dcc.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
143
+ ],
144
+ "text/plain": [
145
+ "<IPython.core.display.HTML object>"
146
+ ]
147
+ },
148
+ "metadata": {},
149
+ "output_type": "display_data"
150
+ },
151
+ {
152
+ "data": {
153
+ "text/plain": []
154
+ },
155
+ "execution_count": 28,
156
+ "metadata": {},
157
+ "output_type": "execute_result"
158
+ }
159
+ ],
160
+ "source": [
161
+ "import gradio as gr\n",
162
+ "import textstat\n",
163
+ "\n",
164
+ "from Levenshtein import distance\n",
165
+ "def correct_sentence(sentences):\n",
166
+ " if(len(sentences) == 0):\n",
167
+ " return 'Output','-', '-', \"Please Input Text.\"\n",
168
+ " sentences = sentences.strip()\n",
169
+ " corrected = gf.correct(sentences)\n",
170
+ " for corrected_setence in corrected:\n",
171
+ " correct_output = corrected_setence\n",
172
+ " return 'Output', round(levenstein_score(correct_output, sentences)*100,2), textstat.flesch_reading_ease(sentences), gf.highlight(correct_output,sentences) \n",
173
+ "\n",
174
+ "demo = gr.Interface(\n",
175
+ " fn=correct_sentence,\n",
176
+ " inputs=gr.Textbox(label = \"Input\", lines=2, placeholder=\"Text Here...\"),\n",
177
+ " outputs=[gr.Markdown(\"Output\"), gr.Textbox(label = \"Grammar Fluency Score\"), gr.Textbox(label = \"Flesch Reading Score\"), gr.Markdown()],\n",
178
+ " allow_flagging=\"never\" \n",
179
+ ")\n",
180
+ "\n",
181
+ "demo.launch(share = True)"
182
+ ]
183
+ },
184
+ {
185
+ "cell_type": "code",
186
+ "execution_count": null,
187
+ "metadata": {
188
+ "id": "f7dJEf_-xMH7"
189
+ },
190
+ "outputs": [],
191
+ "source": []
192
+ },
193
+ {
194
+ "cell_type": "code",
195
+ "execution_count": null,
196
+ "metadata": {
197
+ "id": "NaAQdzKbo4Xx"
198
+ },
199
+ "outputs": [],
200
+ "source": []
201
+ },
202
+ {
203
+ "cell_type": "code",
204
+ "execution_count": null,
205
+ "metadata": {
206
+ "id": "tEdhc8DBjOb4"
207
+ },
208
+ "outputs": [],
209
+ "source": []
210
+ }
211
+ ],
212
+ "metadata": {
213
+ "colab": {
214
+ "provenance": []
215
+ },
216
+ "kernelspec": {
217
+ "display_name": "Python 3.11.3 64-bit",
218
+ "language": "python",
219
+ "name": "python3"
220
+ },
221
+ "language_info": {
222
+ "codemirror_mode": {
223
+ "name": "ipython",
224
+ "version": 3
225
+ },
226
+ "file_extension": ".py",
227
+ "mimetype": "text/x-python",
228
+ "name": "python",
229
+ "nbconvert_exporter": "python",
230
+ "pygments_lexer": "ipython3",
231
+ "version": "3.11.3"
232
+ },
233
+ "vscode": {
234
+ "interpreter": {
235
+ "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a"
236
+ }
237
+ }
238
+ },
239
+ "nbformat": 4,
240
+ "nbformat_minor": 0
241
+ }
grammardemo.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+
4
+ # In[1]:
5
+
6
+
7
+ from gramformerjohn import Gramformer
8
+ import gradio as gr
9
+
10
+ import spacy
11
+
12
+
13
+
14
+
15
+ # In[2]:
16
+
17
+
18
+ gf = Gramformer(models = 1, use_gpu = False)
19
+
20
+
21
+ # In[3]:
22
+
23
+
24
+ name = "how are you"
25
+
26
+
27
+ # In[13]:
28
+
29
+
30
+ from readability import Readability
31
+ import textstat
32
def reading_score(sentences):
    """Return the result of ``Readability(sentences).flesch()`` for the given text."""
    analyzer = Readability(sentences)
    return analyzer.flesch()
34
+
35
+
36
+ #
37
+
38
+ # In[5]:
39
+
40
+
41
def levenstein_score(correct_output, sentences):
    """Return the normalised Levenshtein similarity of two strings.

    The score is ``(longest_length - edit_distance) / longest_length``:
    1.0 means the strings are identical, lower values mean more edits are
    needed to turn one into the other.

    Args:
        correct_output: The corrected text.
        sentences: The text it is compared against.

    Returns:
        The similarity as a float; 1.0 when both strings are empty.
    """
    max_wrong = max(len(correct_output), len(sentences))
    if max_wrong == 0:
        # Two empty strings are identical; without this guard the division
        # below raises ZeroDivisionError.
        return 1.0
    # `distance` is the module-level name imported from the Levenshtein package.
    actual_wrong = distance(correct_output, sentences)
    return (max_wrong - actual_wrong) / max_wrong
45
+
46
+
47
+ # In[28]:
48
+
49
+
50
+ import gradio as gr
51
+ import textstat
52
+
53
+ from Levenshtein import distance
54
def correct_sentence(sentences):
    """Correct the submitted text and build the four values shown in the UI.

    Args:
        sentences: Raw text from the input box.

    Returns:
        A 4-tuple ``(heading, fluency_score, flesch_score, highlighted)``:
        the literal heading ``'Output'``, the Levenshtein similarity between
        the corrected and the submitted text as a percentage rounded to two
        decimals, the Flesch reading-ease score of the submitted text, and
        the markup produced by ``gf.highlight``. The two scores are ``'-'``
        when there is nothing to score.
    """
    # Strip before the emptiness check so whitespace-only input is rejected
    # here instead of being sent to the model.
    sentences = (sentences or "").strip()
    if not sentences:
        return 'Output', '-', '-', "Please Input Text."

    corrected = gf.correct(sentences)
    if not corrected:
        # gf.correct returns None when no model is loaded; an empty result
        # would otherwise leave correct_output undefined.
        return 'Output', '-', '-', "No correction available."

    # correct() is called with its default of one candidate, so the returned
    # collection holds a single sentence.
    correct_output = next(iter(corrected))

    fluency = round(levenstein_score(correct_output, sentences) * 100, 2)
    reading_ease = textstat.flesch_reading_ease(sentences)
    # The corrected text is passed as `orig`, so the highlight strikes
    # through the submitted wording and keeps the corrected wording.
    return 'Output', fluency, reading_ease, gf.highlight(correct_output, sentences)
62
+
63
# One text input feeding correct_sentence; its four return values map onto
# the four output components in order.
demo = gr.Interface(
    fn=correct_sentence,
    inputs=gr.Textbox(label="Input", lines=2, placeholder="Text Here..."),
    outputs=[
        gr.Markdown("Output"),
        gr.Textbox(label="Grammar Fluency Score"),
        gr.Textbox(label="Flesch Reading Score"),
        gr.Markdown(),
    ],
    allow_flagging="never",
)

# share=True also requests a public gradio.live URL in addition to the local one.
demo.launch(share=True)
71
+
72
+
73
+ # In[ ]:
74
+
75
+
76
+
77
+
78
+
79
+ # In[ ]:
80
+
81
+
82
+
83
+
84
+
85
+ # In[ ]:
86
+
87
+
88
+
89
+
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ errant==2.3.3
2
+ gradio==3.38.0
3
+ python_Levenshtein==0.21.1
4
+ python_Levenshtein==0.21.1
5
+ readability_lxml==0.8.1
6
+ spacy==2.3.9
7
+ textstat==0.7.3
8
+ transformers==4.31.0
9
+ torch==2.0.1
10
+ en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.0/en_core_web_sm-2.2.0.tar.gz
test.txt ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Tiket Titel: Bug Fxing for Login Pag
2
+
3
+ Discription:
4
+ Hey, we gott a big problam with our website login pag, and it's realy urgnt to get this fxd! Our usrs are complaning alot and we cant affrd to loos custumrs. Plz, we ned help asap.
5
+
6
+ So heres the issuse:
7
+ When a usr tries to log in, the pag isnt wrking properely. Somtimes, it doznt redirect to the dashboard, and othertimes, it taks them to a blnk pag with an errer msg. This is realy frustating for our usrs and they r loosing trust in us. We need this fxd immediatly!
8
+
9
+ Step to Reprooduce:
10
+ 1. Go to the logn pag
11
+ 2. Enter your usrnme and passwrd
12
+ 3. Clcik on the logn buton
13
+ 4. Somtimes, you wil be redircted to the dashbord, but othertimes, you wil see an errer msg on a blnk pag.
14
+
15
+ Expcted Ressult:
16
+ After loging in, the usr should be redircted to their dashbord evry time without any errers.
17
+
18
+ Actul Ressult:
19
+ The usr is redircted to a blnk pag with an errer msg occassionly.
20
+
21
+ Screenshott: (no attachmnt or image providd)
22
+
23
+ Additioonal Informasion:
24
+ We've noticd that this issu happens on diffrent browsrs like Chrome, Firebox, and even Intenet Explorr. We also trid loging in from mobil and desktop, and the issue is the same. We realli need a fix asap, and we'r hping you guys can figure it out.
25
+
26
+ Thank u so much for your help in advanse!
27
+