Spaces:

johnjets
/

grammar

Runtime error

App Files Files Community

johnjets committed on Jul 25, 2023

Commit

882d84f

•

1 Parent(s): a9eabfe

Browse files

Files changed (7) hide show

__pycache__/gramformerjohn.cpython-311.pyc +0 -0
app.py +80 -0
gramformerjohn.py +129 -0
grammardemo.ipynb +241 -0
grammardemo.py +89 -0
requirements.txt +10 -0
test.txt +27 -0

__pycache__/gramformerjohn.cpython-311.pyc ADDED Viewed

Binary file (5.67 kB). View file

app.py ADDED Viewed

	@@ -0,0 +1,80 @@

+#!/usr/bin/env python
+# coding: utf-8
+# In[1]:
+from gramformerjohn import Gramformer
+import gradio as gr
+import spacy
+# In[2]:
+# Build the corrector once at import time so every request reuses the same instance.
+# use_gpu = False is passed, so no GPU is requested.
+gf = Gramformer(models = 1, use_gpu = False)
+# In[3]:
+# NOTE(review): `name` is not referenced by any other visible line of this script.
+name = "how are you"
+# In[13]:
+# In[5]:
+def levenstein_score(correct_output, sentences):
+  max_wrong = max(len(correct_output), len(sentences))
+  actual_wrong = distance(correct_output, sentences)
+  return (max_wrong - actual_wrong)/max_wrong
+# In[28]:
+import gradio as gr
+import textstat
+from Levenshtein import distance
+def correct_sentence(sentences):
+  if(len(sentences) == 0):
+      return 'Output','-', '-', "Please Input Text."
+  sentences = sentences.strip()
+  corrected = gf.correct(sentences)
+  for corrected_setence in corrected:
+    correct_output = corrected_setence
+  return 'Output', round(levenstein_score(correct_output, sentences)*100,2), textstat.flesch_reading_ease(sentences), gf.highlight(correct_output,sentences)
+# Wire correct_sentence into a Gradio interface: one text input and four output
+# components, one per element of the 4-tuple the callback returns.
+demo = gr.Interface(
+    fn=correct_sentence,
+    inputs=gr.Textbox(label = "Input", lines=2, placeholder="Text Here..."),
+    outputs=[gr.Markdown("Output"), gr.Textbox(label = "Grammar Fluency Score"), gr.Textbox(label = "Flesch Reading Score"), gr.Markdown()],
+    allow_flagging="never"
+)
+# Start the app when the script runs; share = True is passed to launch().
+demo.launch(share = True)
+# In[ ]:
+# In[ ]:
+# In[ ]:

gramformerjohn.py ADDED Viewed

	@@ -0,0 +1,129 @@

+class Gramformer:
+  def __init__(self, models=1, use_gpu=False):
+    from transformers import AutoTokenizer
+    from transformers import AutoModelForSeq2SeqLM
+    #from lm_scorer.models.auto import AutoLMScorer as LMScorer
+    import errant
+    import spacy
+    import en_core_web_sm
+    nlp = en_core_web_sm.load()
+    self.annotator = errant.load('en', nlp)
+    if use_gpu:
+        device= "cuda:0"
+    else:
+        device = "cpu"
+    batch_size = 1
+    #self.scorer = LMScorer.from_pretrained("gpt2", device=device, batch_size=batch_size)
+    self.device    = device
+    correction_model_tag = "prithivida/grammar_error_correcter_v1"
+    self.model_loaded = False
+    if models == 1:
+        self.correction_tokenizer = AutoTokenizer.from_pretrained(correction_model_tag, use_auth_token=False)
+        self.correction_model     = AutoModelForSeq2SeqLM.from_pretrained(correction_model_tag, use_auth_token=False)
+        self.correction_model     = self.correction_model.to(device)
+        self.model_loaded = True
+        print("[Gramformer] Grammar error correct/highlight model loaded..")
+    elif models == 2:
+        # TODO
+        print("TO BE IMPLEMENTED!!!")
+  def correct(self, input_sentence, max_candidates=1):
+      if self.model_loaded:
+        correction_prefix = "gec: "
+        input_sentence = correction_prefix + input_sentence
+        input_ids = self.correction_tokenizer.encode(input_sentence, return_tensors='pt')
+        input_ids = input_ids.to(self.device)
+        preds = self.correction_model.generate(
+            input_ids,
+            do_sample=True,
+            max_length=128,
+#             top_k=50,
+#             top_p=0.95,
+            num_beams=7,
+            early_stopping=True,
+            num_return_sequences=max_candidates)
+        corrected = set()
+        for pred in preds:
+          corrected.add(self.correction_tokenizer.decode(pred, skip_special_tokens=True).strip())
+        #corrected = list(corrected)
+        #scores = self.scorer.sentence_score(corrected, log=True)
+        #ranked_corrected = [(c,s) for c, s in zip(corrected, scores)]
+        #ranked_corrected.sort(key = lambda x:x[1], reverse=True)
+        return corrected
+      else:
+        print("Model is not loaded")
+        return None
+  def highlight(self, orig, cor):
+      edits = self._get_edits(orig, cor)
+      orig_tokens = orig.split()
+      ignore_indexes = []
+      for edit in edits:
+          edit_type = edit[0]
+          edit_str_start = edit[1]
+          edit_spos = edit[2]
+          edit_epos = edit[3]
+          edit_str_end = edit[4]
+          # if no_of_tokens(edit_str_start) > 1 ==> excluding the first token, mark all other tokens for deletion
+          for i in range(edit_spos+1, edit_epos):
+            ignore_indexes.append(i)
+          if edit_str_start == "":
+              if edit_spos - 1 >= 0:
+                  new_edit_str = orig_tokens[edit_spos - 1]
+                  edit_spos -= 1
+              else:
+                  new_edit_str = orig_tokens[edit_spos + 1]
+                  edit_spos += 1
+              if edit_type == "PUNCT":
+                st = "<s>" + edit_str_end + "</s> " + new_edit_str
+              else:
+                st = "<s>" + edit_str_end + "</s> " + new_edit_str
+              orig_tokens[edit_spos] = st
+          elif edit_str_end == "":
+            st = "<s>""</s> " + edit_str_start
+            orig_tokens[edit_spos] = st
+          else:
+            st = "<s>" + edit_str_end + "</s> " + edit_str_start
+            orig_tokens[edit_spos] = st
+      for i in sorted(ignore_indexes, reverse=True):
+        del(orig_tokens[i])
+      return(" ".join(orig_tokens))
+  def detect(self, input_sentence):
+        # TO BE IMPLEMENTED
+        pass
+  def _get_edits(self, orig, cor):
+        orig = self.annotator.parse(orig)
+        cor = self.annotator.parse(cor)
+        alignment = self.annotator.align(orig, cor)
+        edits = self.annotator.merge(alignment)
+        if len(edits) == 0:
+            return []
+        edit_annotations = []
+        for e in edits:
+            e = self.annotator.classify(e)
+            edit_annotations.append((e.type[2:], e.o_str, e.o_start, e.o_end,  e.c_str, e.c_start, e.c_end))
+        if len(edit_annotations) > 0:
+            return edit_annotations
+        else:
+            return []
+  def get_edits(self, orig, cor):
+      return self._get_edits(orig, cor)

grammardemo.ipynb ADDED Viewed

	@@ -0,0 +1,241 @@

+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": 1,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 373
+        },
+        "id": "0i11Ui2Gj0Em",
+        "outputId": "c1f1adad-a6e3-4fa0-da79-887d00eb39d8"
+      },
+      "outputs": [
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "/usr/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+            "  from .autonotebook import tqdm as notebook_tqdm\n"
+          ]
+        }
+      ],
+      "source": [
+        "from gramformerjohn import Gramformer\n",
+        "import gradio as gr\n",
+        "\n",
+        "import spacy\n",
+        "\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 2,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "XKBBiv8Uk37_",
+        "outputId": "54c1c44e-1eb2-497a-945f-83fe11ee4773"
+      },
+      "outputs": [
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "/home/john/.local/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:1714: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers.\n",
+            "  warnings.warn(\n",
+            "/home/john/.local/lib/python3.11/site-packages/transformers/modeling_utils.py:2193: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers.\n",
+            "  warnings.warn(\n"
+          ]
+        },
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "[Gramformer] Grammar error correct/highlight model loaded..\n"
+          ]
+        }
+      ],
+      "source": [
+        "\n",
+        "gf = Gramformer(models = 1, use_gpu = False)\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 3,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "k3ZdYre-piXe",
+        "outputId": "012e9bf7-e714-44fd-89ac-ecaf87a71af1"
+      },
+      "outputs": [],
+      "source": [
+        "name = \"how are you\"\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 13,
+      "metadata": {
+        "id": "riR4iMHH7FCf"
+      },
+      "outputs": [],
+      "source": [
+        "from readability import Readability\n",
+        "import textstat\n",
+        "def reading_score(sentences):\n",
+        "  return Readability(sentences).flesch()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "zCUtKCJVwzcI"
+      },
+      "source": []
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 5,
+      "metadata": {
+        "id": "yjcDd1cMxNw5"
+      },
+      "outputs": [],
+      "source": [
+        "def levenstein_score(correct_output, sentences):\n",
+        "  max_wrong = max(len(correct_output), len(sentences))\n",
+        "  actual_wrong = distance(correct_output, sentences)\n",
+        "  return (max_wrong - actual_wrong)/max_wrong\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 28,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 373
+        },
+        "id": "O1c4swCfk9tH",
+        "outputId": "7ec07a6c-d3de-4758-ec00-48ff29084b37"
+      },
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "Running on local URL:  http://127.0.0.1:7889\n",
+            "Running on public URL: https://8a44ed60ed4fd74dcc.gradio.live\n",
+            "\n",
+            "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n"
+          ]
+        },
+        {
+          "data": {
+            "text/html": [
+              "<div><iframe src=\"https://8a44ed60ed4fd74dcc.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/plain": []
+          },
+          "execution_count": 28,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "import gradio as gr\n",
+        "import textstat\n",
+        "\n",
+        "from Levenshtein import distance\n",
+        "def correct_sentence(sentences):\n",
+        "  if(len(sentences) == 0):\n",
+        "      return 'Output','-', '-', \"Please Input Text.\"\n",
+        "  sentences = sentences.strip()\n",
+        "  corrected = gf.correct(sentences)\n",
+        "  for corrected_setence in corrected:\n",
+        "    correct_output = corrected_setence\n",
+        "  return 'Output', round(levenstein_score(correct_output, sentences)*100,2), textstat.flesch_reading_ease(sentences), gf.highlight(correct_output,sentences) \n",
+        "\n",
+        "demo = gr.Interface(\n",
+        "    fn=correct_sentence,\n",
+        "    inputs=gr.Textbox(label = \"Input\", lines=2, placeholder=\"Text Here...\"),\n",
+        "    outputs=[gr.Markdown(\"Output\"), gr.Textbox(label = \"Grammar Fluency Score\"), gr.Textbox(label = \"Flesch Reading Score\"), gr.Markdown()],\n",
+        "    allow_flagging=\"never\" \n",
+        ")\n",
+        "\n",
+        "demo.launch(share = True)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "f7dJEf_-xMH7"
+      },
+      "outputs": [],
+      "source": []
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "NaAQdzKbo4Xx"
+      },
+      "outputs": [],
+      "source": []
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "tEdhc8DBjOb4"
+      },
+      "outputs": [],
+      "source": []
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3.11.3 64-bit",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.11.3"
+    },
+    "vscode": {
+      "interpreter": {
+        "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a"
+      }
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}

grammardemo.py ADDED Viewed

	@@ -0,0 +1,89 @@

+#!/usr/bin/env python
+# coding: utf-8
+# In[1]:
+from gramformerjohn import Gramformer
+import gradio as gr
+import spacy
+# In[2]:
+# Build the corrector once at import time so every request reuses the same instance.
+# use_gpu = False is passed, so no GPU is requested.
+gf = Gramformer(models = 1, use_gpu = False)
+# In[3]:
+# NOTE(review): `name` is not referenced by any other visible line of this script.
+name = "how are you"
+# In[13]:
+from readability import Readability
+import textstat
+def reading_score(sentences):
+  return Readability(sentences).flesch()
+#
+# In[5]:
+def levenstein_score(correct_output, sentences):
+  max_wrong = max(len(correct_output), len(sentences))
+  actual_wrong = distance(correct_output, sentences)
+  return (max_wrong - actual_wrong)/max_wrong
+# In[28]:
+import gradio as gr
+import textstat
+from Levenshtein import distance
+def correct_sentence(sentences):
+  if(len(sentences) == 0):
+      return 'Output','-', '-', "Please Input Text."
+  sentences = sentences.strip()
+  corrected = gf.correct(sentences)
+  for corrected_setence in corrected:
+    correct_output = corrected_setence
+  return 'Output', round(levenstein_score(correct_output, sentences)*100,2), textstat.flesch_reading_ease(sentences), gf.highlight(correct_output,sentences)
+# Wire correct_sentence into a Gradio interface: one text input and four output
+# components, one per element of the 4-tuple the callback returns.
+demo = gr.Interface(
+    fn=correct_sentence,
+    inputs=gr.Textbox(label = "Input", lines=2, placeholder="Text Here..."),
+    outputs=[gr.Markdown("Output"), gr.Textbox(label = "Grammar Fluency Score"), gr.Textbox(label = "Flesch Reading Score"), gr.Markdown()],
+    allow_flagging="never"
+)
+# Start the app when the script runs; share = True is passed to launch().
+demo.launch(share = True)
+# In[ ]:
+# In[ ]:
+# In[ ]:

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+errant==2.3.3
+gradio==3.38.0
+python_Levenshtein==0.21.1
+python_Levenshtein==0.21.1
+readability_lxml==0.8.1
+spacy==2.3.9
+textstat==0.7.3
+transformers==4.31.0
+torch==2.0.1
+en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.0/en_core_web_sm-2.2.0.tar.gz

test.txt ADDED Viewed

	@@ -0,0 +1,27 @@

+Tiket Titel: Bug Fxing for Login Pag
+Discription:
+Hey, we gott a big problam with our website login pag, and it's realy urgnt to get this fxd! Our usrs are complaning alot and we cant affrd to loos custumrs. Plz, we ned help asap.
+So heres the issuse:
+When a usr tries to log in, the pag isnt wrking properely. Somtimes, it doznt redirect to the dashboard, and othertimes, it taks them to a blnk pag with an errer msg. This is realy frustating for our usrs and they r loosing trust in us. We need this fxd immediatly!
+Step to Reprooduce:
+1. Go to the logn pag
+2. Enter your usrnme and passwrd
+3. Clcik on the logn buton
+4. Somtimes, you wil be redircted to the dashbord, but othertimes, you wil see an errer msg on a blnk pag.
+Expcted Ressult:
+After loging in, the usr should be redircted to their dashbord evry time without any errers.
+Actul Ressult:
+The usr is redircted to a blnk pag with an errer msg occassionly.
+Screenshott: (no attachmnt or image providd)
+Additioonal Informasion:
+We've noticd that this issu happens on diffrent browsrs like Chrome, Firebox, and even Intenet Explorr. We also trid loging in from mobil and desktop, and the issue is the same. We realli need a fix asap, and we'r hping you guys can figure it out.
+Thank u so much for your help in advanse!

more