huamnifierWithSimpleGrammer

Running

App Files Files

sashtech committed on Sep 11, 2024

Commit

a456e86

verified ·

1 Parent(s): a92f0c3

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -57

app.py CHANGED Viewed

@@ -5,10 +5,6 @@ import spacy
 import subprocess
 import nltk
 from nltk.corpus import wordnet
-from textblob import TextBlob
-from pattern.en import conjugate, lemma, pluralize, singularize
-from gector.gec_model import GecBERTModel  # Import GECToR Model
-from utils.helpers import read_lines, normalize  # GECToR utilities
 # Initialize the English text classification pipeline for AI detection
 pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
@@ -55,71 +51,63 @@ def capitalize_sentences_and_nouns(text):
     return ' '.join(corrected_text)
-# Function to correct tense errors using Pattern
 def correct_tense_errors(text):
     doc = nlp(text)
     corrected_text = []
     for token in doc:
-        if token.pos_ == "VERB":
-            # Use conjugate from Pattern to adjust the tense of the verb
-            verb_form = conjugate(lemma(token.text), tense='past')  # Example: fix to past tense
-            corrected_text.append(verb_form)
         else:
             corrected_text.append(token.text)
     return ' '.join(corrected_text)
-# Function to correct singular/plural errors using Pattern
 def correct_singular_plural_errors(text):
     doc = nlp(text)
     corrected_text = []
     for token in doc:
         if token.pos_ == "NOUN":
             if token.tag_ == "NN":  # Singular noun
-                corrected_text.append(singularize(token.text))
             elif token.tag_ == "NNS":  # Plural noun
-                corrected_text.append(pluralize(token.text))
         else:
             corrected_text.append(token.text)
     return ' '.join(corrected_text)
-# Function to correct overall grammar using TextBlob
-def correct_grammar_textblob(text):
-    blob = TextBlob(text)
-    corrected_text = str(blob.correct())  # TextBlob's built-in grammar correction
-    return corrected_text
-# Initialize GECToR Model for Grammar Correction
-def load_gector_model():
-    model_path = ["gector/roberta_1_gector.th"]  # Ensure model file is placed correctly
-    vocab_path = "output_vocabulary"
-    model = GecBERTModel(vocab_path=vocab_path,
-                         model_paths=model_path,
-                         max_len=50,
-                         min_len=3,
-                         iterations=5,
-                         min_error_probability=0.0,
-                         lowercase_tokens=0,
-                         model_name="roberta",
-                         special_tokens_fix=1,
-                         log=False,
-                         confidence=0,
-                         del_confidence=0,
-                         is_ensemble=False,
-                         weigths=None)
-    return model
-# Load the GECToR model
-gector_model = load_gector_model()
-# Function to correct grammar using GECToR
-def correct_grammar_gector(text):
-    sentences = [text.split()]
-    corrected_sentences, _ = gector_model.handle_batch(sentences)
-    return " ".join(corrected_sentences[0])
 # Paraphrasing function using SpaCy and NLTK (Humanifier)
 def paraphrase_with_spacy_nltk(text):
@@ -146,17 +134,27 @@ def paraphrase_with_spacy_nltk(text):
         else:
             paraphrased_words.append(token.text)
-    return ' '.join(paraphrased_words)
 # Combined function: Paraphrase -> Grammar Correction -> Capitalization (Humanifier)
 def paraphrase_and_correct(text):
     # Step 1: Paraphrase the text
     paraphrased_text = paraphrase_with_spacy_nltk(text)
-    # Step 2: Apply grammatical corrections using GECToR
-    corrected_text = correct_grammar_gector(paraphrased_text)
-    return corrected_text
 # Gradio app setup with two tabs
 with gr.Blocks() as demo:
@@ -166,14 +164,16 @@ with gr.Blocks() as demo:
         label1 = gr.Textbox(lines=1, label='Predicted Label 🎃')
         score1 = gr.Textbox(lines=1, label='Prob')
         button1.click(predict_en, inputs=[t1], outputs=[label1, score1], api_name='predict_en')
     with gr.Tab("Humanifier"):
         text_input = gr.Textbox(lines=5, label="Input Text")
         paraphrase_button = gr.Button("Paraphrase & Correct")
-        output_text = gr.Textbox(label="Paraphrased and Corrected Text")
         paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)
-# Launch the app
-demo.launch()

 import subprocess
 import nltk
 from nltk.corpus import wordnet
 # Initialize the English text classification pipeline for AI detection
 pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
     return ' '.join(corrected_text)
+# Function to correct tense errors in a sentence (Tense Correction)
 def correct_tense_errors(text):
+    """Replace auxiliary verbs in *text* with the form returned by WordNet.
+
+    The text is parsed with the ``nlp`` pipeline defined elsewhere in this
+    file. Tokens tagged ``VERB`` whose dependency label is ``aux`` or
+    ``auxpass`` are swapped for the result of ``wordnet.morphy``; every other
+    token is kept verbatim. Tokens are re-joined with single spaces, so the
+    original whitespace is not preserved.
+
+    NOTE(review): no target tense is chosen here -- the matched verb is only
+    passed through ``morphy``. Confirm that this is the intended "tense
+    correction".
+    NOTE(review): recent spaCy English models appear to tag auxiliaries as
+    ``AUX`` rather than ``VERB``, in which case this branch would rarely
+    fire -- verify against the loaded model.
+    """
     doc = nlp(text)
     corrected_text = []
     for token in doc:
+        # Only verbs acting as (passive) auxiliaries are rewritten
+        if token.pos_ == "VERB" and token.dep_ in {"aux", "auxpass"}:
+            # Fall back to the original text when morphy yields nothing
+            lemma = wordnet.morphy(token.text, wordnet.VERB) or token.text
+            corrected_text.append(lemma)
         else:
             corrected_text.append(token.text)
     return ' '.join(corrected_text)
+# Function to correct singular/plural errors (Singular/Plural Correction)
 def correct_singular_plural_errors(text):
     doc = nlp(text)
     corrected_text = []
     for token in doc:
         if token.pos_ == "NOUN":
+            # Check if the noun is singular or plural
             if token.tag_ == "NN":  # Singular noun
+                # Look for determiners like "many" to correct to plural
+                if any(child.text.lower() in ['many', 'several', 'few'] for child in token.head.children):
+                    corrected_text.append(token.lemma_ + 's')
+                else:
+                    corrected_text.append(token.text)
             elif token.tag_ == "NNS":  # Plural noun
+                # Look for determiners like "a", "one" to correct to singular
+                if any(child.text.lower() in ['a', 'one'] for child in token.head.children):
+                    corrected_text.append(token.lemma_)
+                else:
+                    corrected_text.append(token.text)
+            else:
+                corrected_text.append(token.text)
         else:
             corrected_text.append(token.text)
     return ' '.join(corrected_text)
+# Function to check and correct article errors
+def correct_article_errors(text):
+    doc = nlp(text)
+    corrected_text = []
+    for token in doc:
+        if token.text in ['a', 'an']:
+            next_token = token.nbor(1)
+            if token.text == "a" and next_token.text[0].lower() in "aeiou":
+                corrected_text.append("an")
+            elif token.text == "an" and next_token.text[0].lower() not in "aeiou":
+                corrected_text.append("a")
+            else:
+                corrected_text.append(token.text)
+        else:
+            corrected_text.append(token.text)
+    return ' '.join(corrected_text)
 # Paraphrasing function using SpaCy and NLTK (Humanifier)
 def paraphrase_with_spacy_nltk(text):
         else:
             paraphrased_words.append(token.text)
+    # Join the words back into a sentence
+    paraphrased_sentence = ' '.join(paraphrased_words)
+    return paraphrased_sentence
+# Combined function: Paraphrase -> Article -> Capitalization -> Singular/Plural -> Tense (Humanifier)
 def paraphrase_and_correct(text):
+    """Paraphrase *text*, then run the rule-based correction passes on it.
+
+    The passes run in this order: ``correct_article_errors``,
+    ``capitalize_sentences_and_nouns``, ``correct_singular_plural_errors``
+    and finally ``correct_tense_errors``, whose output is returned.
+    """
     # Step 1: Paraphrase the text
     paraphrased_text = paraphrase_with_spacy_nltk(text)
+    # Step 2: Apply grammatical corrections on the paraphrased text
+    corrected_text = correct_article_errors(paraphrased_text)
+    corrected_text = capitalize_sentences_and_nouns(corrected_text)
+    corrected_text = correct_singular_plural_errors(corrected_text)
+    # Step 3: Run the tense pass last; its result is the final output
+    final_text = correct_tense_errors(corrected_text)
+    return final_text
 # Gradio app setup with two tabs
 with gr.Blocks() as demo:
         label1 = gr.Textbox(lines=1, label='Predicted Label 🎃')
         score1 = gr.Textbox(lines=1, label='Prob')
+        # Connect the prediction function to the button
         button1.click(predict_en, inputs=[t1], outputs=[label1, score1], api_name='predict_en')
     with gr.Tab("Humanifier"):
         text_input = gr.Textbox(lines=5, label="Input Text")
         paraphrase_button = gr.Button("Paraphrase & Correct")
+        output_text = gr.Textbox(label="Paraphrased Text")
+        # Connect the paraphrasing function to the button
         paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)
+# Launch the app with the remaining functionalities
+demo.launch()