sashtech committed on
Commit
a456e86
·
verified ·
1 Parent(s): a92f0c3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -57
app.py CHANGED
@@ -5,10 +5,6 @@ import spacy
5
  import subprocess
6
  import nltk
7
  from nltk.corpus import wordnet
8
- from textblob import TextBlob
9
- from pattern.en import conjugate, lemma, pluralize, singularize
10
- from gector.gec_model import GecBERTModel # Import GECToR Model
11
- from utils.helpers import read_lines, normalize # GECToR utilities
12
 
13
  # Initialize the English text classification pipeline for AI detection
14
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
@@ -55,71 +51,63 @@ def capitalize_sentences_and_nouns(text):
55
 
56
  return ' '.join(corrected_text)
57
 
58
- # Function to correct tense errors using Pattern
59
  def correct_tense_errors(text):
60
  doc = nlp(text)
61
  corrected_text = []
62
-
63
  for token in doc:
64
- if token.pos_ == "VERB":
65
- # Use conjugate from Pattern to adjust the tense of the verb
66
- verb_form = conjugate(lemma(token.text), tense='past') # Example: fix to past tense
67
- corrected_text.append(verb_form)
 
68
  else:
69
  corrected_text.append(token.text)
70
-
71
  return ' '.join(corrected_text)
72
 
73
- # Function to correct singular/plural errors using Pattern
74
  def correct_singular_plural_errors(text):
75
  doc = nlp(text)
76
  corrected_text = []
77
-
78
  for token in doc:
79
  if token.pos_ == "NOUN":
 
80
  if token.tag_ == "NN": # Singular noun
81
- corrected_text.append(singularize(token.text))
 
 
 
 
82
  elif token.tag_ == "NNS": # Plural noun
83
- corrected_text.append(pluralize(token.text))
 
 
 
 
 
 
84
  else:
85
  corrected_text.append(token.text)
86
-
87
  return ' '.join(corrected_text)
88
 
89
- # Function to correct overall grammar using TextBlob
90
- def correct_grammar_textblob(text):
91
- blob = TextBlob(text)
92
- corrected_text = str(blob.correct()) # TextBlob's built-in grammar correction
93
- return corrected_text
94
-
95
- # Initialize GECToR Model for Grammar Correction
96
- def load_gector_model():
97
- model_path = ["gector/roberta_1_gector.th"] # Ensure model file is placed correctly
98
- vocab_path = "output_vocabulary"
99
- model = GecBERTModel(vocab_path=vocab_path,
100
- model_paths=model_path,
101
- max_len=50,
102
- min_len=3,
103
- iterations=5,
104
- min_error_probability=0.0,
105
- lowercase_tokens=0,
106
- model_name="roberta",
107
- special_tokens_fix=1,
108
- log=False,
109
- confidence=0,
110
- del_confidence=0,
111
- is_ensemble=False,
112
- weigths=None)
113
- return model
114
-
115
- # Load the GECToR model
116
- gector_model = load_gector_model()
117
-
118
- # Function to correct grammar using GECToR
119
- def correct_grammar_gector(text):
120
- sentences = [text.split()]
121
- corrected_sentences, _ = gector_model.handle_batch(sentences)
122
- return " ".join(corrected_sentences[0])
123
 
124
  # Paraphrasing function using SpaCy and NLTK (Humanifier)
125
  def paraphrase_with_spacy_nltk(text):
@@ -146,17 +134,27 @@ def paraphrase_with_spacy_nltk(text):
146
  else:
147
  paraphrased_words.append(token.text)
148
 
149
- return ' '.join(paraphrased_words)
 
 
 
150
 
151
  # Combined function: Paraphrase -> Grammar Correction -> Capitalization (Humanifier)
152
  def paraphrase_and_correct(text):
153
  # Step 1: Paraphrase the text
154
  paraphrased_text = paraphrase_with_spacy_nltk(text)
155
 
156
- # Step 2: Apply grammatical corrections using GECToR
157
- corrected_text = correct_grammar_gector(paraphrased_text)
 
 
 
 
 
 
 
158
 
159
- return corrected_text
160
 
161
  # Gradio app setup with two tabs
162
  with gr.Blocks() as demo:
@@ -166,14 +164,16 @@ with gr.Blocks() as demo:
166
  label1 = gr.Textbox(lines=1, label='Predicted Label 🎃')
167
  score1 = gr.Textbox(lines=1, label='Prob')
168
 
 
169
  button1.click(predict_en, inputs=[t1], outputs=[label1, score1], api_name='predict_en')
170
 
171
  with gr.Tab("Humanifier"):
172
  text_input = gr.Textbox(lines=5, label="Input Text")
173
  paraphrase_button = gr.Button("Paraphrase & Correct")
174
- output_text = gr.Textbox(label="Paraphrased and Corrected Text")
175
 
 
176
  paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)
177
 
178
- # Launch the app
179
- demo.launch()
 
5
  import subprocess
6
  import nltk
7
  from nltk.corpus import wordnet
 
 
 
 
8
 
9
  # Initialize the English text classification pipeline for AI detection
10
  pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
 
51
 
52
  return ' '.join(corrected_text)
53
 
54
+ # Function to correct tense errors in a sentence (Tense Correction)
55
  def correct_tense_errors(text):
56
  doc = nlp(text)
57
  corrected_text = []
 
58
  for token in doc:
59
+ # Check for tense correction based on modal verbs
60
+ if token.pos_ == "VERB" and token.dep_ in {"aux", "auxpass"}:
61
+ # Replace with appropriate verb form
62
+ lemma = wordnet.morphy(token.text, wordnet.VERB) or token.text
63
+ corrected_text.append(lemma)
64
  else:
65
  corrected_text.append(token.text)
 
66
  return ' '.join(corrected_text)
67
 
68
+ # Function to correct singular/plural errors (Singular/Plural Correction)
69
  def correct_singular_plural_errors(text):
70
  doc = nlp(text)
71
  corrected_text = []
72
+
73
  for token in doc:
74
  if token.pos_ == "NOUN":
75
+ # Check if the noun is singular or plural
76
  if token.tag_ == "NN": # Singular noun
77
+ # Look for determiners like "many" to correct to plural
78
+ if any(child.text.lower() in ['many', 'several', 'few'] for child in token.head.children):
79
+ corrected_text.append(token.lemma_ + 's')
80
+ else:
81
+ corrected_text.append(token.text)
82
  elif token.tag_ == "NNS": # Plural noun
83
+ # Look for determiners like "a", "one" to correct to singular
84
+ if any(child.text.lower() in ['a', 'one'] for child in token.head.children):
85
+ corrected_text.append(token.lemma_)
86
+ else:
87
+ corrected_text.append(token.text)
88
+ else:
89
+ corrected_text.append(token.text)
90
  else:
91
  corrected_text.append(token.text)
92
+
93
  return ' '.join(corrected_text)
94
 
95
+ # Function to check and correct article errors
96
+ def correct_article_errors(text):
97
+ doc = nlp(text)
98
+ corrected_text = []
99
+ for token in doc:
100
+ if token.text in ['a', 'an']:
101
+ next_token = token.nbor(1)
102
+ if token.text == "a" and next_token.text[0].lower() in "aeiou":
103
+ corrected_text.append("an")
104
+ elif token.text == "an" and next_token.text[0].lower() not in "aeiou":
105
+ corrected_text.append("a")
106
+ else:
107
+ corrected_text.append(token.text)
108
+ else:
109
+ corrected_text.append(token.text)
110
+ return ' '.join(corrected_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
  # Paraphrasing function using SpaCy and NLTK (Humanifier)
113
  def paraphrase_with_spacy_nltk(text):
 
134
  else:
135
  paraphrased_words.append(token.text)
136
 
137
+ # Join the words back into a sentence
138
+ paraphrased_sentence = ' '.join(paraphrased_words)
139
+
140
+ return paraphrased_sentence
141
 
142
  # Combined function: Paraphrase -> Grammar Correction -> Capitalization (Humanifier)
143
  def paraphrase_and_correct(text):
144
  # Step 1: Paraphrase the text
145
  paraphrased_text = paraphrase_with_spacy_nltk(text)
146
 
147
+ # Step 2: Apply grammatical corrections on the paraphrased text
148
+ corrected_text = correct_article_errors(paraphrased_text)
149
+
150
+ corrected_text = capitalize_sentences_and_nouns(corrected_text)
151
+
152
+ corrected_text = correct_singular_plural_errors(corrected_text)
153
+
154
+ # Step 3: Correct tense errors (final correction step)
155
+ final_text = correct_tense_errors(corrected_text)
156
 
157
+ return final_text
158
 
159
  # Gradio app setup with two tabs
160
  with gr.Blocks() as demo:
 
164
  label1 = gr.Textbox(lines=1, label='Predicted Label 🎃')
165
  score1 = gr.Textbox(lines=1, label='Prob')
166
 
167
+ # Connect the prediction function to the button
168
  button1.click(predict_en, inputs=[t1], outputs=[label1, score1], api_name='predict_en')
169
 
170
  with gr.Tab("Humanifier"):
171
  text_input = gr.Textbox(lines=5, label="Input Text")
172
  paraphrase_button = gr.Button("Paraphrase & Correct")
173
+ output_text = gr.Textbox(label="Paraphrased Text")
174
 
175
+ # Connect the paraphrasing function to the button
176
  paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)
177
 
178
+ # Launch the app with the remaining functionalities
179
+ demo.launch()