sashtech committed on
Commit
90fff6b
·
verified ·
1 Parent(s): f79e1dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -62
app.py CHANGED
@@ -1,57 +1,83 @@
1
  import os
2
- import subprocess
3
  import gradio as gr
4
  from transformers import pipeline
5
  import spacy
 
6
  import nltk
7
  from nltk.corpus import wordnet
8
 
9
- # Ensure necessary NLTK data is downloaded
 
 
 
 
 
 
 
 
10
  nltk.download('wordnet')
11
  nltk.download('omw-1.4')
12
 
13
- # Ensure the SpaCy model is installed
14
  try:
15
  nlp = spacy.load("en_core_web_sm")
16
  except OSError:
17
  subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
18
  nlp = spacy.load("en_core_web_sm")
19
 
20
- # Initialize the English text classification pipeline for AI detection
21
- pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
22
-
23
- def predict_en(text):
24
- """Function to predict the label and score for English text (AI Detection)"""
25
- res = pipeline_en(text)[0]
26
- return res['label'], res['score']
27
-
28
  def get_synonyms_nltk(word, pos):
29
- """Function to get synonyms using NLTK WordNet"""
30
  synsets = wordnet.synsets(word, pos=pos)
31
  if synsets:
32
  lemmas = synsets[0].lemmas()
33
  return [lemma.name() for lemma in lemmas]
34
  return []
35
 
36
- def rephrase_text(text):
37
- """Function to rephrase text by replacing words with synonyms"""
38
  doc = nlp(text)
39
- rephrased_text = []
 
40
 
41
  for token in doc:
42
- if token.pos_ in ["NOUN", "VERB", "ADJ"]:
43
- synonyms = get_synonyms_nltk(token.text, pos=token.pos_.lower())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  if synonyms:
45
- rephrased_text.append(synonyms[0]) # Replace with first synonym found
 
 
 
 
 
 
46
  else:
47
- rephrased_text.append(token.text)
48
- else:
49
- rephrased_text.append(token.text)
50
-
51
- return ' '.join(rephrased_text)
52
 
 
 
 
53
  def capitalize_sentences_and_nouns(text):
54
- """Function to capitalize the first letter of sentences and proper nouns"""
55
  doc = nlp(text)
56
  corrected_text = []
57
 
@@ -68,22 +94,42 @@ def capitalize_sentences_and_nouns(text):
68
 
69
  return ' '.join(corrected_text)
70
 
71
- def correct_tense_errors(text):
72
- """Function to correct tense errors in a sentence"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  doc = nlp(text)
74
  corrected_text = []
75
  for token in doc:
76
- if token.pos_ == "VERB" and token.dep_ in {"aux", "auxpass"}:
77
- lemma = wordnet.morphy(token.text, wordnet.VERB) or token.text
78
- corrected_text.append(lemma)
 
 
 
 
 
79
  else:
80
  corrected_text.append(token.text)
81
  return ' '.join(corrected_text)
82
 
 
83
  def correct_singular_plural_errors(text):
84
- """Function to correct singular/plural errors"""
85
  doc = nlp(text)
86
  corrected_text = []
 
87
  for token in doc:
88
  if token.pos_ == "NOUN":
89
  if token.tag_ == "NN": # Singular noun
@@ -98,48 +144,39 @@ def correct_singular_plural_errors(text):
98
  corrected_text.append(token.text)
99
  else:
100
  corrected_text.append(token.text)
 
101
  return ' '.join(corrected_text)
102
 
103
- def correct_article_errors(text):
104
- """Function to check and correct article errors"""
105
  doc = nlp(text)
106
  corrected_text = []
107
  for token in doc:
108
- if token.text in ['a', 'an']:
109
- next_token = token.nbor(1)
110
- if token.text == "a" and next_token.text[0].lower() in "aeiou":
111
- corrected_text.append("an")
112
- elif token.text == "an" and next_token.text[0].lower() not in "aeiou":
113
- corrected_text.append("a")
114
- else:
115
- corrected_text.append(token.text)
116
  else:
117
  corrected_text.append(token.text)
118
  return ' '.join(corrected_text)
119
 
120
- def paraphrase_and_correct(text):
121
- """Function to rephrase and correct grammar"""
122
- rephrased_text = rephrase_text(text)
123
- rephrased_text = capitalize_sentences_and_nouns(rephrased_text) # Capitalize first to ensure proper noun capitalization
124
- rephrased_text = correct_article_errors(rephrased_text)
125
- rephrased_text = correct_tense_errors(rephrased_text)
126
- rephrased_text = correct_singular_plural_errors(rephrased_text)
127
- return rephrased_text
128
-
129
- # Define Gradio interface
130
  with gr.Blocks() as demo:
131
- with gr.Row():
132
- t1 = gr.Textbox(label="Input Text", lines=5)
133
- button1 = gr.Button("Process")
134
- with gr.Row():
135
- output_text = gr.Textbox(label="Processed Text", lines=5)
136
- label1 = gr.Label(label="AI Detection Label")
137
- score1 = gr.Label(label="AI Detection Score")
 
138
 
139
- button1.click(
140
- fn=lambda text: (paraphrase_and_correct(text), *predict_en(text)),
141
- inputs=[t1],
142
- outputs=[output_text, label1, score1]
143
- )
 
 
144
 
 
145
  demo.launch()
 
1
  import os
 
2
  import gradio as gr
3
  from transformers import pipeline
4
  import spacy
5
+ import subprocess
6
  import nltk
7
  from nltk.corpus import wordnet
8
 
9
# Initialize the English text classification pipeline for AI detection
pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")


# Function to predict the label and score for English text (AI Detection)
def predict_en(text):
    """Classify *text* with the AI-detection pipeline.

    Args:
        text: The English text to classify.

    Returns:
        A ``(label, score)`` tuple taken from the first result the
        pipeline returns for the input.
    """
    # truncation=True is forwarded to the tokenizer so that inputs longer
    # than the model's maximum sequence length are cut down instead of
    # making the forward pass fail.
    res = pipeline_en(text, truncation=True)[0]
    return res['label'], res['score']
16
+
17
# Ensure necessary NLTK data is downloaded for Humanifier
nltk.download('wordnet')
nltk.download('omw-1.4')

# Ensure the SpaCy model is installed for Humanifier
import sys  # needed only to locate the running interpreter for the fallback below

try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Install with the interpreter that is running this app: a bare "python"
    # may resolve to a different environment or not exist at all.
    # check=True surfaces a failed download immediately instead of letting
    # the retry below fail with the same, less informative OSError.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")
27
 
28
# Function to get synonyms using NLTK WordNet (Humanifier)
def get_synonyms_nltk(word, pos):
    """Return the lemma names of the first WordNet synset for *word*.

    Args:
        word: The word to look up.
        pos: The WordNet part-of-speech constant to restrict the lookup to.

    Returns:
        A list with the name of every lemma in the first matching synset,
        or an empty list when WordNet has no synset for the word.
    """
    matches = wordnet.synsets(word, pos=pos)
    if not matches:
        return []
    # Only the first synset is consulted.
    return [entry.name() for entry in matches[0].lemmas()]
35
 
36
# Helper: regular English pluralization for a substituted noun
def _pluralize(noun):
    """Return a regular English plural of *noun* (irregular nouns are not handled)."""
    lowered = noun.lower()
    if lowered.endswith(("s", "x", "z", "ch", "sh")):
        return noun + "es"
    if len(lowered) > 1 and lowered.endswith("y") and lowered[-2] not in "aeiou":
        return noun[:-1] + "ies"
    return noun + "s"


# Helper: decide what a single token should be replaced with
def _choose_replacement(token, wordnet_pos):
    """Return the text to emit for *token*: a synonym, or the original word."""
    word = token.text
    synonyms = get_synonyms_nltk(word, wordnet_pos)
    if not synonyms:
        return word  # No synonym found, keep the word as is

    # Multi-word WordNet lemma names use underscores; emit normal spaces.
    synonym = synonyms[0].replace('_', ' ')

    # The first lemma being the word (or its base form) means there is
    # nothing to substitute. Keep the original surface form so that tense
    # and irregular plurals ("children") are not destroyed.
    if synonym.lower() in (word.lower(), token.lemma_.lower()):
        return word

    if token.pos_ == "VERB" and token.tag_ not in ("VB", "VBP"):
        # The synonym is a base form and there is no inflection facility in
        # scope to conjugate it, so inflected verbs are left untouched
        # rather than emitted in the wrong tense.
        return word

    if token.pos_ == "NOUN" and token.tag_ == "NNS":
        # Plural noun: make sure the synonym is plural too.
        return _pluralize(synonym)

    return synonym


# Updated function to replace words with synonyms while preserving verb forms and pluralization
def replace_with_synonyms(text):
    """Replace verbs, nouns, adjectives and adverbs in *text* with synonyms.

    Each token is looked up in WordNet under its part of speech and replaced
    by the first lemma of the first synset when that lemma is a different
    word and can be used without changing the grammatical form. Tokens of
    any other part of speech are copied unchanged.

    Args:
        text: The text to rewrite.

    Returns:
        The rewritten tokens joined with single spaces.
    """
    doc = nlp(text)
    # Built per call so the WordNet constants are only touched at run time.
    wordnet_pos_by_tag = {
        "VERB": wordnet.VERB,
        "NOUN": wordnet.NOUN,
        "ADJ": wordnet.ADJ,
        "ADV": wordnet.ADV,
    }
    # Cache keyed by surface form AND grammatical role: the same spelling
    # can be a noun in one place and a verb in another.
    replaced_words = {}
    corrected_text = []

    for token in doc:
        wordnet_pos = wordnet_pos_by_tag.get(token.pos_)
        if wordnet_pos is None:
            corrected_text.append(token.text)  # No change for other POS
            continue

        cache_key = (token.text, token.pos_, token.tag_)
        if cache_key not in replaced_words:
            replaced_words[cache_key] = _choose_replacement(token, wordnet_pos)
        corrected_text.append(replaced_words[cache_key])

    return ' '.join(corrected_text)
78
+
79
+ # Function to capitalize the first letter of sentences and proper nouns (Humanifier)
80
  def capitalize_sentences_and_nouns(text):
 
81
  doc = nlp(text)
82
  corrected_text = []
83
 
 
94
 
95
  return ' '.join(corrected_text)
96
 
97
# Function to paraphrase and correct grammar with stronger synonym usage
def paraphrase_and_correct(text):
    """Run *text* through the full Humanifier pipeline and return the result.

    The stages run in a fixed order, each receiving the previous stage's
    output: capitalization, synonym replacement, then the article,
    singular/plural and tense corrections.
    """
    stages = (
        capitalize_sentences_and_nouns,  # capitalize first to ensure proper noun capitalization
        replace_with_synonyms,
        correct_article_errors,
        correct_singular_plural_errors,
        correct_tense_errors,
    )
    result = text
    for stage in stages:
        result = stage(result)
    return result
110
+
111
# Correct article errors
def correct_article_errors(text):
    """Fix "a"/"an" so the article agrees with the first letter of the next word.

    "a" before a word starting with a, e, i, o or u becomes "an", and "an"
    before any other letter becomes "a". The decision is purely
    orthographic; pronunciation is not considered. A capitalized "A"/"An"
    is also checked when it starts a sentence, and keeps its capital.

    Args:
        text: The text to correct.

    Returns:
        The tokens of the text joined with single spaces.
    """
    doc = nlp(text)
    corrected_text = []
    last_index = len(doc) - 1
    for token in doc:
        word = token.text
        is_article = word in ('a', 'an') or (
            # Capitalized forms are only trusted at a sentence start, so a
            # standalone letter "A" elsewhere is not mistaken for an article.
            word in ('A', 'An') and token.is_sent_start
        )
        # An article that is the very last token has no following word to
        # agree with (and asking for its neighbour would raise IndexError).
        if is_article and token.i < last_index:
            first_char = doc[token.i + 1].text[0].lower()
            # Only letters decide the article; a following quote, digit or
            # other symbol leaves the article untouched.
            if first_char.isalpha():
                wanted = "an" if first_char in "aeiou" else "a"
                if wanted != word.lower():
                    word = wanted.capitalize() if word[0].isupper() else wanted
        corrected_text.append(word)
    return ' '.join(corrected_text)
127
 
128
+ # Correct singular/plural errors
129
  def correct_singular_plural_errors(text):
 
130
  doc = nlp(text)
131
  corrected_text = []
132
+
133
  for token in doc:
134
  if token.pos_ == "NOUN":
135
  if token.tag_ == "NN": # Singular noun
 
144
  corrected_text.append(token.text)
145
  else:
146
  corrected_text.append(token.text)
147
+
148
  return ' '.join(corrected_text)
149
 
150
# Correct tense errors in verbs
def correct_tense_errors(text):
    """Replace auxiliary verbs in *text* with the form WordNet's morphy returns.

    A token is rewritten only when it is tagged as a VERB and its dependency
    label is "aux" or "auxpass"; when morphy returns nothing the token text
    is kept. Every other token is copied unchanged.

    Args:
        text: The text to process.

    Returns:
        The resulting tokens joined with single spaces.
    """
    # NOTE(review): auxiliaries may be tagged AUX rather than VERB by the
    # loaded model, in which case this condition never matches - confirm.
    aux_labels = {"aux", "auxpass"}
    pieces = []
    for tok in nlp(text):
        if tok.pos_ == "VERB" and tok.dep_ in aux_labels:
            pieces.append(wordnet.morphy(tok.text, wordnet.VERB) or tok.text)
        else:
            pieces.append(tok.text)
    return ' '.join(pieces)
161
 
162
# Gradio app setup with two tabs
with gr.Blocks() as demo:
    # Tab 1: classify the entered text and show the predicted label and score.
    with gr.Tab("AI Detection"):
        t1 = gr.Textbox(label='Text', lines=5)
        button1 = gr.Button("🤖 Predict!")
        label1 = gr.Textbox(label='Predicted Label 🎃', lines=1)
        score1 = gr.Textbox(label='Prob', lines=1)

        # Connect the prediction function to the button
        button1.click(
            fn=predict_en,
            inputs=[t1],
            outputs=[label1, score1],
            api_name='predict_en',
        )

    # Tab 2: run the paraphrase-and-correct pipeline on the entered text.
    with gr.Tab("Humanifier"):
        text_input = gr.Textbox(label="Input Text", lines=5)
        paraphrase_button = gr.Button("Paraphrase & Correct")
        output_text = gr.Textbox(label="Paraphrased Text")

        # Connect the paraphrasing function to the button
        paraphrase_button.click(
            fn=paraphrase_and_correct,
            inputs=text_input,
            outputs=output_text,
        )

# Launch the app with the remaining functionalities
demo.launch()