raj-tomar001 committed on
Commit
7df1b03
·
verified ·
1 Parent(s): 54917e2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +310 -83
app.py CHANGED
@@ -14,98 +14,325 @@ tokenizer_essay = DebertaTokenizer.from_pretrained(save_path_essay)
14
 
15
  classifier_essay = pipeline('text-classification', model=model_essay, tokenizer=tokenizer_essay)
16
 
17
- def update(name, uploaded_file, radio_input):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  if uploaded_file is not None:
19
- return f"{name}, you uploaded a file named {uploaded_file.name}."
20
  else:
21
- if radio_input == 'Scientific Abstract':
22
- data = classifier_abstract(name)[0]['label']
23
- if data == 'LABEL_0':
24
- return "human_text"
25
- if data == 'LABEL_1':
26
- return "machine_text"
27
- if data == 'LABEL_2':
28
- return "human-written | machine-polished"
29
- if data == 'LABEL_3':
30
- return "machine-generated | machine-humanized"
31
- else:
32
- if radio_input == 'Student Essay':
33
- data = classifier_essay(name)[0]['label']
34
- if data == 'LABEL_0':
35
- return "human_text"
36
- if data == 'LABEL_1':
37
- return "machine_text"
38
- if data == 'LABEL_2':
39
- return "human-written | machine-polished"
40
- if data == 'LABEL_3':
41
- return "machine-generated | machine-humanized"
42
- # return "Hold on!"
43
 
44
- with gr.Blocks() as demo:
45
- gr.Markdown(
46
- """
47
- <style>
48
- .gr-button-secondary {
49
- width: 100px;
50
- height: 30px;
51
- padding: 5px;
52
- }
53
- .gr-row {
54
- display: flex;
55
- align-items: center;
56
- gap: 10px;
57
- }
58
- .gr-block {
59
- padding: 20px;
60
- }
61
- .gr-markdown p {
62
- font-size: 16px;
63
- }
64
- </style>
65
- <span style='font-family: Arial, sans-serif; font-size: 20px;'>Was this text written by <strong>human</strong> or <strong>AI</strong>?</span>
66
- <p style='font-family: Arial, sans-serif;'>Try detecting one of our sample texts:</p>
67
- """
68
- )
69
-
70
- with gr.Row():
71
- for sample in ["Machine-Generated", "Human-Written", "Machine-Humanized", "Machine - Polished"]:
72
- gr.Button(sample, variant="outline")
73
 
74
- with gr.Row():
75
- radio_button = gr.Radio(['Scientific Abstract', 'Student Essay'], label = 'Text Type', info = 'We have specialized models that work on domain-specific text.')
76
 
77
- with gr.Row():
78
- input_text = gr.Textbox(placeholder="Paste your text here...", label="", lines=10)
79
- file_input = gr.File(label="Upload File")
 
 
 
 
 
 
 
 
 
 
 
80
 
81
- #file_input = gr.File(label="", visible=False) # Hide the actual file input
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
- with gr.Row():
84
- check_button = gr.Button("Check Origin", variant="primary")
85
- clear_button = gr.ClearButton([input_text, file_input, radio_button], variant='stop')
86
- #upload_button = gr.Button("Upload File", variant="secondary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
- out = gr.Textbox(label="OUTPUT", placeholder="", lines=2)
89
- clear_button.add(out)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
- check_button.click(fn=update, inputs=[input_text, file_input, radio_button], outputs=out)
92
- #upload_button.click(lambda: None, inputs=[], outputs=[]).then(fn=update, inputs=[input_text, file_input], outputs=out)
93
-
94
- # Adding JavaScript to simulate file input click
95
- gr.Markdown(
96
- """
97
- <script>
98
- document.addEventListener("DOMContentLoaded", function() {
99
- const uploadButton = Array.from(document.getElementsByTagName('button')).find(el => el.innerText === "Upload File");
100
- if (uploadButton) {
101
- uploadButton.onclick = function() {
102
- document.querySelector('input[type="file"]').click();
103
- };
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  }
105
- });
106
- </script>
107
- """
108
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
- demo.launch(share=True)
111
 
 
14
 
15
  classifier_essay = pipeline('text-classification', model=model_essay, tokenizer=tokenizer_essay)
16
 
17
# Sample texts used by the 'Challenge Yourself!' tab, loaded once at import time.
# The context manager closes the file handle instead of leaving it open.
with open('/samples.json', encoding='utf-8') as samples_file:
    demo_essays = json.load(samples_file)
# Position in demo_essays of the sample currently shown; None until one is generated.
index = None
19
+
20
+
21
+ ################# HELPER FUNCTIONS (DETECTION TAB) ####################
22
+
23
def process_result_detection_tab(text):
    '''
    Classify the text into one of the four categories by averaging the soft predictions of the two models.

    The two score lists are matched by label, not by position, so the average stays
    correct even if the two pipelines return their labels in different orders.

    Args:
        text: str: the text to be classified
    Returns:
        dict: a dictionary with the following keys:
            'Machine Generated': float: the probability that the text is machine generated
            'Human Written': float: the probability that the text is human written
            'Machine Written, Machine Humanized': float: the probability that the text is machine written and machine humanized
            'Human Written, Machine Polished': float: the probability that the text is human written and machine polished
    Raises:
        KeyError: if a model emits a label that is missing from the mapping, or the
            second model does not score a label returned by the first one.
    '''
    mapping = {
        'llm': 'Machine Generated',
        'human': 'Human Written',
        'machine-humanized': 'Machine Written, Machine Humanized',
        'machine-polished': 'Human Written, Machine Polished',
    }

    # NOTE(review): `classifier` and `classifier_roberta` are defined outside this
    # function; each is assumed to return, for the single input, a list of
    # {'label': ..., 'score': ...} dicts covering every class -- confirm.
    result = classifier(text)[0]
    result_r = classifier_roberta(text)[0]

    # Index the second model's scores by label so each pair is averaged correctly.
    scores_r = {entry['label']: entry['score'] for entry in result_r}

    final_results = {
        mapping[entry['label']]: float(0.5 * entry['score'] + 0.5 * scores_r[entry['label']])
        for entry in result
    }
    print(final_results)
    return final_results
47
+
48
def update_detection_tab(name, uploaded_file, radio_input):
    '''
    Callback function to update the result of the classification based on the input text or uploaded file.

    Args:
        name: str: the input text from the Textbox (None or blank text counts as empty)
        uploaded_file: file: the uploaded file from the file input
        radio_input: str: the selected text type; accepted for the callback signature
            but not used to choose a model here
    Returns:
        dict: the result of the classification including labels and scores (text input)
        str: "Work in progress" when a file is uploaded, or "" when there is nothing to classify
    '''
    if uploaded_file is not None:
        # Classifying uploaded files is not implemented yet.
        return "Work in progress"

    # Guard against None / whitespace-only text so the models never see empty input.
    if not name or not name.strip():
        return ""

    return process_result_detection_tab(name)
64
+
65
def active_button_detection_tab(input_text, file_input):
    '''
    Callback function to activate the 'Check Origin' button when the input text or file input
    is not empty. For text input, the button can be clicked only when the word count is between
    50 and 500. An uploaded file enables the button regardless of the text.

    Args:
        input_text: str: the input text from the textbox (None is treated as empty)
        file_input: file: the uploaded file from the file input
    Returns:
        gr.Button: The 'Check Origin' button with the appropriate interactivity.
    '''
    # Empty or missing text has zero words, so it fails the range check on its own.
    word_count = len((input_text or "").split())
    can_check = file_input is not None or 50 <= word_count <= 500

    return gr.Button("Check Origin", variant="primary", interactive=can_check)
82
+
83
def clear_detection_tab():
    '''
    Callback function that resets the inputs of the 'Try it!' tab.
    The 'Check Origin' button is returned non-interactive so it cannot be clicked while the Textbox is empty.

    Args:
        None
    Returns:
        str: An empty string to clear the Textbox.
        None: None to clear the file input.
        gr.Button: The 'Check Origin' button with no interactivity.
    '''
    disabled_check = gr.Button("Check Origin", variant="primary", interactive=False)
    cleared_text, cleared_file = "", None
    return cleared_text, cleared_file, disabled_check
97
+
98
def count_words_detection_tab(text):
    '''
    Callback function called when the input text is changed to update the word count.

    Args:
        text: str: the input text from the Textbox (None is counted as zero words)
    Returns:
        str: the word count of the input text for the Markdown widget
    '''
    # Words are whitespace-separated tokens; a missing value must not raise.
    word_count = len(text.split()) if text else 0
    return f'{word_count}/500 words (Minimum 50 words)'
107
+
108
+
109
+ ################# HELPER FUNCTIONS (CHALLENGE TAB) ####################
110
+
111
def clear_challenge_tab():
    '''
    Callback function that resets the 'Challenge Yourself' tab.
    All four answer buttons are returned non-interactive so they cannot be clicked while the Textbox is empty.

    Args:
        None
    Returns:
        gr.Button: The 'Machine-Generated' button with no interactivity.
        gr.Button: The 'Human-Written' button with no interactivity.
        gr.Button: The 'Machine-Humanized' button with no interactivity.
        gr.Button: The 'Machine-Polished' button with no interactivity.
        str: An empty string to clear the result.
    '''
    answer_labels = ("Machine-Generated", "Human-Written", "Machine-Humanized", "Machine-Polished")
    disabled_buttons = [
        gr.Button(caption, variant="secondary", interactive=False)
        for caption in answer_labels
    ]
    return (*disabled_buttons, '')
132
+
133
def generate_text_challenge_tab():
    '''
    Callback function that picks a random sample essay and enables the four answer buttons.

    Args:
        None
    Returns:
        str: A sample text from the dataset
        gr.Button: The 'Machine-Generated' button with interactivity.
        gr.Button: The 'Human-Written' button with interactivity.
        gr.Button: The 'Machine-Humanized' button with interactivity.
        gr.Button: The 'Machine-Polished' button with interactivity.
        str: An empty string to clear the Result.
    '''
    # The chosen position is stored globally so the result callback can look up
    # the correct label later.
    # NOTE(review): a module-level index looks like it is shared by every visitor
    # of the app -- confirm whether per-session state is needed.
    global index
    index = random.choice(range(80))
    sample_text = demo_essays[index][0]

    answer_labels = ("Machine-Generated", "Human-Written", "Machine-Humanized", "Machine-Polished")
    enabled_buttons = [
        gr.Button(caption, variant="secondary", interactive=True)
        for caption in answer_labels
    ]
    return (sample_text, *enabled_buttons, '')
155
+
156
def correct_label_challenge_tab(sample_index=None):
    '''
    Function to return the correct label of a sample text based on its index.

    Samples are grouped in consecutive runs of 20: 0-19 human-written, 20-39
    machine-generated, 40-59 machine-polished, 60-79 machine-humanized.

    Args:
        sample_index: int | None: index of the sample; when omitted, the global
            variable `index` (set when a sample is generated) is used
    Returns:
        str: The correct label of the sample text, or None when no sample has been
            generated yet or the index falls outside 0-79
    '''
    if sample_index is None:
        sample_index = index
    if sample_index is None:
        # No sample generated yet; comparing None with an int would raise TypeError.
        return None

    labels_by_group = ('Human-Written', 'Machine-Generated', 'Machine-Polished', 'Machine-Humanized')
    group = sample_index // 20
    if 0 <= group < len(labels_by_group):
        return labels_by_group[group]
    return None
173
+
174
def show_result_challenge_tab(button):
    '''
    Callback function that grades the user's guess and recolours the four answer buttons.
    The correct label is shown in the primary variant; the user's choice is shown in the
    stop variant when it is not the correct one. All other buttons stay secondary.

    Args:
        button: str: the label of the button clicked by the user
    Returns:
        str: the outcome of the classification ('Correct' or 'Incorrect')
        gr.Button: The 'Machine-Generated' button with the appropriate variant.
        gr.Button: The 'Human-Written' button with the appropriate variant.
        gr.Button: The 'Machine-Humanized' button with the appropriate variant.
        gr.Button: The 'Machine-Polished' button with the appropriate variant.
    '''
    answer_labels = ("Machine-Generated", "Human-Written", "Machine-Humanized", "Machine-Polished")
    expected = correct_label_challenge_tab()

    variant_for = dict.fromkeys(answer_labels, "secondary")
    if button in answer_labels:
        variant_for[button] = "stop"
    # Applied last so the correct answer wins when the user's guess was right.
    if expected in answer_labels:
        variant_for[expected] = "primary"

    mg, hw, mh, mp = (gr.Button(caption, variant=variant_for[caption]) for caption in answer_labels)
    outcome = 'Correct' if button == expected else 'Incorrect'

    return outcome, mg, hw, mh, mp
221
+
222
+
223
+ ############################## GRADIO UI ##############################
224
+
225
# Two-tab Gradio UI: 'Try it!' classifies user-supplied text, 'Challenge Yourself!'
# lets the user guess the origin of a sample text.
with gr.Blocks() as demo:

    # The opening tag was misspelled as <centre>, leaving </center> unmatched.
    gr.Markdown("""<h1><center>Machine Generated Text (MGT) Detection</center></h1>""")
    with gr.Tab('Try it!'):

        with gr.Row():
            radio_button = gr.Dropdown(['Student Essay', 'Scientific Abstract'], label='Text Type', info='We have specialized models that work on domain-specific text.', value='Student Essay')

        with gr.Row():
            input_text = gr.Textbox(placeholder="Paste your text here...", label="Text", lines=10, max_lines=15)
            file_input = gr.File(label="Upload File", file_types=[".txt", ".pdf"])

        with gr.Row():
            wc = gr.Markdown("0/500 words (Minimum 50 words)")
        with gr.Row():
            # Disabled until there is enough text or an uploaded file.
            check_button = gr.Button("Check Origin", variant="primary", interactive=False)
            clear_button = gr.ClearButton([input_text, file_input], variant="stop")

        out = gr.Label(label='Result')
        clear_button.add(out)

        check_button.click(fn=update_detection_tab, inputs=[input_text, file_input, radio_button], outputs=out)

        # Keep the word counter in sync with the textbox.
        input_text.change(count_words_detection_tab, input_text, wc, show_progress=False)

        # Re-evaluate whether 'Check Origin' may be clicked whenever the text or
        # the file changes, including when an uploaded file is removed again.
        input_text.input(
            active_button_detection_tab,
            [input_text, file_input],
            [check_button],
        )
        file_input.upload(
            active_button_detection_tab,
            [input_text, file_input],
            [check_button],
        )
        file_input.clear(
            active_button_detection_tab,
            [input_text, file_input],
            [check_button],
        )

        clear_button.click(
            clear_detection_tab,
            inputs=[],
            outputs=[input_text, file_input, check_button],
        )

        # Adding JavaScript to simulate file input click
        # NOTE(review): no gr.Button labelled "Upload File" is created in this
        # layout, so the lookup below presumably finds nothing -- confirm and
        # remove, or attach the script through a supported hook.
        gr.Markdown(
            """
            <script>
            document.addEventListener("DOMContentLoaded", function() {
            const uploadButton = Array.from(document.getElementsByTagName('button')).find(el => el.innerText === "Upload File");
            if (uploadButton) {
            uploadButton.onclick = function() {
            document.querySelector('input[type="file"]').click();
            };
            }
            });
            </script>
            """
        )

    with gr.Tab('Challenge Yourself!'):
        gr.Markdown(
            """
            <style>
            .gr-button-secondary {
            width: 100px;
            height: 30px;
            padding: 5px;
            }
            .gr-row {
            display: flex;
            align-items: center;
            gap: 10px;
            }
            .gr-block {
            padding: 20px;
            }
            .gr-markdown p {
            font-size: 16px;
            }
            </style>
            <span style='font-family: Arial, sans-serif; font-size: 20px;'>Was this text written by <strong>human</strong> or <strong>AI</strong>?</span>
            <p style='font-family: Arial, sans-serif;'>Try detecting one of our sample texts:</p>
            """
        )

        with gr.Row():
            generate = gr.Button("Generate Sample Text", variant="primary")
            clear = gr.ClearButton([], variant="stop")

        with gr.Row():
            text = gr.Textbox(value="", label="Text", lines=20, interactive=False)

        with gr.Row():
            # Answer buttons stay disabled until a sample text has been generated.
            mg = gr.Button("Machine-Generated", variant="secondary", interactive=False)
            hw = gr.Button("Human-Written", variant="secondary", interactive=False)
            mh = gr.Button("Machine-Humanized", variant="secondary", interactive=False)
            mp = gr.Button("Machine-Polished", variant="secondary", interactive=False)

        with gr.Row():
            result = gr.Label(label="Result", value="")

        clear.add([result, text])
        generate.click(generate_text_challenge_tab, [], [text, mg, hw, mh, mp, result])
        # Each button passes its own label as the user's guess.
        for button in [mg, hw, mh, mp]:
            button.click(show_result_challenge_tab, [button], [result, mg, hw, mh, mp])

        clear.click(clear_challenge_tab, [], [mg, hw, mh, mp, result])


demo.launch(share=False)
338