Spaces:
Running
Running
raj-tomar001
commited on
Update app.py
Browse files
app.py
CHANGED
@@ -14,98 +14,325 @@ tokenizer_essay = DebertaTokenizer.from_pretrained(save_path_essay)
|
|
14 |
|
15 |
classifier_essay = pipeline('text-classification', model=model_essay, tokenizer=tokenizer_essay)
|
16 |
|
17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
if uploaded_file is not None:
|
19 |
-
return f"
|
20 |
else:
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
return "human-written | machine-polished"
|
29 |
-
if data == 'LABEL_3':
|
30 |
-
return "machine-generated | machine-humanized"
|
31 |
-
else:
|
32 |
-
if radio_input == 'Student Essay':
|
33 |
-
data = classifier_essay(name)[0]['label']
|
34 |
-
if data == 'LABEL_0':
|
35 |
-
return "human_text"
|
36 |
-
if data == 'LABEL_1':
|
37 |
-
return "machine_text"
|
38 |
-
if data == 'LABEL_2':
|
39 |
-
return "human-written | machine-polished"
|
40 |
-
if data == 'LABEL_3':
|
41 |
-
return "machine-generated | machine-humanized"
|
42 |
-
# return "Hold on!"
|
43 |
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
.
|
49 |
-
|
50 |
-
height: 30px;
|
51 |
-
padding: 5px;
|
52 |
-
}
|
53 |
-
.gr-row {
|
54 |
-
display: flex;
|
55 |
-
align-items: center;
|
56 |
-
gap: 10px;
|
57 |
-
}
|
58 |
-
.gr-block {
|
59 |
-
padding: 20px;
|
60 |
-
}
|
61 |
-
.gr-markdown p {
|
62 |
-
font-size: 16px;
|
63 |
-
}
|
64 |
-
</style>
|
65 |
-
<span style='font-family: Arial, sans-serif; font-size: 20px;'>Was this text written by <strong>human</strong> or <strong>AI</strong>?</span>
|
66 |
-
<p style='font-family: Arial, sans-serif;'>Try detecting one of our sample texts:</p>
|
67 |
-
"""
|
68 |
-
)
|
69 |
-
|
70 |
-
with gr.Row():
|
71 |
-
for sample in ["Machine-Generated", "Human-Written", "Machine-Humanized", "Machine - Polished"]:
|
72 |
-
gr.Button(sample, variant="outline")
|
73 |
|
74 |
-
|
75 |
-
|
76 |
|
77 |
-
|
78 |
-
|
79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
|
88 |
-
|
89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
}
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
|
110 |
-
demo.launch(share=
|
111 |
|
|
|
14 |
|
15 |
classifier_essay = pipeline('text-classification', model=model_essay, tokenizer=tokenizer_essay)
|
16 |
|
17 |
+
# Sample essays used by the 'Challenge Yourself!' tab, loaded once at start-up.
# The context manager closes the file handle instead of leaking it.
with open('/samples.json', encoding='utf-8') as samples_file:
    demo_essays = json.load(samples_file)

# Position in demo_essays of the essay currently shown in the challenge tab;
# stays None until generate_text_challenge_tab() picks one.
index = None
|
19 |
+
|
20 |
+
|
21 |
+
################# HELPER FUNCTIONS (DETECTION TAB) ####################
|
22 |
+
|
23 |
+
def process_result_detection_tab(text):
    '''
    Classify the text into one of the four categories by averaging the soft predictions of the two models.

    The scores of the two models are paired by label name whenever both models report
    the same set of labels, so the average does not depend on the order in which each
    pipeline lists its predictions. If the label sets differ, the scores are paired by
    position instead.

    Args:
        text: str: the text to be classified
    Returns:
        dict: a dictionary with the following keys:
            'Machine Generated': float: the probability that the text is machine generated
            'Human Written': float: the probability that the text is human written
            'Machine Written, Machine Humanized': float: the probability that the text is machine written and machine humanized
            'Human Written, Machine Polished': float: the probability that the text is human written and machine polished
    Raises:
        KeyError: if the first model reports a label that is not in the mapping below
    '''
    mapping = {
        'llm': 'Machine Generated',
        'human': 'Human Written',
        'machine-humanized': 'Machine Written, Machine Humanized',
        'machine-polished': 'Human Written, Machine Polished',
    }

    # classifier and classifier_roberta are defined elsewhere in the file; each call is
    # expected to yield a list of {'label', 'score'} dicts for the single input text.
    result = classifier(text)[0]
    result_r = classifier_roberta(text)[0]

    labels = [mapping[x['label']] for x in result]
    scores = np.array([x['score'] for x in result])

    # Pair the second model's scores with the first model's labels by name when possible.
    by_label_r = {x['label']: x['score'] for x in result_r}
    same_labels = (
        len(by_label_r) == len(result_r)
        and set(by_label_r) == {x['label'] for x in result}
    )
    if same_labels:
        scores_r = np.array([by_label_r[x['label']] for x in result])
    else:
        # Label names differ between the models: keep the positional pairing.
        scores_r = np.array([x['score'] for x in result_r])

    averaged = 0.5 * scores + 0.5 * scores_r

    final_results = dict(zip(labels, averaged.tolist()))
    print(final_results)
    return final_results
|
47 |
+
|
48 |
+
def update_detection_tab(name, uploaded_file, radio_input):
    '''
    Callback function to update the result of the classification based on the input text or uploaded file.

    Args:
        name: str: the input text from the Textbox
        uploaded_file: file: the uploaded file from the file input
        radio_input: str: the selected text type; accepted but not used by this function
    Returns:
        dict: the result of the classification including labels and scores.
        str: "Work in progress" when a file is uploaded, or an empty string when
             there is no file and the text is missing, empty or only whitespace.
    '''
    if uploaded_file is not None:
        # Classifying uploaded files is not implemented yet.
        return "Work in progress"

    # Nothing to classify: avoid sending None or blank text to the models.
    if name is None or not name.strip():
        return ""

    return process_result_detection_tab(name)
|
64 |
+
|
65 |
+
def active_button_detection_tab(input_text, file_input):
    '''
    Callback function to activate the 'Check Origin' button when the input text or file input
    is not empty. For text input, the button can be clicked only when the word count is between
    50 and 500 (inclusive). When a file is present the button is always enabled.

    Args:
        input_text: str: the input text from the textbox
        file_input: file: the uploaded file from the file input
    Returns:
        gr.Button: The 'Check Origin' button with the appropriate interactivity.
    '''
    # Missing text counts as zero words, which is below the minimum.
    word_count = len((input_text or "").split())
    can_check = file_input is not None or 50 <= word_count <= 500

    return gr.Button("Check Origin", variant="primary", interactive=can_check)
|
82 |
+
|
83 |
+
def clear_detection_tab():
    '''
    Reset the inputs of the 'Try it!' tab.

    The 'Check Origin' button is returned non-interactive so it cannot be clicked
    while the Textbox is empty.

    Args:
        None
    Returns:
        str: An empty string to clear the Textbox.
        None: None to clear the file input.
        gr.Button: The 'Check Origin' button with no interactivity.
    '''
    disabled_check = gr.Button("Check Origin", variant="primary", interactive=False)
    cleared_text, cleared_file = "", None
    return cleared_text, cleared_file, disabled_check
|
97 |
+
|
98 |
+
def count_words_detection_tab(text):
    '''
    Build the word-count message shown under the Textbox whenever its text changes.

    Words are the whitespace-separated pieces of the text.

    Args:
        text: str: the input text from the Textbox
    Returns:
        str: the word count of the input text for the Markdown widget
    '''
    n_words = len(text.split())
    return f'{n_words}/500 words (Minimum 50 words)'
|
107 |
+
|
108 |
+
|
109 |
+
################# HELPER FUNCTIONS (CHALLENGE TAB) ####################
|
110 |
+
|
111 |
+
def clear_challenge_tab():
    '''
    Reset the answer buttons of the 'Challenge Yourself' tab.

    All four answer buttons are returned in the secondary variant and non-interactive,
    so they cannot be clicked while no sample text is shown.

    Args:
        None
    Returns:
        gr.Button: The 'Machine-Generated' button with no interactivity.
        gr.Button: The 'Human-Written' button with no interactivity.
        gr.Button: The 'Machine-Humanized' button with no interactivity.
        gr.Button: The 'Machine-Polished' button with no interactivity.
        str: An empty string for the last output of the callback.
    '''
    answer_labels = ("Machine-Generated", "Human-Written", "Machine-Humanized", "Machine-Polished")
    disabled_buttons = [
        gr.Button(label, variant="secondary", interactive=False)
        for label in answer_labels
    ]

    return (*disabled_buttons, '')
|
132 |
+
|
133 |
+
def generate_text_challenge_tab():
    '''
    Pick a random sample essay and enable the four answer buttons.

    The chosen position is stored in the module-level `index` so that
    correct_label_challenge_tab() can later look up the right answer.

    Args:
        None
    Returns:
        str: A sample text from the dataset
        gr.Button: The 'Machine-Generated' button with interactivity.
        gr.Button: The 'Human-Written' button with interactivity.
        gr.Button: The 'Machine-Humanized' button with interactivity.
        gr.Button: The 'Machine-Polished' button with interactivity.
        str: An empty string to clear the Result.
    '''
    global index  # read back by correct_label_challenge_tab when an answer is clicked

    answer_labels = ("Machine-Generated", "Human-Written", "Machine-Humanized", "Machine-Polished")
    enabled_buttons = [
        gr.Button(label, variant="secondary", interactive=True)
        for label in answer_labels
    ]

    # Only the first 80 entries of demo_essays are ever sampled.
    index = random.choice(range(80))
    sample = demo_essays[index][0]

    return (sample, *enabled_buttons, '')
|
155 |
+
|
156 |
+
def correct_label_challenge_tab():
    '''
    Look up the correct label of the current sample text from the global `index`.

    Indices 0-19 map to 'Human-Written', 20-39 to 'Machine-Generated',
    40-59 to 'Machine-Polished' and 60-79 to 'Machine-Humanized'.

    Args:
        None
    Returns:
        str: The correct label of the sample text, or None when the index
             is outside the range 0-79.
    '''
    # One label per consecutive run of 20 sample positions.
    label_per_run = (
        'Human-Written',
        'Machine-Generated',
        'Machine-Polished',
        'Machine-Humanized',
    )

    if 0 <= index < 80:
        return label_per_run[int(index // 20)]
    return None
|
173 |
+
|
174 |
+
def show_result_challenge_tab(button):
    '''
    Grade the user's answer in the challenge tab and recolour the answer buttons.

    The button holding the correct label gets the primary variant. The button the
    user clicked gets the stop variant when it is not the correct one. All other
    buttons keep the secondary variant.

    Args:
        button: str: the label of the button clicked by the user
    Returns:
        str: the outcome of the classification ('Correct' or 'Incorrect')
        gr.Button: The 'Machine-Generated' button with the appropriate variant.
        gr.Button: The 'Human-Written' button with the appropriate variant.
        gr.Button: The 'Machine-Humanized' button with the appropriate variant.
        gr.Button: The 'Machine-Polished' button with the appropriate variant.
    '''
    correct_btn = correct_label_challenge_tab()

    answer_labels = ("Machine-Generated", "Human-Written", "Machine-Humanized", "Machine-Polished")
    variant_for = dict.fromkeys(answer_labels, "secondary")

    # Mark the clicked button first; the correct one is applied afterwards so that
    # a correct click ends up primary rather than stop.
    if button in variant_for:
        variant_for[button] = "stop"
    if correct_btn in variant_for:
        variant_for[correct_btn] = "primary"

    mg, hw, mh, mp = (
        gr.Button(label, variant=variant_for[label]) for label in answer_labels
    )

    outcome = 'Correct' if button == correct_btn else 'Incorrect'

    return outcome, mg, hw, mh, mp
|
221 |
+
|
222 |
+
|
223 |
+
############################## GRADIO UI ##############################
|
224 |
+
|
225 |
+
with gr.Blocks() as demo:

    # Page title. The opening tag is <center> so that it matches the closing </center>.
    gr.Markdown("""<h1><center>Machine Generated Text (MGT) Detection</center></h1>""")

    # ---- Tab 1: classify text supplied by the user ----
    with gr.Tab('Try it!'):

        with gr.Row():
            radio_button = gr.Dropdown(
                ['Student Essay', 'Scientific Abstract'],
                label='Text Type',
                info='We have specialized models that work on domain-specific text.',
                value='Student Essay',
            )

        with gr.Row():
            input_text = gr.Textbox(placeholder="Paste your text here...", label="Text", lines=10, max_lines=15)
            file_input = gr.File(label="Upload File", file_types=[".txt", ".pdf"])

        with gr.Row():
            wc = gr.Markdown("0/500 words (Minimum 50 words)")

        with gr.Row():
            # Starts disabled; active_button_detection_tab enables it once the input qualifies.
            check_button = gr.Button("Check Origin", variant="primary", interactive=False)
            clear_button = gr.ClearButton([input_text, file_input], variant="stop")

        out = gr.Label(label='Result')
        clear_button.add(out)

        check_button.click(fn=update_detection_tab, inputs=[input_text, file_input, radio_button], outputs=out)

        # Keep the word counter and the button state in sync with the inputs.
        input_text.change(count_words_detection_tab, input_text, wc, show_progress=False)
        input_text.input(
            active_button_detection_tab,
            [input_text, file_input],
            [check_button],
        )

        file_input.upload(
            active_button_detection_tab,
            [input_text, file_input],
            [check_button],
        )

        clear_button.click(
            clear_detection_tab,
            inputs=[],
            outputs=[input_text, file_input, check_button],
        )

        # Adding JavaScript to simulate file input click
        # NOTE(review): a <script> injected through Markdown may not be executed by the
        # browser - confirm that this block still has any effect.
        gr.Markdown(
            """
            <script>
            document.addEventListener("DOMContentLoaded", function() {
                const uploadButton = Array.from(document.getElementsByTagName('button')).find(el => el.innerText === "Upload File");
                if (uploadButton) {
                    uploadButton.onclick = function() {
                        document.querySelector('input[type="file"]').click();
                    };
                }
            });
            </script>
            """
        )

    # ---- Tab 2: let the user guess the origin of a sample essay ----
    with gr.Tab('Challenge Yourself!'):
        gr.Markdown(
            """
            <style>
            .gr-button-secondary {
                width: 100px;
                height: 30px;
                padding: 5px;
            }
            .gr-row {
                display: flex;
                align-items: center;
                gap: 10px;
            }
            .gr-block {
                padding: 20px;
            }
            .gr-markdown p {
                font-size: 16px;
            }
            </style>
            <span style='font-family: Arial, sans-serif; font-size: 20px;'>Was this text written by <strong>human</strong> or <strong>AI</strong>?</span>
            <p style='font-family: Arial, sans-serif;'>Try detecting one of our sample texts:</p>
            """
        )

        with gr.Row():
            generate = gr.Button("Generate Sample Text", variant="primary")
            clear = gr.ClearButton([], variant="stop")

        with gr.Row():
            text = gr.Textbox(value="", label="Text", lines=20, interactive=False)

        with gr.Row():
            # Answer buttons start disabled until a sample text has been generated.
            mg = gr.Button("Machine-Generated", variant="secondary", interactive=False)
            hw = gr.Button("Human-Written", variant="secondary", interactive=False)
            mh = gr.Button("Machine-Humanized", variant="secondary", interactive=False)
            mp = gr.Button("Machine-Polished", variant="secondary", interactive=False)

        with gr.Row():
            result = gr.Label(label="Result", value="")

        clear.add([result, text])
        generate.click(generate_text_challenge_tab, [], [text, mg, hw, mh, mp, result])

        # Each answer button passes itself as the input of the grading callback.
        for button in [mg, hw, mh, mp]:
            button.click(show_result_challenge_tab, [button], [result, mg, hw, mh, mp])

        clear.click(clear_challenge_tab, [], [mg, hw, mh, mp, result])


demo.launch(share=False)
|
338 |
|