Spaces:
Sleeping
Sleeping
aliasgerovs
committed on
Commit
·
f45e494
1
Parent(s):
74f95a7
Updated
Browse files
- app.py +15 -9
- highlighter.py +4 -1
- nohup.out +19 -94
- predictors.py +11 -2
app.py
CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
|
|
2 |
import numpy as np
|
3 |
from datetime import date
|
4 |
from predictors import predict_bc_scores, predict_mc_scores
|
5 |
-
from predictors import update, correct_text, split_text
|
6 |
from analysis import depth_analysis
|
7 |
from predictors import predict_quillbot
|
8 |
from plagiarism import plagiarism_check, build_date, html_highlight
|
@@ -26,7 +26,9 @@ analyze_and_highlight_quillbot = partial(
|
|
26 |
)
|
27 |
|
28 |
|
29 |
-
def ai_generated_test(option, input, models):
|
|
|
|
|
30 |
if option == "Human vs AI":
|
31 |
return predict_bc_scores(input), None
|
32 |
elif option == "Human vs AI Source Models":
|
@@ -132,9 +134,9 @@ with gr.Blocks() as demo:
|
|
132 |
btn = gr.Button("Bias Buster")
|
133 |
out = gr.Textbox(label="Bias Corrected Full Input", interactive=False)
|
134 |
corrections_output = gr.Textbox(label="Bias Corrections", interactive=False)
|
135 |
-
btn.click(fn=
|
136 |
|
137 |
-
with gr.Row():
|
138 |
models = gr.Dropdown(
|
139 |
model_list,
|
140 |
value=model_list,
|
@@ -151,6 +153,10 @@ with gr.Blocks() as demo:
|
|
151 |
],
|
152 |
label="Choose an option please.",
|
153 |
)
|
|
|
|
|
|
|
|
|
154 |
with gr.Column():
|
155 |
plag_option = gr.Radio(
|
156 |
["Standard", "Advanced"], label="Choose an option please."
|
@@ -331,7 +337,7 @@ with gr.Blocks() as demo:
|
|
331 |
|
332 |
only_ai_btn.click(
|
333 |
fn=ai_generated_test,
|
334 |
-
inputs=[ai_option, input_text, models],
|
335 |
# outputs=[bcLabel, mcLabel, mc1on1Label],
|
336 |
outputs=[bcLabel, mcLabel],
|
337 |
api_name="ai_check",
|
@@ -339,7 +345,7 @@ with gr.Blocks() as demo:
|
|
339 |
|
340 |
quillbot_check.click(
|
341 |
fn=predict_quillbot,
|
342 |
-
inputs=[input_text],
|
343 |
outputs=[QLabel],
|
344 |
api_name="quillbot_check",
|
345 |
)
|
@@ -367,21 +373,21 @@ with gr.Blocks() as demo:
|
|
367 |
|
368 |
depth_analysis_btn.click(
|
369 |
fn=depth_analysis,
|
370 |
-
inputs=[input_text],
|
371 |
outputs=[writing_analysis_plot],
|
372 |
api_name="depth_analysis",
|
373 |
)
|
374 |
|
375 |
quillbot_highlighter_button.click(
|
376 |
fn=analyze_and_highlight_quillbot,
|
377 |
-
inputs=[input_text],
|
378 |
outputs=[quillbot_highlighter_output],
|
379 |
api_name="humanized_highlighter",
|
380 |
)
|
381 |
|
382 |
bc_highlighter_button.click(
|
383 |
fn=analyze_and_highlight_bc,
|
384 |
-
inputs=[input_text],
|
385 |
outputs=[bc_highlighter_output],
|
386 |
api_name="bc_highlighter",
|
387 |
)
|
|
|
2 |
import numpy as np
|
3 |
from datetime import date
|
4 |
from predictors import predict_bc_scores, predict_mc_scores
|
5 |
+
from predictors import update,update_main, correct_text, split_text
|
6 |
from analysis import depth_analysis
|
7 |
from predictors import predict_quillbot
|
8 |
from plagiarism import plagiarism_check, build_date, html_highlight
|
|
|
26 |
)
|
27 |
|
28 |
|
29 |
+
def ai_generated_test(option, bias_buster_selected, input, models):
|
30 |
+
if bias_buster_selected:
|
31 |
+
input = update(input)
|
32 |
if option == "Human vs AI":
|
33 |
return predict_bc_scores(input), None
|
34 |
elif option == "Human vs AI Source Models":
|
|
|
134 |
btn = gr.Button("Bias Buster")
|
135 |
out = gr.Textbox(label="Bias Corrected Full Input", interactive=False)
|
136 |
corrections_output = gr.Textbox(label="Bias Corrections", interactive=False)
|
137 |
+
btn.click(fn=update_main, inputs=input_text, outputs=[out, corrections_output])
|
138 |
|
139 |
+
with gr.Row():
|
140 |
models = gr.Dropdown(
|
141 |
model_list,
|
142 |
value=model_list,
|
|
|
153 |
],
|
154 |
label="Choose an option please.",
|
155 |
)
|
156 |
+
|
157 |
+
with gr.Column():
|
158 |
+
bias_buster_selected = gr.Checkbox(label="Bias Remover")
|
159 |
+
|
160 |
with gr.Column():
|
161 |
plag_option = gr.Radio(
|
162 |
["Standard", "Advanced"], label="Choose an option please."
|
|
|
337 |
|
338 |
only_ai_btn.click(
|
339 |
fn=ai_generated_test,
|
340 |
+
inputs=[ai_option, bias_buster_selected, input_text, models],
|
341 |
# outputs=[bcLabel, mcLabel, mc1on1Label],
|
342 |
outputs=[bcLabel, mcLabel],
|
343 |
api_name="ai_check",
|
|
|
345 |
|
346 |
quillbot_check.click(
|
347 |
fn=predict_quillbot,
|
348 |
+
inputs=[input_text, bias_buster_selected],
|
349 |
outputs=[QLabel],
|
350 |
api_name="quillbot_check",
|
351 |
)
|
|
|
373 |
|
374 |
depth_analysis_btn.click(
|
375 |
fn=depth_analysis,
|
376 |
+
inputs=[bias_buster_selected, input_text],
|
377 |
outputs=[writing_analysis_plot],
|
378 |
api_name="depth_analysis",
|
379 |
)
|
380 |
|
381 |
quillbot_highlighter_button.click(
|
382 |
fn=analyze_and_highlight_quillbot,
|
383 |
+
inputs=[input_text, bias_buster_selected],
|
384 |
outputs=[quillbot_highlighter_output],
|
385 |
api_name="humanized_highlighter",
|
386 |
)
|
387 |
|
388 |
bc_highlighter_button.click(
|
389 |
fn=analyze_and_highlight_bc,
|
390 |
+
inputs=[input_text, bias_buster_selected],
|
391 |
outputs=[bc_highlighter_output],
|
392 |
api_name="bc_highlighter",
|
393 |
)
|
highlighter.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
from lime.lime_text import LimeTextExplainer
|
2 |
from nltk.tokenize import sent_tokenize
|
3 |
from predictors import predict_for_explainanility
|
|
|
4 |
|
5 |
|
6 |
def explainer(text, model_type):
|
@@ -25,7 +26,9 @@ def explainer(text, model_type):
|
|
25 |
return sentences_weights, exp
|
26 |
|
27 |
|
28 |
-
def analyze_and_highlight(text, model_type):
|
|
|
|
|
29 |
|
30 |
highlighted_text = ""
|
31 |
sentences_weights, _ = explainer(text, model_type)
|
|
|
1 |
from lime.lime_text import LimeTextExplainer
|
2 |
from nltk.tokenize import sent_tokenize
|
3 |
from predictors import predict_for_explainanility
|
4 |
+
from predictors import update, correct_text, split_text
|
5 |
|
6 |
|
7 |
def explainer(text, model_type):
|
|
|
26 |
return sentences_weights, exp
|
27 |
|
28 |
|
29 |
+
def analyze_and_highlight(text, bias_buster_selected, model_type):
|
30 |
+
if bias_buster_selected:
|
31 |
+
text = update(text)
|
32 |
|
33 |
highlighted_text = ""
|
34 |
sentences_weights, _ = explainer(text, model_type)
|
nohup.out
CHANGED
@@ -1,99 +1,24 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
[nltk_data] Package punkt is already up-to-date!
|
8 |
-
[nltk_data] Downloading package stopwords to /root/nltk_data...
|
9 |
-
[nltk_data] Package stopwords is already up-to-date!
|
10 |
-
The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
|
11 |
-
The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
|
12 |
-
The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
|
13 |
-
The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
|
14 |
-
The BetterTransformer implementation does not support padding during training, as the fused kernels do not support attention masks. Beware that passing padded batched data during training may result in unexpected outputs. Please refer to https://huggingface.co/docs/optimum/bettertransformer/overview for more details.
|
15 |
-
[nltk_data] Downloading package cmudict to /root/nltk_data...
|
16 |
-
[nltk_data] Package cmudict is already up-to-date!
|
17 |
-
[nltk_data] Downloading package punkt to /root/nltk_data...
|
18 |
-
[nltk_data] Package punkt is already up-to-date!
|
19 |
-
[nltk_data] Downloading package stopwords to /root/nltk_data...
|
20 |
-
[nltk_data] Package stopwords is already up-to-date!
|
21 |
-
[nltk_data] Downloading package wordnet to /root/nltk_data...
|
22 |
-
[nltk_data] Package wordnet is already up-to-date!
|
23 |
-
/usr/lib/python3/dist-packages/requests/__init__.py:87: RequestsDependencyWarning: urllib3 (2.2.1) or chardet (4.0.0) doesn't match a supported version!
|
24 |
-
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
25 |
-
Collecting en_core_web_sm==2.3.1
|
26 |
-
Using cached en_core_web_sm-2.3.1-py3-none-any.whl
|
27 |
-
Requirement already satisfied: spacy<2.4.0,>=2.3.0 in /usr/local/lib/python3.9/dist-packages (from en_core_web_sm==2.3.1) (2.3.9)
|
28 |
-
Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (3.0.9)
|
29 |
-
Requirement already satisfied: thinc<7.5.0,>=7.4.1 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (7.4.6)
|
30 |
-
Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (52.0.0)
|
31 |
-
Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.10)
|
32 |
-
Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2.0.8)
|
33 |
-
Requirement already satisfied: blis<0.8.0,>=0.4.0 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (0.7.11)
|
34 |
-
Requirement already satisfied: wasabi<1.1.0,>=0.4.0 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (0.10.1)
|
35 |
-
Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (4.66.2)
|
36 |
-
Requirement already satisfied: numpy>=1.15.0 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.26.4)
|
37 |
-
Requirement already satisfied: requests<3.0.0,>=2.13.0 in /usr/lib/python3/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2.25.1)
|
38 |
-
Requirement already satisfied: plac<1.2.0,>=0.9.6 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.1.3)
|
39 |
-
Requirement already satisfied: catalogue<1.1.0,>=0.0.7 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.2)
|
40 |
-
Requirement already satisfied: srsly<1.1.0,>=1.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.7)
|
41 |
-
[38;5;2m✔ Download and installation successful[0m
|
42 |
-
You can now load the model via spacy.load('en_core_web_sm')
|
43 |
Traceback (most recent call last):
|
44 |
File "/usr/local/lib/python3.9/dist-packages/gradio/queueing.py", line 527, in process_events
|
45 |
response = await route_utils.call_process_api(
|
46 |
File "/usr/local/lib/python3.9/dist-packages/gradio/route_utils.py", line 270, in call_process_api
|
47 |
output = await app.get_blocks().process_api(
|
48 |
-
File "/usr/local/lib/python3.9/dist-packages/gradio/blocks.py", line
|
49 |
-
|
50 |
-
File "/usr/local/lib/python3.9/dist-packages/gradio/blocks.py", line
|
51 |
-
|
52 |
-
File "/usr/local/lib/python3.9/dist-packages/
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
corrected_text, corrections = correct_text(text, bias_checker, bias_corrector)
|
62 |
-
NameError: name 'bias_checker' is not defined
|
63 |
-
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
|
64 |
-
To disable this warning, you can either:
|
65 |
-
- Avoid using `tokenizers` before the fork if possible
|
66 |
-
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
|
67 |
-
/usr/local/lib/python3.9/dist-packages/torch/cuda/__init__.py:619: UserWarning: Can't initialize NVML
|
68 |
-
warnings.warn("Can't initialize NVML")
|
69 |
-
IMPORTANT: You are using gradio version 4.28.3, however version 4.29.0 is available, please upgrade.
|
70 |
-
--------
|
71 |
-
Running on local URL: http://0.0.0.0:80
|
72 |
-
Running on public URL: https://a5b565cd42a2675e81.gradio.live
|
73 |
-
|
74 |
-
This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
|
75 |
-
["OpenAI's chief scientist and co-founder, Ilya Sutskever, is leaving the artificial-intelligence company about six months after he voted to fire Chief Executive Sam Altman only to say he regretted the move days later"]
|
76 |
-
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
|
77 |
-
To disable this warning, you can either:
|
78 |
-
- Avoid using `tokenizers` before the fork if possible
|
79 |
-
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
|
80 |
-
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
|
81 |
-
To disable this warning, you can either:
|
82 |
-
- Avoid using `tokenizers` before the fork if possible
|
83 |
-
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
|
84 |
-
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
|
85 |
-
To disable this warning, you can either:
|
86 |
-
- Avoid using `tokenizers` before the fork if possible
|
87 |
-
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
|
88 |
-
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
|
89 |
-
To disable this warning, you can either:
|
90 |
-
- Avoid using `tokenizers` before the fork if possible
|
91 |
-
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
|
92 |
-
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
|
93 |
-
To disable this warning, you can either:
|
94 |
-
- Avoid using `tokenizers` before the fork if possible
|
95 |
-
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
|
96 |
-
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
|
97 |
-
To disable this warning, you can either:
|
98 |
-
- Avoid using `tokenizers` before the fork if possible
|
99 |
-
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
|
|
|
1 |
+
Original BC scores: AI: 0.983885645866394, HUMAN: 0.01611432246863842
|
2 |
+
Calibration BC scores: AI: 0.5142857142857142, HUMAN: 0.48571428571428577
|
3 |
+
Input Text: sOperation Title was an unsuccessful 1942 Allied attack on the German battleship Tirpitz during World War II. The Allies considered Tirpitz to be a major threat to their shipping and after several Royal Air Force heavy bomber raids failed to inflict any damage it was decided to use Royal Navy midget submarines instead. /s
|
4 |
+
|
5 |
+
correcting text..: 0%| | 0/2 [00:00<?, ?it/s]
|
6 |
+
correcting text..: 100%|██████████| 2/2 [00:00<00:00, 29.39it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
Traceback (most recent call last):
|
8 |
File "/usr/local/lib/python3.9/dist-packages/gradio/queueing.py", line 527, in process_events
|
9 |
response = await route_utils.call_process_api(
|
10 |
File "/usr/local/lib/python3.9/dist-packages/gradio/route_utils.py", line 270, in call_process_api
|
11 |
output = await app.get_blocks().process_api(
|
12 |
+
File "/usr/local/lib/python3.9/dist-packages/gradio/blocks.py", line 1856, in process_api
|
13 |
+
data = await self.postprocess_data(fn_index, result["prediction"], state)
|
14 |
+
File "/usr/local/lib/python3.9/dist-packages/gradio/blocks.py", line 1634, in postprocess_data
|
15 |
+
self.validate_outputs(fn_index, predictions) # type: ignore
|
16 |
+
File "/usr/local/lib/python3.9/dist-packages/gradio/blocks.py", line 1610, in validate_outputs
|
17 |
+
raise ValueError(
|
18 |
+
ValueError: An event handler (update) didn't receive enough output values (needed: 2, received: 1).
|
19 |
+
Wanted outputs:
|
20 |
+
[<gradio.components.textbox.Textbox object at 0x7f79abf202b0>, <gradio.components.textbox.Textbox object at 0x7f79abf20a60>]
|
21 |
+
Received outputs:
|
22 |
+
["Operation Title was an unsuccessful 1942 Allied attack on the German battleship Tirpitz during World War II. The Allies considered Tirpitz to be a major threat to their shipping and after several Royal Air Force heavy bomber raids failed to inflict any damage it was decided to use Royal Navy midget submarines instead."]
|
23 |
+
/usr/lib/python3/dist-packages/requests/__init__.py:87: RequestsDependencyWarning: urllib3 (2.2.1) or chardet (4.0.0) doesn't match a supported version!
|
24 |
+
warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
predictors.py
CHANGED
@@ -115,12 +115,19 @@ def correct_text(text: str, bias_checker, bias_corrector, separator: str = " ")
|
|
115 |
return corrected_text, corrections
|
116 |
|
117 |
def update(text: str):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
118 |
text = clean(text, lower=False)
|
119 |
corrected_text, corrections = correct_text(text, bias_checker, bias_corrector)
|
120 |
corrections_display = "\n\n".join([f"Original: {orig}\nCorrected: {corr}" for orig, corr in corrections])
|
121 |
return corrected_text, corrections_display
|
122 |
|
123 |
-
|
124 |
def split_text_allow_complete_sentences_nltk(
|
125 |
text,
|
126 |
max_length=256,
|
@@ -200,7 +207,9 @@ def split_text_allow_complete_sentences_nltk(
|
|
200 |
return decoded_segments
|
201 |
|
202 |
|
203 |
-
def predict_quillbot(text):
|
|
|
|
|
204 |
with torch.no_grad():
|
205 |
quillbot_model.eval()
|
206 |
tokenized_text = quillbot_tokenizer(
|
|
|
115 |
return corrected_text, corrections
|
116 |
|
117 |
def update(text: str):
|
118 |
+
text = clean(text, lower=False)
|
119 |
+
corrected_text, corrections = correct_text(text, bias_checker, bias_corrector)
|
120 |
+
corrections_display = "".join([f"{corr}" for orig, corr in corrections])
|
121 |
+
if corrections_display == "":
|
122 |
+
corrections_display = text
|
123 |
+
return corrections_display
|
124 |
+
|
125 |
+
def update_main(text: str):
|
126 |
text = clean(text, lower=False)
|
127 |
corrected_text, corrections = correct_text(text, bias_checker, bias_corrector)
|
128 |
corrections_display = "\n\n".join([f"Original: {orig}\nCorrected: {corr}" for orig, corr in corrections])
|
129 |
return corrected_text, corrections_display
|
130 |
|
|
|
131 |
def split_text_allow_complete_sentences_nltk(
|
132 |
text,
|
133 |
max_length=256,
|
|
|
207 |
return decoded_segments
|
208 |
|
209 |
|
210 |
+
def predict_quillbot(text, bias_buster_selected):
|
211 |
+
if bias_buster_selected:
|
212 |
+
text = update(text)
|
213 |
with torch.no_grad():
|
214 |
quillbot_model.eval()
|
215 |
tokenized_text = quillbot_tokenizer(
|