minko186's picture
change granuality options to sentence/para
23ec282
raw
history blame
11 kB
import gradio as gr
import numpy as np
from datetime import date
from predictors import predict_bc_scores, predict_mc_scores
from predictors import update, correct_text, split_text
from analysis import depth_analysis
from predictors import predict_quillbot
from plagiarism import plagiarism_check, build_date, html_highlight
from highlighter import analyze_and_highlight
from utils import extract_text_from_pdf, len_validator
import yaml
from functools import partial
# Print NumPy floats in fixed-point notation rather than scientific notation.
np.set_printoptions(suppress=True)

# Load the application configuration once, at import time. The encoding is
# pinned so that parsing does not depend on the platform's default locale.
with open("config.yaml", "r", encoding="utf-8") as file:
    params = yaml.safe_load(file)

# Model labels offered in the "Models to test against" dropdown.
model_list = params["MC_OUTPUT_LABELS"]

# Single-argument highlighters for the two highlighter buttons; each one is
# analyze_and_highlight with its model_type fixed in advance.
analyze_and_highlight_bc = partial(analyze_and_highlight, model_type="bc")
analyze_and_highlight_quillbot = partial(
    analyze_and_highlight, model_type="quillbot"
)
def ai_generated_test(option, input, models):
    """Run the AI-detection predictors selected by ``option``.

    Args:
        option: Selected mode. ``"Human vs AI"`` runs only the binary
            predictor; ``"Human vs AI Source Models"`` also runs the
            per-model predictor. Any other value runs nothing.
        input: Text to analyse.
        models: Models forwarded to ``predict_mc_scores``.

    Returns:
        A ``(bc_scores, mc_scores)`` pair. ``mc_scores`` is ``None`` in the
        binary-only mode, and both entries are ``None`` when ``option`` is
        not one of the two recognised modes.
    """
    if option not in ("Human vs AI", "Human vs AI Source Models"):
        return None, None

    bc_scores = predict_bc_scores(input)
    mc_scores = (
        predict_mc_scores(input, models)
        if option == "Human vs AI Source Models"
        else None
    )
    return bc_scores, mc_scores
# COMBINED
def main(
    ai_option,
    plag_option,
    input,
    models,
    year_from,
    month_from,
    day_from,
    year_to,
    month_to,
    day_to,
    domains_to_skip,
    source_block_size,
):
    """Run every check on ``input`` and return all results together.

    ``ai_option`` is accepted so the signature lines up with the UI inputs,
    but it is not consulted: both the binary and the per-model predictors
    always run.

    Args:
        ai_option: Unused.
        plag_option: Source-check mode forwarded to ``html_highlight``.
        input: Text to analyse.
        models: Models forwarded to ``predict_mc_scores``.
        year_from, month_from, day_from: Start of the source-check date
            range, forwarded to ``html_highlight``.
        year_to, month_to, day_to: End of the source-check date range,
            forwarded to ``html_highlight``.
        domains_to_skip: Domains forwarded to ``html_highlight``.
        source_block_size: Source-check granularity forwarded to
            ``html_highlight``.

    Returns:
        A 5-tuple ``(bc_score, mc_score, formatted_tokens,
        depth_analysis_plot, quilscore)``.
    """
    # html_highlight stands in for the earlier plagiarism_check call; it takes
    # the same arguments plus source_block_size.
    source_check_args = (
        plag_option,
        input,
        year_from,
        month_from,
        day_from,
        year_to,
        month_to,
        day_to,
        domains_to_skip,
        source_block_size,
    )
    highlighted_sources = html_highlight(*source_check_args)
    writing_plot = depth_analysis(input)
    bc_result = predict_bc_scores(input)
    mc_result = predict_mc_scores(input, models)
    humanized_result = predict_quillbot(input)
    return (
        bc_result,
        mc_result,
        highlighted_sources,
        writing_plot,
        humanized_result,
    )
# START OF GRADIO
title = "AI Detection and Source Analysis"

# Month names in calendar order; each one maps to its zero-padded two-digit
# month number ("January" -> "01", ..., "December" -> "12").
_MONTH_NAMES = (
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "July",
    "August",
    "September",
    "October",
    "November",
    "December",
)
months = {
    name: f"{number:02d}" for number, name in enumerate(_MONTH_NAMES, start=1)
}
# Build the Gradio interface: the text/PDF input, the option selectors, the
# action buttons, the output panels, the source-check filters, and finally
# the event wiring that connects buttons to their handlers.
with gr.Blocks() as demo:
    # Today's date seeds the default "To" day/month/year of the source check.
    # NOTE: it is evaluated once, when the interface is built.
    today = date.today()
    # d1 == [day, full month name, year], e.g. ["05", "March", "2024"]
    d1 = today.strftime("%d/%B/%Y").split("/")

    # Top-level domains offered in the "Domain To Skip" filter.
    domain_list = ["com", "org", "net", "int", "edu", "gov", "mil"]

    gr.Markdown(
        """
# AI Detection and Source Analysis
"""
    )

    # --- Input -----------------------------------------------------------
    with gr.Row():
        input_text = gr.Textbox(label="Input text", lines=6, placeholder="")
        file_input = gr.File(label="Upload PDF")
        # Uploading a PDF fills the text box with the extracted text.
        file_input.change(
            fn=extract_text_from_pdf, inputs=file_input, outputs=input_text
        )

    # Re-validated on every change of the input text.
    char_count = gr.Textbox(label="Minimum Character Limit Check")
    input_text.change(fn=len_validator, inputs=input_text, outputs=char_count)

    # Disabled "Bias Buster" feature:
    # with gr.Row():
    #     btn = gr.Button("Bias Buster")
    #     out = gr.Textbox(label="Bias Corrected Full Input", interactive=False)
    #     corrections_output = gr.Textbox(label="Bias Corrections", interactive=False)
    #     btn.click(fn=update, inputs=input_text, outputs=[out, corrections_output])

    # --- Options ---------------------------------------------------------
    with gr.Row():
        # All models are selected by default.
        models = gr.Dropdown(
            model_list,
            value=model_list,
            multiselect=True,
            label="Models to test against",
        )

    with gr.Row():
        with gr.Column():
            ai_option = gr.Radio(
                [
                    "Human vs AI",
                    "Human vs AI Source Models",
                    # "Human vs AI Source Models (1 on 1)",
                ],
                label="Choose an option please.",
            )
        with gr.Column():
            plag_option = gr.Radio(
                ["Standard", "Advanced"], label="Choose an option please."
            )

    with gr.Row():
        source_block_size = gr.Dropdown(
            choices=["Sentence", "Paragraph"],
            label="Source Check Granularity",
            value="Sentence",
            interactive=True,
        )

    # --- Action buttons --------------------------------------------------
    with gr.Row():
        with gr.Column():
            only_ai_btn = gr.Button("AI Check")
        with gr.Column():
            only_plagiarism_btn = gr.Button("Source Check")
        with gr.Column():
            quillbot_check = gr.Button("Humanized Text Check")

    with gr.Row():
        with gr.Column():
            bc_highlighter_button = gr.Button("Human vs. AI Highlighter")
        with gr.Column():
            quillbot_highlighter_button = gr.Button("Humanized Highlighter")

    with gr.Row():
        depth_analysis_btn = gr.Button("Detailed Writing Analysis")

    with gr.Row():
        full_check_btn = gr.Button("Full Check")

    # --- Outputs ---------------------------------------------------------
    gr.Markdown(
        """
## Output
"""
    )

    with gr.Row():
        with gr.Column():
            bcLabel = gr.Label(label="Source")
        with gr.Column():
            mcLabel = gr.Label(label="Creator")

    with gr.Row():
        with gr.Column():
            bc_highlighter_output = gr.HTML(label="Human vs. AI Highlighter")
        # with gr.Column():
        #     mc1on1Label = gr.Label(label="Creator(1 on 1 Approach)")

    with gr.Row():
        with gr.Column():
            QLabel = gr.Label(label="Humanized")

    with gr.Row():
        quillbot_highlighter_output = gr.HTML(label="Humanized Highlighter")

    # --- Source-check filters: date range and domains to skip ------------
    with gr.Group():
        with gr.Row():
            # Dropdown choices are documented as a list, so the month names
            # are passed explicitly rather than handing over the dict itself.
            month_from = gr.Dropdown(
                choices=list(months),
                label="From Month",
                value="January",
                interactive=True,
            )
            day_from = gr.Textbox(label="From Day", value="01")
            year_from = gr.Textbox(label="From Year", value="2000")
            # from_date_button = gr.Button("Submit")

        with gr.Row():
            month_to = gr.Dropdown(
                choices=list(months),
                label="To Month",
                value=d1[1],
                interactive=True,
            )
            day_to = gr.Textbox(label="To Day", value=d1[0])
            year_to = gr.Textbox(label="To Year", value=d1[2])
            # to_date_button = gr.Button("Submit")

        with gr.Row():
            domains_to_skip = gr.Dropdown(
                domain_list,
                multiselect=True,
                label="Domain To Skip",
            )

    with gr.Row():
        with gr.Column():
            sentenceBreakdown = gr.HTML(
                label="Source Detection Sentence Breakdown",
                value="Source Detection Sentence Breakdown",
            )

    # --- Writing analysis: plot next to a static explanation -------------
    with gr.Row():
        with gr.Column():
            writing_analysis_plot = gr.Plot(label="Writing Analysis Plot")
        with gr.Column():
            interpretation = """
<h2>Writing Analysis Interpretation</h2>
<ul>
<li><b>Lexical Diversity</b>: This feature measures the range of unique words used in a text.
<ul>
<li>🤖 Higher tends to be AI.</li>
</ul>
</li>
<li><b>Vocabulary Level</b>: This feature assesses the complexity of the words used in a text.
<ul>
<li>🤖 Higher tends to be AI.</li>
</ul>
</li>
<li><b>Unique Words</b>: This feature counts the number of words that appear only once within the text.
<ul>
<li>🤖 Higher tends to be AI.</li>
</ul>
</li>
<li><b>Determiner Use</b>: This feature tracks the frequency of articles and quantifiers in the text.
<ul>
<li>🤖 Higher tends to be AI.</li>
</ul>
</li>
<li><b>Punctuation Variety</b>: This feature indicates the diversity of punctuation marks used in the text.
<ul>
<li>👤 Higher tends to be Human.</li>
</ul>
</li>
<li><b>Sentence Depth</b>: This feature evaluates the complexity of the sentence structures used in the text.
<ul>
<li>🤖 Higher tends to be AI.</li>
</ul>
</li>
<li><b>Vocabulary Stability</b>: This feature measures the consistency of vocabulary use throughout the text.
<ul>
<li>🤖 Higher tends to be AI.</li>
</ul>
</li>
<li><b>Entity Ratio</b>: This feature calculates the proportion of named entities, such as names and places, within the text.
<ul>
<li>👤 Higher tends to be Human.</li>
</ul>
</li>
<li><b>Perplexity</b>: This feature assesses the predictability of the text based on the sequence of words.
<ul>
<li>👤 Higher tends to be Human.</li>
</ul>
</li>
</ul>
"""
            gr.HTML(interpretation, label="Interpretation of Writing Analysis")

    # --- Event wiring ----------------------------------------------------
    # "Full Check" runs every analysis; the input order must match main()'s
    # parameters and the output order must match its return tuple.
    full_check_btn.click(
        fn=main,
        inputs=[
            ai_option,
            plag_option,
            input_text,
            models,
            year_from,
            month_from,
            day_from,
            year_to,
            month_to,
            day_to,
            domains_to_skip,
            source_block_size,
        ],
        outputs=[
            bcLabel,
            mcLabel,
            # mc1on1Label,
            sentenceBreakdown,
            writing_analysis_plot,
            QLabel,
        ],
        api_name="main",
    )

    only_ai_btn.click(
        fn=ai_generated_test,
        inputs=[ai_option, input_text, models],
        # outputs=[bcLabel, mcLabel, mc1on1Label],
        outputs=[bcLabel, mcLabel],
        api_name="ai_check",
    )

    quillbot_check.click(
        fn=predict_quillbot,
        inputs=[input_text],
        outputs=[QLabel],
        api_name="quillbot_check",
    )

    only_plagiarism_btn.click(
        # fn=plagiarism_check,
        fn=html_highlight,
        inputs=[
            plag_option,
            input_text,
            year_from,
            month_from,
            day_from,
            year_to,
            month_to,
            day_to,
            domains_to_skip,
            source_block_size,
        ],
        outputs=[
            sentenceBreakdown,
        ],
        api_name="plagiarism_check",
    )

    depth_analysis_btn.click(
        fn=depth_analysis,
        inputs=[input_text],
        outputs=[writing_analysis_plot],
        api_name="depth_analysis",
    )

    quillbot_highlighter_button.click(
        fn=analyze_and_highlight_quillbot,
        inputs=[input_text],
        outputs=[quillbot_highlighter_output],
        api_name="humanized_highlighter",
    )

    bc_highlighter_button.click(
        fn=analyze_and_highlight_bc,
        inputs=[input_text],
        outputs=[bc_highlighter_output],
        api_name="bc_highlighter",
    )
# Module-level placeholders; not referenced anywhere else in the visible module.
date_from = ""
date_to = ""


def launch_credentials():
    """Return the ``(username, password)`` pair used to protect the app.

    The values are read from the ``APP_AUTH_USERNAME`` and
    ``APP_AUTH_PASSWORD`` environment variables. When a variable is unset or
    empty, the previous built-in value is used so existing deployments keep
    working.

    Returns:
        A 2-tuple of strings ``(username, password)``.
    """
    import os  # local import: keeps this block independent of the file header

    # SECURITY: the fallbacks below are credentials committed to source
    # control. TODO: set the environment variables in every deployment, then
    # remove these defaults and rotate the password.
    username = os.environ.get("APP_AUTH_USERNAME") or "polygraf-admin"
    password = os.environ.get("APP_AUTH_PASSWORD") or "test@aisd"
    return username, password


if __name__ == "__main__":
    # Listen on all interfaces, create a public share link, and require login.
    demo.launch(
        share=True, server_name="0.0.0.0", auth=launch_credentials()
    )