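# app.py -- snapshot taken from the Hugging Face Space file viewer (12.5 kB).
# Last commit: "Update app.py" by aliasgerovs (1e1cfc2, verified).
# (Viewer links present on the page: raw, history, blame.)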
import gradio as gr
import numpy as np
from datetime import date
from predictors import predict_bc_scores, predict_mc_scores
from predictors import update,update_main, correct_text, split_text
from analysis import depth_analysis
from predictors import predict_quillbot
from plagiarism import plagiarism_check, build_date, html_highlight
from highlighter import segmented_higlighter
from utils import extract_text_from_pdf, len_validator, extract_text_from_html
import yaml
from functools import partial
from audio import assemblyai_transcribe
import yt_dlp
import os
import pandas as pd
from const import plag_script
from datasets import load_dataset, Dataset
# Print NumPy floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)

# Load the app configuration; this file only reads the "MC_OUTPUT_LABELS" entry,
# which populates the "Models to test against" dropdown.
with open("config.yaml") as config_file:
    params = yaml.safe_load(config_file)
model_list = params["MC_OUTPUT_LABELS"]

# Pre-bind the segmented highlighter to each model type so the two highlighter
# buttons can use them directly as event handlers.
analyze_and_highlight_bc = partial(segmented_higlighter, model_type="bc")
analyze_and_highlight_quillbot = partial(segmented_higlighter, model_type="quillbot")

import nltk

# Download the "punkt_tab" NLTK resource when the module is loaded.
nltk.download("punkt_tab")
def save_request(email, video_url):
    """Append a pending video request to the requests dataset on the Hub.

    Args:
        email: Contact address typed by the user; must be non-empty.
        video_url: URL of the video the user wants processed.

    Returns:
        A ``(message, update)`` pair: a status message and a ``gr.update``
        that keeps the request form visible when the email is missing and
        hides it once the request has been stored.
    """
    # NOTE(review): `requests_repo` is not defined in this file as shown; it
    # has to exist at module level or this function raises NameError.
    if email in (None, ""):
        return "Please enter your email.", gr.update(visible=True)

    # Read the current "train" split, add one row, and push the whole table back.
    existing_requests = load_dataset(requests_repo)["train"].to_pandas()
    pending_request = pd.DataFrame(
        {"email": [email], "video_url": [video_url], "status": "pending"}
    )
    all_requests = pd.concat([existing_requests, pending_request], ignore_index=True)
    Dataset.from_pandas(all_requests).push_to_hub(requests_repo)
    return "Your request has been saved.", gr.update(visible=False)
def ai_generated_test(option, bias_buster_selected, input):
    """Score the text with the detector(s) selected by ``option``.

    Args:
        option: "Human vs AI" for the binary score only, or
            "Human vs AI Source Models" for the binary and multi-class scores.
        bias_buster_selected: When truthy, the text is passed through
            ``update`` before any scoring.
        input: Text to analyse.

    Returns:
        ``(bc_scores, mc_scores)``; ``mc_scores`` is ``None`` for
        "Human vs AI", and both are ``None`` for any other option.
    """
    text = update(input) if bias_buster_selected else input

    if option not in ("Human vs AI", "Human vs AI Source Models"):
        return None, None

    bc_scores = predict_bc_scores(text)
    mc_scores = (
        predict_mc_scores(text) if option == "Human vs AI Source Models" else None
    )
    return bc_scores, mc_scores
# COMBINED
def main(
    ai_option,
    plag_option,
    input,
    year_from,
    month_from,
    day_from,
    year_to,
    month_to,
    day_to,
    domains_to_skip,
    source_block_size,
    bias_buster_selected=False,
):
    """Run the source check, the writing analysis and every detector at once.

    Handler for the "Full Check" button.

    Args:
        ai_option: Selected AI-check mode. Accepted so the handler signature
            lines up with the UI inputs; not used by this function.
        plag_option: Source-check mode, forwarded to ``html_highlight``.
        input: Text to analyse.
        year_from, month_from, day_from: Start of the date range, forwarded
            to ``html_highlight``.
        year_to, month_to, day_to: End of the date range, forwarded to
            ``html_highlight``.
        domains_to_skip: Domains selected for exclusion, forwarded to
            ``html_highlight``.
        source_block_size: Source-check granularity, forwarded to
            ``html_highlight``.
        bias_buster_selected: Value of the "Bias Remover" checkbox, forwarded
            to ``depth_analysis`` and ``predict_quillbot``. It used to be
            missing from the signature, so the name resolved to the
            module-level ``gr.Checkbox`` component (always truthy) instead of
            the user's choice.

    Returns:
        ``(bc_score, mc_score, formatted_tokens, depth_analysis_plot,
        quilscore)`` in the order expected by the "Full Check" outputs.
    """
    formatted_tokens = html_highlight(
        plag_option,
        input,
        year_from,
        month_from,
        day_from,
        year_to,
        month_to,
        day_to,
        domains_to_skip,
        source_block_size,
    )
    depth_analysis_plot = depth_analysis(input, bias_buster_selected)
    bc_score = predict_bc_scores(input)
    mc_score = predict_mc_scores(input)
    quilscore = predict_quillbot(input, bias_buster_selected)

    return (
        bc_score,
        mc_score,
        formatted_tokens,
        depth_analysis_plot,
        quilscore,
    )


# START OF GRADIO

title = "AI Detection and Source Analysis"

# Month name -> two-digit month number; the names are the dropdown choices.
months = {
    "January": "01",
    "February": "02",
    "March": "03",
    "April": "04",
    "May": "05",
    "June": "06",
    "July": "07",
    "August": "08",
    "September": "09",
    "October": "10",
    "November": "11",
    "December": "12",
}

# Static help text shown next to the writing-analysis plot.
interpretation = """
<h2>Writing Analysis Interpretation</h2>
<ul>
<li><b>Lexical Diversity</b>: This feature measures the range of unique words used in a text.
<ul>
<li>🤖 Higher tends to be AI.</li>
</ul>
</li>
<li><b>Vocabulary Level</b>: This feature assesses the complexity of the words used in a text.
<ul>
<li>🤖 Higher tends to be AI.</li>
</ul>
</li>
<li><b>Unique Words</b>: This feature counts the number of words that appear only once within the text.
<ul>
<li>🤖 Higher tends to be AI.</li>
</ul>
</li>
<li><b>Determiner Use</b>: This feature tracks the frequency of articles and quantifiers in the text.
<ul>
<li>🤖 Higher tends to be AI.</li>
</ul>
</li>
<li><b>Punctuation Variety</b>: This feature indicates the diversity of punctuation marks used in the text.
<ul>
<li>👤 Higher tends to be Human.</li>
</ul>
</li>
<li><b>Sentence Depth</b>: This feature evaluates the complexity of the sentence structures used in the text.
<ul>
<li>🤖 Higher tends to be AI.</li>
</ul>
</li>
<li><b>Vocabulary Stability</b>: This feature measures the consistency of vocabulary use throughout the text.
<ul>
<li>🤖 Higher tends to be AI.</li>
</ul>
</li>
<li><b>Entity Ratio</b>: This feature calculates the proportion of named entities, such as names and places, within the text.
<ul>
<li>👤 Higher tends to be Human.</li>
</ul>
</li>
<li><b>Perplexity</b>: This feature assesses the predictability of the text based on the sequence of words.
<ul>
<li>👤 Higher tends to be Human.</li>
</ul>
</li>
</ul>
"""

with gr.Blocks() as demo:
    today = date.today()
    # d1 == [day, full month name, year]; used as the default "to" date below.
    d1 = today.strftime("%d/%B/%Y")
    d1 = d1.split("/")

    domain_list = ["com", "org", "net", "int", "edu", "gov", "mil"]
    gr.Markdown(
        """
# AI Detection and Source Analysis
"""
    )

    # --- Text input: typed text or text extracted from an uploaded PDF -------
    with gr.Row():
        input_text = gr.Textbox(label="Input text", lines=6, placeholder="")
        file_input = gr.File(label="Upload PDF")
        file_input.change(
            fn=extract_text_from_pdf, inputs=file_input, outputs=input_text
        )

    # --- Video request form (created hidden) ---------------------------------
    with gr.Column(visible=False) as request_row:
        with gr.Row():
            email_input = gr.Textbox(label="Email")
            youtube_url_input = gr.Textbox(label="YouTube Video URL")
        with gr.Row():
            video_submit_btn = gr.Button("Submit Video Request")

    # --- Alternative sources: web page URL or YouTube URL ---------------------
    with gr.Row():
        url_input = gr.Textbox(
            label="Input Page URL to check", lines=1, placeholder=""
        )
        url_input.change(
            fn=extract_text_from_html, inputs=url_input, outputs=input_text
        )
        audio_url_input = gr.Textbox(
            label="Input YouTube URL to check", lines=1, placeholder=""
        )
        audio_url_input.change(
            fn=assemblyai_transcribe, inputs=audio_url_input, outputs=input_text
        )

    # The status message from save_request is written into the main text box.
    video_submit_btn.click(
        fn=save_request,
        inputs=[email_input, youtube_url_input],
        outputs=[input_text, request_row],
        api_name="video_request",
    )

    char_count = gr.Textbox(label="Minumum Character Limit Check")
    input_text.change(fn=len_validator, inputs=input_text, outputs=char_count)

    # --- Text correction -------------------------------------------------------
    with gr.Row():
        btn = gr.Button("Deception Filter")
        out = gr.Textbox(label="Corrected Full Input", interactive=False)
        corrections_output = gr.Textbox(label="Corrections", interactive=False)
        btn.click(
            fn=update_main, inputs=input_text, outputs=[out, corrections_output]
        )

    # --- Options ---------------------------------------------------------------
    # NOTE(review): no event handler in this file reads `models`.
    with gr.Row():
        models = gr.Dropdown(
            model_list,
            value=model_list,
            multiselect=True,
            label="Models to test against",
        )

    with gr.Row():
        with gr.Column():
            ai_option = gr.Radio(
                [
                    "Human vs AI",
                    "Human vs AI Source Models",
                ],
                label="Choose an option please.",
            )
        with gr.Column():
            bias_buster_selected = gr.Checkbox(label="Bias Remover")
        with gr.Column():
            plag_option = gr.Radio(
                ["Standard", "Advanced"], label="Choose an option please."
            )

    with gr.Row():
        source_block_size = gr.Dropdown(
            choices=["Sentence", "Paragraph"],
            label="Source Check Granularity",
            value="Sentence",
            interactive=True,
        )

    # --- Action buttons --------------------------------------------------------
    with gr.Row():
        with gr.Column():
            only_ai_btn = gr.Button("AI Check")
        with gr.Column():
            only_plagiarism_btn = gr.Button("Source Check")
        with gr.Column():
            quillbot_check = gr.Button("Humanized Text Check")

    with gr.Row():
        with gr.Column():
            bc_highlighter_button = gr.Button("Human vs. AI Highlighter")
        with gr.Column():
            quillbot_highlighter_button = gr.Button("Humanized Highlighter")

    with gr.Row():
        depth_analysis_btn = gr.Button("Detailed Writing Analysis")

    with gr.Row():
        full_check_btn = gr.Button("Full Check")

    gr.Markdown(
        """
## Output
"""
    )

    # --- Outputs ---------------------------------------------------------------
    with gr.Row():
        with gr.Column():
            bcLabel = gr.Label(label="Source")
        with gr.Column():
            mcLabel = gr.Label(label="Creator")

    with gr.Row():
        with gr.Column():
            bc_highlighter_output = gr.HTML(label="Human vs. AI Highlighter")

    with gr.Row():
        with gr.Column():
            QLabel = gr.Label(label="Humanized")

    with gr.Row():
        quillbot_highlighter_output = gr.HTML(label="Humanized Highlighter")

    # --- Source-check filters: date range and skipped domains ------------------
    with gr.Group():
        with gr.Row():
            month_from = gr.Dropdown(
                choices=months,
                label="From Month",
                value="January",
                interactive=True,
            )
            day_from = gr.Textbox(label="From Day", value="01")
            year_from = gr.Textbox(label="From Year", value="2000")
            # from_date_button = gr.Button("Submit")
        with gr.Row():
            month_to = gr.Dropdown(
                choices=months,
                label="To Month",
                value=d1[1],
                interactive=True,
            )
            day_to = gr.Textbox(label="To Day", value=d1[0])
            year_to = gr.Textbox(label="To Year", value=d1[2])
            # to_date_button = gr.Button("Submit")
        with gr.Row():
            domains_to_skip = gr.Dropdown(
                domain_list,
                multiselect=True,
                label="Domain To Skip",
            )

    with gr.Row():
        with gr.Column():
            sentenceBreakdown = gr.HTML(
                label="Source Detection Sentence Breakdown",
                value="Source Detection Sentence Breakdown",
            )

    with gr.Row():
        with gr.Column():
            writing_analysis_plot = gr.Plot(label="Writing Analysis Plot")
        with gr.Column():
            gr.HTML(interpretation, label="Interpretation of Writing Analysis")

    # --- Event wiring ----------------------------------------------------------
    full_check_btn.click(
        fn=main,
        inputs=[
            ai_option,
            plag_option,
            input_text,
            year_from,
            month_from,
            day_from,
            year_to,
            month_to,
            day_to,
            domains_to_skip,
            source_block_size,
            # Appended last so the positions of the existing inputs are
            # unchanged; gives `main` the checkbox value, not the component.
            bias_buster_selected,
        ],
        outputs=[
            bcLabel,
            mcLabel,
            sentenceBreakdown,
            writing_analysis_plot,
            QLabel,
        ],
        api_name="main",
    )

    only_ai_btn.click(
        fn=ai_generated_test,
        inputs=[ai_option, bias_buster_selected, input_text],
        outputs=[bcLabel, mcLabel],
        api_name="ai_check",
    )

    quillbot_check.click(
        fn=predict_quillbot,
        inputs=[input_text, bias_buster_selected],
        outputs=[QLabel],
        api_name="quillbot_check",
    )

    only_plagiarism_btn.click(
        # fn=plagiarism_check,
        fn=html_highlight,
        inputs=[
            plag_option,
            input_text,
            year_from,
            month_from,
            day_from,
            year_to,
            month_to,
            day_to,
            domains_to_skip,
            source_block_size,
        ],
        outputs=[
            sentenceBreakdown,
        ],
        api_name="plagiarism_check",
    )

    depth_analysis_btn.click(
        fn=depth_analysis,
        inputs=[input_text, bias_buster_selected],
        outputs=[writing_analysis_plot],
        api_name="depth_analysis",
    )

    quillbot_highlighter_button.click(
        fn=analyze_and_highlight_quillbot,
        inputs=[input_text, bias_buster_selected],
        outputs=[quillbot_highlighter_output],
        api_name="humanized_highlighter",
    )

    bc_highlighter_button.click(
        fn=analyze_and_highlight_bc,
        inputs=[input_text, bias_buster_selected],
        outputs=[bc_highlighter_output],
        api_name="bc_highlighter",
    )

    date_from = ""
    date_to = ""
if __name__ == "__main__":
    # NOTE(security): the login credentials used to be hard-coded here. They
    # can now be overridden with the APP_AUTH_USERNAME / APP_AUTH_PASSWORD
    # environment variables; the previous values remain as fallbacks so that
    # existing deployments keep working. Set both variables (and rotate the
    # password) for anything publicly reachable, since share=True and
    # server_name="0.0.0.0" expose the app beyond localhost.
    auth_username = os.environ.get("APP_AUTH_USERNAME", "polygraf-admin")
    auth_password = os.environ.get("APP_AUTH_PASSWORD", "test@aisd")
    demo.launch(
        share=True,
        server_name="0.0.0.0",
        auth=(auth_username, auth_password),
    )