Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
###======================== Reference ========================###
|
| 2 |
+
# Text Detector Model: https://huggingface.co/fakespot-ai/roberta-base-ai-text-detection-v1
|
| 3 |
+
# LLM Model: OpenAI GPT-5 Mini
|
| 4 |
+
# Agentic frameworks: LangChain, LangGraph
|
| 5 |
+
# UI: Gradio
|
| 6 |
+
###===========================================================###
|
| 7 |
+
|
| 8 |
+
import os
|
| 9 |
+
from urllib.parse import uses_query
|
| 10 |
+
from dotenv import load_dotenv
|
| 11 |
+
|
| 12 |
+
from langchain_core.messages import SystemMessage, HumanMessage
|
| 13 |
+
from langchain_core.tools import tool
|
| 14 |
+
from langchain_openai import ChatOpenAI
|
| 15 |
+
from langgraph.prebuilt import create_react_agent
|
| 16 |
+
|
| 17 |
+
### For fetching the AI text detector model (from HF)
|
| 18 |
+
from transformers import pipeline
|
| 19 |
+
import torch
|
| 20 |
+
|
| 21 |
+
import gradio as gr
|
| 22 |
+
|
| 23 |
+
### Clean text for better detection performance
|
| 24 |
+
from utils import clean_text
|
| 25 |
+
from html_blocks import FEATURES_HTML
|
| 26 |
+
from css_blocks import CSS_ELEMENTS
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
### Load environment variables (python-dotenv) before any of them are read below
load_dotenv()

### Credentials and model identifiers used by the rest of this module
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_MODEL_ID = "gpt-5-mini"
TEXT_DETECTOR_MODEL_ID = "fakespot-ai/roberta-base-ai-text-detection-v1"

### Chat model that writes the final explanation
llm = ChatOpenAI(model=OPENAI_MODEL_ID, api_key=OPENAI_API_KEY)

### System prompt passed to the agent as its `prompt`
system_message = SystemMessage(
    content="You are a helpful assistant that most accurately can tell ai written text from human written one."
)

### Sample casual message; not referenced by the other code visible in this file
user_input = """Brooo, you won’t believe what happened today 😭 So I’m just minding my own business, right? Grabbing coffee like a civilized human being, and this barista legit calls out “Caramel macchiato for Michael” — and I’m like “bet, that’s me,” grab it, walk out all confident… only to realize halfway down the street it says soy latte, extra shot, no whip 😭😭 Bro, I straight up stole someone’s drink and been sipping on sadness the whole way home. It was actually kinda fire tho ngl 😅 Anyway, how’s your day been? You still surviving or already plotting your escape from adulthood?
"""
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
### Lazily created text-classification pipeline, shared by every detect_text call
### so the model is loaded once instead of on each request
_classifier = None


def _get_classifier():
    """Return the shared detector pipeline, creating it on first use."""
    global _classifier
    if _classifier is None:
        _classifier = pipeline(
            "text-classification",
            model=TEXT_DETECTOR_MODEL_ID,
        )
    return _classifier


@tool
def detect_text(text: str) -> tuple:
    """Classify text as AI- or human-written using the fakespot-ai/roberta-base-ai-text-detection-v1 model.

    Returns a (label, score) pair taken from the top prediction,
    e.g. ('AI', 0.9998624324798584).
    """
    ### truncation=True keeps inputs longer than the model's maximum sequence
    ### length from raising inside the pipeline
    predictions = _get_classifier()(clean_text(text), truncation=True)
    top_prediction = predictions[0]

    return top_prediction["label"], top_prediction["score"]
|
| 57 |
+
|
| 58 |
+
def generate_dynamic_query(text, label, score):
    """Build the user prompt asking the LLM to justify the detector's verdict.

    The prompt embeds the raw input text, the detector's label, and its score
    formatted to three decimal places.
    """
    formatted_score = format(score, ".3f")

    ### The empty first and last entries reproduce the newline right after the
    ### opening quotes and the one before the closing quotes of the template
    prompt_lines = [
        "",
        f"Detect the text of the given input: {text}",
        "AI-text likelihood score is given by another open source llm and the score will be given through the tool call as below:",
        f"Label: {label}",
        f"Score: {formatted_score}",
        "",
        "Based on all those(input text for your own analysis) and the score given by the function,",
        "give the final answer within 3-5 lines why it's assumed to be human or AI written text in a narrative and descriptive manner.",
        "",
    ]
    return "\n".join(prompt_lines)
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def run_agent(text_input):
    """Classify text_input and have the LLM agent explain the verdict.

    Returns a 3-tuple for the UI: (explanation text, display label,
    score formatted as a percentage string). Blank input short-circuits
    with a prompt message and empty label/score, without running the
    detector or calling the LLM.
    """
    ### Nothing to analyze: skip both the detector and the LLM round-trip
    if not text_input or not text_input.strip():
        return "Please enter some text to analyze.", "", ""

    ### detect_text is wrapped by @tool, so run it through the tool interface
    ### instead of calling the tool object directly.
    ### The label and score are written into the prompt explicitly; the agent
    ### is still given the tool, so the detector may run a second time and add
    ### a few seconds of latency.
    label, score = detect_text.invoke({"text": text_input})
    query = generate_dynamic_query(text_input, label, score)

    agent_executor = create_react_agent(
        model=llm, tools=[detect_text], prompt=system_message
    )
    result = agent_executor.invoke(
        {"messages": [{"role": "user", "content": query}]}
    )

    ### Format the detector output for display
    icon = "🧒" if label == "Human" else "🤖"
    display_label = f"{icon} Likely {label}"
    ### No space flag in the format spec, so the percentage has no leading blank
    display_score = f"{score * 100:.2f}%"

    ### Return 3 elements in a tuple: content, text label and score
    return result["messages"][-1].content, display_label, display_score
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
###================= User Interface =================###
|
| 101 |
+
|
| 102 |
+
### NOTE(review): the nesting below is reconstructed from statement order,
### since the original indentation is not visible here -- confirm the layout
with gr.Blocks(css=CSS_ELEMENTS, title="AI WRITTEN TEXT DETECTOR") as demo:
    ### Page heading
    gr.Markdown(
        """
## 🕵️ AI WRITTEN TEXT DETECTOR
Analyze your text and discover how likely it is to be AI-generated.
""",
        elem_id="app_title",
    )

    ### Input area: text box beside the feature description, button underneath
    with gr.Column():
        with gr.Row():
            inp = gr.TextArea(
                placeholder="Paste your text to analyze",
                label="Text Input",
                lines=20,
                scale=2,
            )
            features_desc = gr.HTML(
                FEATURES_HTML,
                elem_classes=["features_description"],
            )

        button = gr.Button("Analyze Text", elem_classes=["analyze_btn"])

    ### Output area: label and score side by side, explanation below
    with gr.Column():
        with gr.Row():
            label_output = gr.Textbox(
                label="Detected Label",
                placeholder="AI / Human",
                scale=1,
            )
            score_output = gr.Textbox(
                label="Confidence Score",
                placeholder="0.000",
                scale=1,
            )

        analysis_output = gr.TextArea(
            label="Analysis Result",
            placeholder="Model's explanation will appear here...",
            lines=6,
        )

    ### run_agent returns (explanation, label, score), matching the outputs order
    button.click(
        fn=run_agent,
        inputs=inp,
        outputs=[analysis_output, label_output, score_output],
    )
|
| 141 |
+
|
| 142 |
+
if __name__ == "__main__":
    ### Environment variables are strings while server_port must be an integer,
    ### so convert explicitly; fall back to 7860 when PORT is unset or empty
    port = int(os.getenv("PORT") or 7860)
    demo.launch(server_name="0.0.0.0", server_port=port)
|
| 145 |
+
|
| 146 |
+
|