Spaces:

MightyOctopus
/

ai-written-text-detector

Running

App Files Files Community

MightyOctopus committed on Oct 11

Commit

846f2fe

verified ·

1 Parent(s): ab8168a

Create app.py

Browse files

Files changed (1) hide show

app.py +146 -0

app.py ADDED Viewed

	@@ -0,0 +1,146 @@

+###======================== Reference ========================###
+# Text Detector Model: https://huggingface.co/fakespot-ai/roberta-base-ai-text-detection-v1
+# LLM Model: OpenAI GPT-5 Mini
+# Agentic frameworks: LangChain, LangGraph
+# UI: Gradio
+###===========================================================###
+import os
+from urllib.parse import uses_query
+from dotenv import load_dotenv
+from langchain_core.messages import SystemMessage, HumanMessage
+from langchain_core.tools import tool
+from langchain_openai import ChatOpenAI
+from langgraph.prebuilt import create_react_agent
+### For fetching the AI text detector model (from HF)
+from transformers import pipeline
+import torch
+import gradio as gr
+### Clean text for better detection performance
+from utils import clean_text
+from html_blocks import FEATURES_HTML
+from css_blocks import CSS_ELEMENTS
+load_dotenv()  # Load variables from a .env file (if any) before the key is read below.
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")  # None when the variable is not set.
+OPENAI_MODEL_ID = "gpt-5-mini"  # Chat model used for the written explanation.
+TEXT_DETECTOR_MODEL_ID = "fakespot-ai/roberta-base-ai-text-detection-v1"  # Hugging Face model id passed to pipeline() in detect_text.
+llm = ChatOpenAI(api_key=OPENAI_API_KEY, model=OPENAI_MODEL_ID)  # Shared chat model handed to the agent in run_agent.
+system_message = SystemMessage("You are a helpful assistant that most accurately can tell ai written text from human written one.")
+user_input="""Brooo, you won’t believe what happened today 😭 So I’m just minding my own business, right? Grabbing coffee like a civilized human being, and this barista legit calls out “Caramel macchiato for Michael” — and I’m like “bet, that’s me,” grab it, walk out all confident… only to realize halfway down the street it says soy latte, extra shot, no whip 😭😭 Bro, I straight up stole someone’s drink and been sipping on sadness the whole way home. It was actually kinda fire tho ngl 😅 Anyway, how’s your day been? You still surviving or already plotting your escape from adulthood?
+"""  # Sample casual message; not referenced elsewhere in the visible code (presumably kept for manual testing).
+@tool
+def detect_text(text):
+    """ Process AI text detection using fakespot-ai/roberta-base-ai-text-detection-v1 model.
+    It returns its result in a List[Dict] form.
+    e.g. [{'label': 'AI', 'score': 0.9998624324798584}]
+    """
+    classifier = pipeline(
+        "text-classification",
+        model=TEXT_DETECTOR_MODEL_ID
+    )
+    cleaned_text = classifier(clean_text(text))
+    label_result = cleaned_text[0]["label"]
+    score_result = cleaned_text[0]["score"]
+    return label_result, score_result
+def generate_dynamic_query(text, label, score):
+    query = f"""
+    Detect the text of the given input: {text}
+    AI-text likelihood score is given by another open source llm and the score will be given through the tool call as below:
+    Label: {label}
+    Score: {score:.3f}
+    Based on all those(input text for your own analysis) and the score given by the function,
+    give the final answer within 3-5 lines why it's assumed to be human or AI written text in a narrative and descriptive manner.
+    """
+    return query
+def run_agent(text_input):
+    ### Run the text detector model and extract label(whether AI or Human text) and likelihood score
+    ### The result can be passively sent to the LLM but explicitly extracts label and score
+    ### as the LLM doesn't take the accurate values by tool calling, for some reason I couldn't figure out
+    ### So in that way, the run_agent calls detect_text twice which results in a higher latency for a couple more seconds.
+    label, score = detect_text(text_input)
+    query = generate_dynamic_query(text_input, label, score)
+    tools = [detect_text]
+    ### Issue: Tool calling is actually redundant in this case -- as label, score = detect_text(user_text) actually runs the function separately and extract all the llm needs to take
+    agent_executor = create_react_agent(
+        model=llm, tools=tools, prompt=system_message
+    )
+    result = agent_executor.invoke(
+        {"messages": [{"role": "user", "content": query}]}
+    )
+    ### Process output formats
+    label = "🧒 Likely " + label if label == "Human" else "🤖 Likely " + label
+    score = f"{score * 100: .2f}" + "%"
+    ### Return 3 elements in a tuple: content, text label and score
+    return result["messages"][-1].content, label, score
+###================= User Interface =================###
+with gr.Blocks(css=CSS_ELEMENTS, title="AI WRITTEN TEXT DETECTOR") as demo:  # App container styled with CSS_ELEMENTS; launched as `demo` at the bottom of the file.
+    gr.Markdown(
+        """
+        ## 🕵️ AI WRITTEN TEXT DETECTOR
+        Analyze your text and discover how likely it is to be AI-generated.
+        """,
+        elem_id="app_title"
+    )
+    with gr.Column():  # Input section: text area beside the feature description, analyze button beneath.
+        with gr.Row():
+            inp = gr.TextArea(placeholder="Paste your text to analyze", label="Text Input", lines=20, scale=2)
+            features_desc = gr.HTML(FEATURES_HTML, elem_classes=["features_description"])  # Static HTML imported from html_blocks.
+        button = gr.Button("Analyze Text", elem_classes=["analyze_btn"])
+    with gr.Column():  # Output section: label and score side by side, explanation beneath.
+        with gr.Row():
+            label_output = gr.Textbox(
+                label="Detected Label",
+                placeholder="AI / Human",
+                scale=1
+            )
+            score_output = gr.Textbox(
+                label="Confidence Score",
+                placeholder="0.000",  # NOTE(review): run_agent returns a percentage string, so this hint does not match the displayed format.
+                scale=1
+            )
+        analysis_output = gr.TextArea(
+            label="Analysis Result",
+            placeholder="Model's explanation will appear here...",
+            lines=6
+        )
+    button.click(  # Run run_agent on the input text when the button is clicked.
+        fn=run_agent,
+        inputs=inp,
+        outputs=[analysis_output, label_output, score_output]  # Same order as run_agent's return tuple: explanation, label, score.
+    )
+if __name__ == "__main__":
+    port = os.getenv("PORT", 7860)
+    demo.launch(server_name="0.0.0.0", server_port=port)