Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -90,6 +90,7 @@ def analyze(text):
|
|
| 90 |
if not pure_sents:
|
| 91 |
return "—", "—", "<em>No sentences detected.</em>", None
|
| 92 |
|
|
|
|
| 93 |
windows = []
|
| 94 |
for i in range(len(pure_sents)):
|
| 95 |
start = max(0, i - 1)
|
|
@@ -100,12 +101,13 @@ def analyze(text):
|
|
| 100 |
logits = mod(**inputs).logits
|
| 101 |
probs = F.softmax(logits.float(), dim=-1)[:, 1].cpu().numpy().tolist()
|
| 102 |
|
|
|
|
| 103 |
lengths = [len(s.split()) for s in pure_sents]
|
| 104 |
total_words = sum(lengths)
|
| 105 |
weighted_avg = sum(p * l for p, l in zip(probs, lengths)) / total_words if total_words > 0 else 0
|
| 106 |
|
| 107 |
# -----------------------------
|
| 108 |
-
# HTML RECONSTRUCTION
|
| 109 |
# -----------------------------
|
| 110 |
highlighted_html = "<div style='font-family: sans-serif; line-height: 1.8;'>"
|
| 111 |
prob_map = {idx: probs[i] for i, idx in enumerate(pure_sents_indices)}
|
|
@@ -117,12 +119,14 @@ def analyze(text):
|
|
| 117 |
|
| 118 |
if i in prob_map:
|
| 119 |
score = prob_map[i]
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
else:
|
| 125 |
color, bg = "#b80d0d", "rgba(184, 13, 13, 0.15)"
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
highlighted_html += (
|
| 128 |
f"<span style='background:{bg}; padding:2px 4px; border-radius:4px; border-bottom: 2px solid {color};' "
|
|
@@ -133,15 +137,14 @@ def analyze(text):
|
|
| 133 |
highlighted_html += block
|
| 134 |
highlighted_html += "</div>"
|
| 135 |
|
| 136 |
-
# ---
|
| 137 |
-
if weighted_avg >
|
| 138 |
label = f"{weighted_avg:.0%} AI Content Detected"
|
| 139 |
display_score = f"{weighted_avg:.1%}"
|
| 140 |
else:
|
| 141 |
label = "0 or * AI Content Detected"
|
| 142 |
display_score = "*"
|
| 143 |
|
| 144 |
-
# Always return the dataframe if analysis was successful
|
| 145 |
df = pd.DataFrame({"Sentence": pure_sents, "AI Confidence": [f"{p:.1%}" for p in probs]})
|
| 146 |
return label, display_score, highlighted_html, df
|
| 147 |
|
|
@@ -149,8 +152,8 @@ def analyze(text):
|
|
| 149 |
# GRADIO INTERFACE
|
| 150 |
# -----------------------------
|
| 151 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 152 |
-
gr.Markdown("## 🕵️
|
| 153 |
-
gr.Markdown("Sentence-level analysis. **Min 300 words required.** Score masked (*) if ≤
|
| 154 |
|
| 155 |
with gr.Row():
|
| 156 |
with gr.Column(scale=3):
|
|
|
|
| 90 |
if not pure_sents:
|
| 91 |
return "—", "—", "<em>No sentences detected.</em>", None
|
| 92 |
|
| 93 |
+
# Sliding window inference
|
| 94 |
windows = []
|
| 95 |
for i in range(len(pure_sents)):
|
| 96 |
start = max(0, i - 1)
|
|
|
|
| 101 |
logits = mod(**inputs).logits
|
| 102 |
probs = F.softmax(logits.float(), dim=-1)[:, 1].cpu().numpy().tolist()
|
| 103 |
|
| 104 |
+
# Calculate Weighted Average
|
| 105 |
lengths = [len(s.split()) for s in pure_sents]
|
| 106 |
total_words = sum(lengths)
|
| 107 |
weighted_avg = sum(p * l for p, l in zip(probs, lengths)) / total_words if total_words > 0 else 0
|
| 108 |
|
| 109 |
# -----------------------------
|
| 110 |
+
# HTML RECONSTRUCTION (Red vs Green Only)
|
| 111 |
# -----------------------------
|
| 112 |
highlighted_html = "<div style='font-family: sans-serif; line-height: 1.8;'>"
|
| 113 |
prob_map = {idx: probs[i] for i, idx in enumerate(pure_sents_indices)}
|
|
|
|
| 119 |
|
| 120 |
if i in prob_map:
|
| 121 |
score = prob_map[i]
|
| 122 |
+
|
| 123 |
+
# Use THRESHOLD for binary color logic
|
| 124 |
+
if score > THRESHOLD:
|
| 125 |
+
# RED: Above the threshold (AI detected)
|
|
|
|
| 126 |
color, bg = "#b80d0d", "rgba(184, 13, 13, 0.15)"
|
| 127 |
+
else:
|
| 128 |
+
# GREEN: Below the threshold (Human-like)
|
| 129 |
+
color, bg = "#11823b", "rgba(17, 130, 59, 0.15)"
|
| 130 |
|
| 131 |
highlighted_html += (
|
| 132 |
f"<span style='background:{bg}; padding:2px 4px; border-radius:4px; border-bottom: 2px solid {color};' "
|
|
|
|
| 137 |
highlighted_html += block
|
| 138 |
highlighted_html += "</div>"
|
| 139 |
|
| 140 |
+
# --- FINAL VERDICT LOGIC ---
|
| 141 |
+
if weighted_avg > THRESHOLD:
|
| 142 |
label = f"{weighted_avg:.0%} AI Content Detected"
|
| 143 |
display_score = f"{weighted_avg:.1%}"
|
| 144 |
else:
|
| 145 |
label = "0 or * AI Content Detected"
|
| 146 |
display_score = "*"
|
| 147 |
|
|
|
|
| 148 |
df = pd.DataFrame({"Sentence": pure_sents, "AI Confidence": [f"{p:.1%}" for p in probs]})
|
| 149 |
return label, display_score, highlighted_html, df
|
| 150 |
|
|
|
|
| 152 |
# GRADIO INTERFACE
|
| 153 |
# -----------------------------
|
| 154 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 155 |
+
gr.Markdown("## 🕵️ Detector Pro")
|
| 156 |
+
gr.Markdown(f"Sentence-level analysis. **Min 300 words required.** Score masked (*) if ≤ {THRESHOLD*100:.0f}%.")
|
| 157 |
|
| 158 |
with gr.Row():
|
| 159 |
with gr.Column(scale=3):
|