CoolSpring committed on
Commit
28f34f2
·
unverified ·
1 Parent(s): bbedf33

Add application file

Browse files
Files changed (2) hide show
  1. app.py +105 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import html

import nltk
import streamlit as st
import torch
from nltk.tokenize import sent_tokenize
from transformers import AutoTokenizer, AutoModelForSequenceClassification
6
+
7
# Configure the browser tab title and a centered layout. This is issued before
# any other Streamlit call in the script.
st.set_page_config(page_title="LLM Detector", layout="centered")
9
+
10
+
11
# Download the sentence-tokenizer data used by ``sent_tokenize`` (with caching)
@st.cache_resource
def download_nltk_punkt():
    """Fetch the NLTK Punkt sentence-tokenizer data.

    Both the legacy ``punkt`` package and the newer ``punkt_tab`` package are
    requested: recent NLTK releases load ``punkt_tab`` from ``sent_tokenize``
    and raise ``LookupError`` when only ``punkt`` is installed, while older
    releases still need ``punkt``. The function is wrapped in
    ``st.cache_resource`` so the download is not repeated on every rerun.
    """
    for resource in ("punkt", "punkt_tab"):
        # quiet=True keeps the download progress out of the app logs; a
        # resource that cannot be fetched is reported by the return value
        # rather than an exception, so one missing package does not block
        # the other.
        nltk.download(resource, quiet=True)


download_nltk_punkt()
18
+
19
+
20
# Load the detector checkpoint once and reuse it across reruns (with caching)
@st.cache_resource
def load_model_and_tokenizer():
    """Load the tokenizer and sequence-classification model for the detector.

    Returns:
        A ``(tokenizer, model)`` tuple, both loaded from the same
        Hugging Face checkpoint name.
    """
    checkpoint = "CoolSpring/creative-writing-llm-detector-deberta-v3-xsmall"
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModelForSequenceClassification.from_pretrained(checkpoint),
    )


tokenizer, model = load_model_and_tokenizer()
30
+
31
+
32
def classify_text(text):
    """Return the probability that *text* is AI-generated.

    The text is tokenized with truncation to at most 512 tokens, so only the
    leading part of a longer input is scored. The module-level ``tokenizer``
    and ``model`` are used for inference.

    Args:
        text: The string to classify.

    Returns:
        A Python float: the softmax probability of class index 1, which this
        app treats as the "AI-generated" class.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    # Inference only: skip gradient tracking.
    with torch.no_grad():
        outputs = model(**encoded)
        class_probs = outputs.logits.softmax(dim=1)
    return class_probs[0, 1].item()
38
+
39
+
40
def highlight_suspicious_sentences(text):
    """Split *text* into sentences and score each one on its own.

    Args:
        text: The full input text.

    Returns:
        A ``(sentences, scores)`` tuple of two equally long lists: the
        sentences produced by ``sent_tokenize`` and, for each sentence, the
        value returned by ``classify_text``.
    """
    sentences = sent_tokenize(text)
    scores = list(map(classify_text, sentences))
    return sentences, scores
44
+
45
+
46
def get_color(score):
    """Map a probability score to a translucent CSS background colour.

    Scores below 0.33 are green, scores from 0.33 up to (but excluding) 0.66
    are yellow, and everything else is red.

    Args:
        score: The probability to colour-code.

    Returns:
        An ``rgba(...)`` CSS colour string.
    """
    # Ordered (exclusive upper bound, colour) bands; the first match wins.
    bands = (
        (0.33, "rgba(144, 238, 144, 0.3)"),  # Light green
        (0.66, "rgba(255, 255, 0, 0.3)"),  # Light yellow
    )
    for upper_bound, rgba in bands:
        if score < upper_bound:
            return rgba
    return "rgba(255, 99, 71, 0.3)"  # Light red
53
+
54
+
55
# Page header and short usage hint.
st.title("🤖 LLM Detector")
st.write("Enter text to detect if it's written by an AI language model.")

# Seed the stored text on the first run so the text area has a value to show.
if "text_input" not in st.session_state:
    st.session_state["text_input"] = ""

# The text area is pre-filled from session state, which lets other parts of
# the script replace its contents by writing to that entry and rerunning.
text_input = st.text_area(
    "Enter your text here:",
    value=st.session_state["text_input"],
    height=200,
)

# Keep the stored copy in step with whatever the user has typed.
if st.session_state["text_input"] != text_input:
    st.session_state["text_input"] = text_input
69
+
70
# Run the detector only when the user explicitly asks for it.
if st.button("Analyze and Highlight"):
    # Whitespace-only input has nothing to score, so treat it as empty.
    if text_input.strip():
        # Score for the text as a whole.
        overall_probability = classify_text(text_input)
        verdict_color = "red" if overall_probability > 0.5 else "green"
        st.markdown(
            f"<h3>Overall probability of being AI-generated: <span style='color: {verdict_color};'>{overall_probability:.2%}</span></h3>",
            unsafe_allow_html=True,
        )

        st.markdown("### Sentence-level analysis:")
        sentences, scores = highlight_suspicious_sentences(text_input)

        for sentence, score in zip(sentences, scores):
            color = get_color(score)
            # The sentence is user-supplied and is rendered with
            # unsafe_allow_html=True, so escape it to stop pasted markup
            # (e.g. <script> or <img onerror=...>) from being injected.
            safe_sentence = html.escape(sentence)
            st.markdown(
                f"<div style='background-color: {color}; padding: 10px; margin: 5px 0; border-radius: 5px;'><strong>{score:.2%}</strong> - {safe_sentence}</div>",
                unsafe_allow_html=True,
            )
    else:
        st.warning("Please enter some text to analyze.")
89
+
90
# Explanation shown at the bottom of the page; its later paragraphs double as
# the sample input.
how_it_works_text = """This LLM Detector uses [CoolSpring/creative-writing-llm-detector-deberta-v3-xsmall](https://huggingface.co/CoolSpring/creative-writing-llm-detector-deberta-v3-xsmall), a DeBERTa-v3-xsmall model fine-tuned for text classification.

It analyzes the input text and estimates the probability of it being generated by an AI language model.

The sentence-level analysis breaks down the input into individual sentences and analyzes each one separately, allowing you to see which parts of the text are more likely to be AI-generated.

Please note that this is not 100% accurate and should be used as a guide rather than a definitive measure."""

if st.button("Fill with Sample Text"):
    # Drop the first line (which contains a Markdown link) and the blank line
    # after it, then rerun so the text area picks up the stored sample.
    sample_lines = how_it_works_text.splitlines()[2:]
    st.session_state.text_input = "\n".join(sample_lines)
    st.rerun()

st.markdown(f"### How it works\n{how_it_works_text}")
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ transformers
2
+ torch
3
+ nltk