Spaces:

Neha13
/

AI_Content_Detector

Running

App Files Files Community

AI_Content_Detector / app.py

Neha13

Create app.py

285d2df verified 2 months ago

raw

history blame contribute delete

3.27 kB

	import streamlit as st
	from transformers import GPT2Tokenizer, GPT2LMHeadModel
	import torch
	import nltk
	from nltk.util import ngrams
	from nltk.probability import FreqDist
	import plotly.express as px
	import torch.nn.functional as F
	from collections import Counter
	from nltk.corpus import stopwords
	import string

	import nltk
	# Fetch the NLTK data used by word_tokenize() and stopwords.words() only when
	# it is missing locally, so a Streamlit rerun does not hit the network each
	# time. Newer NLTK releases load the tokenizer tables from 'punkt_tab'
	# instead of 'punkt'; both are requested so either release works.
	for _resource_path, _package in (
	    ('tokenizers/punkt', 'punkt'),
	    ('tokenizers/punkt_tab', 'punkt_tab'),
	    ('corpora/stopwords', 'stopwords'),
	):
	    try:
	        nltk.data.find(_resource_path)
	    except LookupError:
	        nltk.download(_package)

	# Initialize tokenizer and model (pretrained 'gpt2' weights), shared by
	# c_perplexity() below.
	tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
	model = GPT2LMHeadModel.from_pretrained('gpt2')

	def c_perplexity(text, shift_labels=False):
	    """Calculate a GPT-2 perplexity score for the given text.

	    The text is encoded with the module-level ``tokenizer``, truncated to the
	    model's context window, and scored with the module-level ``model``. The
	    score is ``exp`` of the mean token-level cross-entropy.

	    Args:
	        text: The text to score.
	        shift_labels: When False (the default), the logits at each position
	            are compared with the token at that same position. This is the
	            scale this function has always returned, and the one the app's
	            fixed threshold is compared against. When True, the logits at
	            position ``i`` are compared with the token at ``i + 1``, which is
	            the conventional next-token perplexity of a language model.

	    Returns:
	        The score as a float, or ``float('inf')`` when there is nothing to
	        score (blank text, no tokens, or a single token with
	        ``shift_labels=True``).
	    """
	    if not text.strip():
	        return float('inf')  # Return inf for empty input

	    input_ids = tokenizer.encode(text, add_special_tokens=False, return_tensors='pt')
	    # GPT-2 only has position embeddings for ``n_positions`` tokens; a longer
	    # sequence fails inside the forward pass, so score the leading window.
	    input_ids = input_ids[:, :model.config.n_positions]
	    if input_ids.size(1) == 0:  # Check for empty input after encoding
	        return float('inf')

	    with torch.no_grad():
	        logits = model(input_ids).logits

	        targets = input_ids
	        if shift_labels:
	            # Position i predicts token i + 1: drop the last prediction and
	            # the first token so the two sequences line up.
	            logits = logits[:, :-1, :]
	            targets = input_ids[:, 1:]
	            if targets.size(1) == 0:
	                return float('inf')

	        # reshape() rather than view(): the shifted slices are not contiguous.
	        loss = F.cross_entropy(
	            logits.reshape(-1, logits.size(-1)),
	            targets.reshape(-1),
	        )

	    return torch.exp(loss).item()

	def c_burstiness(text):
	    """Return the share of distinct tokens that occur more than once.

	    The text is lower-cased and split with NLTK's word tokenizer. The result
	    is the number of distinct tokens seen at least twice divided by the
	    number of distinct tokens, or 0.0 when the text yields no tokens.
	    """
	    words = nltk.word_tokenize(text.lower())
	    if not words:
	        return 0.0

	    frequencies = FreqDist(words)
	    # At least one token exists here, so the denominator is never zero.
	    repeated_words = [word for word, seen in frequencies.items() if seen > 1]
	    return len(repeated_words) / len(frequencies)

	def top_repword_count(text):
	    """Render a bar chart of the 10 most frequent words in the text.

	    The text is lower-cased and split with NLTK's word tokenizer. English
	    stop words and tokens made up only of punctuation are discarded before
	    counting. When no tokens remain, a short message is written to the page
	    instead of a chart.

	    Args:
	        text: The text to analyse.

	    Returns:
	        None. The chart (or the message) is written to the Streamlit page.
	    """
	    stop_words = set(stopwords.words('english'))
	    punctuation = set(string.punctuation)
	    tokens = [
	        token
	        for token in nltk.word_tokenize(text.lower())
	        if token not in stop_words
	        # Check every character: a substring test against string.punctuation
	        # lets multi-character tokens such as "``", "''" or "..." through.
	        and not all(char in punctuation for char in token)
	    ]

	    top_words = Counter(tokens).most_common(10)
	    if not top_words:
	        st.write("No significant words found.")
	        return

	    words, counts = zip(*top_words)
	    fig = px.bar(
	        x=words,
	        y=counts,
	        labels={'x': 'Words', 'y': 'Counts'},
	        title="Top 10 Most Repeated Words in the Text",
	    )
	    # The keyword is ``use_container_width``; the previous spelling
	    # ("user_...") was not the sizing option, so the chart never stretched.
	    st.plotly_chart(fig, use_container_width=True)

	# Streamlit app configuration
	st.set_page_config(layout="wide")

	# Decision thresholds: the text is flagged when its perplexity score is above
	# PERPLEXITY_THRESHOLD or its burstiness score is below BURSTINESS_THRESHOLD.
	PERPLEXITY_THRESHOLD = 40000
	BURSTINESS_THRESHOLD = 0.24

	st.title("AI Content Detector")

	text_area = st.text_area("Enter your text here!")

	# Require real content before offering the button: whitespace-only input
	# would otherwise score an infinite perplexity and be reported as
	# AI-generated.
	if text_area.strip() and st.button("Analyse the content"):
	    col1, col2, col3 = st.columns([1, 2, 1])

	    # Left column: echo the submitted text.
	    with col1:
	        st.info("Your input text")
	        st.success(text_area)

	    # Middle column: scores and the resulting verdict.
	    with col2:
	        st.info("Your output score")
	        perplexity = c_perplexity(text_area)
	        burstiness = c_burstiness(text_area)

	        st.success(f"Perplexity score: {perplexity}")
	        st.success(f"Burstiness score: {burstiness}")

	        if perplexity > PERPLEXITY_THRESHOLD or burstiness < BURSTINESS_THRESHOLD:
	            st.error("Result: The text is likely AI-generated.")
	        else:
	            st.success("Result: The text is not AI-generated.")

	        st.warning("Disclaimer: AI plagiarism detector apps can assist in identifying potential instances of plagiarism.")

	    # Right column: chart of the most frequent words.
	    with col3:
	        st.info("Basic Review")
	        top_repword_count(text_area)