Spaces:

satyaki-mitra
/

AI_Text_Authenticator

Running

AI_Text_Authenticator / metrics /structural.py

satyaki-mitra

feat: Add Text Auth AI Detection System

edf1149 about 2 months ago

16.9 kB

	# DEPENDENCIES
	import re
	import numpy as np
	from typing import Any
	from typing import Dict
	from typing import List
	from loguru import logger
	from collections import Counter
	from metrics.base_metric import MetricResult
	from metrics.base_metric import StatisticalMetric
	from config.threshold_config import Domain
	from config.threshold_config import get_threshold_for_domain


	class StructuralMetric(StatisticalMetric):
	    """
	    Structural analysis of text patterns with domain-aware thresholds

	    Analyzes various structural features including:
	    - Sentence length distribution and variance
	    - Word length distribution
	    - Punctuation patterns
	    - Vocabulary richness
	    - Burstiness (variation in patterns)
	    - Readability, local word repetition and n-gram diversity

	    The features are folded into a raw AI-likeness score, which is then mapped to
	    AI / human / mixed probabilities using the thresholds of the requested domain.
	    """
	    def __init__(self):
	        super().__init__(name        = "structural",
	                         description = "Structural and pattern analysis of the text",
	                        )


	    def compute(self, text: str, **kwargs) -> MetricResult:
	        """
	        Compute structural features with domain aware thresholds

	        Arguments:
	        ----------
	            text     { str } : Input text to analyze

	            **kwargs         : Additional parameters including 'domain'; a missing or
	                               None domain falls back to Domain.GENERAL

	        Returns:
	        --------
	            { MetricResult } : MetricResult with AI/Human/Mixed probabilities; on any
	                               exception a neutral result (0.5 / 0.5 / 0.0, confidence 0.0)
	                               carrying the error message is returned instead of raising
	        """
	        try:
	            # Treat an explicit domain=None exactly like an omitted domain
	            domain = kwargs.get('domain')

	            if domain is None:
	                domain = Domain.GENERAL

	            # Get domain-specific thresholds
	            domain_thresholds     = get_threshold_for_domain(domain)
	            structural_thresholds = domain_thresholds.structural

	            # Extract all structural features
	            features = self._extract_features(text)

	            # Calculate raw AI probability based on features
	            raw_ai_prob, confidence = self._calculate_ai_probability(features)

	            # Apply domain-specific thresholds to convert raw score to probabilities
	            ai_prob, human_prob, mixed_prob = self._apply_domain_thresholds(raw_ai_prob, structural_thresholds, features)

	            # Apply confidence multiplier from domain thresholds, then clamp to [0, 1]
	            confidence *= structural_thresholds.confidence_multiplier
	            confidence  = max(0.0, min(1.0, confidence))

	            return MetricResult(metric_name       = self.name,
	                                ai_probability    = ai_prob,
	                                human_probability = human_prob,
	                                mixed_probability = mixed_prob,
	                                confidence        = confidence,
	                                details           = {**features,
	                                                     # Fall back to the string form when the domain object has no `.value`
	                                                     'domain_used'     : getattr(domain, 'value', str(domain)),
	                                                     'ai_threshold'    : structural_thresholds.ai_threshold,
	                                                     'human_threshold' : structural_thresholds.human_threshold,
	                                                     'raw_score'       : raw_ai_prob,
	                                                    },
	                               )

	        except Exception as e:
	            # Deliberate best-effort boundary: log and report a neutral, zero-confidence result
	            logger.error(f"Error in {self.name} computation: {repr(e)}")
	            return MetricResult(metric_name       = self.name,
	                                ai_probability    = 0.5,
	                                human_probability = 0.5,
	                                mixed_probability = 0.0,
	                                confidence        = 0.0,
	                                error             = str(e),
	                               )


	    def _apply_domain_thresholds(self, raw_score: float, thresholds: Any, features: Dict[str, Any]) -> tuple:
	        """
	        Apply domain-specific thresholds to convert raw score to probabilities

	        Arguments:
	        ----------
	            raw_score  { float } : Raw AI-likeness score

	            thresholds { Any }   : Object exposing `ai_threshold` and `human_threshold`

	            features   { dict }  : Feature dictionary, forwarded to `_calculate_mixed_probability`

	        Returns:
	        --------
	            { tuple } : (ai_prob, human_prob, mixed_prob), rescaled to sum to 1.0
	                        whenever their total is positive
	        """
	        ai_threshold    = thresholds.ai_threshold
	        human_threshold = thresholds.human_threshold

	        if (raw_score >= ai_threshold):
	            # At/above the AI threshold: start from 0.7 / 0.3 and shift by 0.3 per unit of
	            # distance past the threshold (values are clamped to [0, 1] below)
	            distance   = raw_score - ai_threshold
	            ai_prob    = 0.7 + (distance * 0.3)
	            human_prob = 0.3 - (distance * 0.3)

	        elif (raw_score <= human_threshold):
	            # At/below the human threshold: mirror image of the branch above
	            distance   = human_threshold - raw_score
	            ai_prob    = 0.3 - (distance * 0.3)
	            human_prob = 0.7 + (distance * 0.3)

	        else:
	            # Between thresholds - uncertain zone, interpolate linearly
	            range_width = ai_threshold - human_threshold

	            if (range_width > 0):
	                position_in_range = (raw_score - human_threshold) / range_width
	                ai_prob           = 0.3 + (position_in_range * 0.4)   # 0.3 to 0.7
	                human_prob        = 0.7 - (position_in_range * 0.4)   # 0.7 to 0.3

	            else:
	                ai_prob    = 0.5
	                human_prob = 0.5

	        # Ensure probabilities are valid
	        ai_prob    = max(0.0, min(1.0, ai_prob))
	        human_prob = max(0.0, min(1.0, human_prob))

	        # Calculate mixed probability based on statistical patterns
	        mixed_prob = self._calculate_mixed_probability(features)

	        # Normalize to sum to 1.0
	        total = ai_prob + human_prob + mixed_prob

	        if (total > 0):
	            ai_prob    /= total
	            human_prob /= total
	            mixed_prob /= total

	        # Hand back plain Python floats rather than numpy scalars
	        return float(ai_prob), float(human_prob), float(mixed_prob)


	    def _extract_features(self, text: str) -> Dict[str, Any]:
	        """
	        Extract all structural features from text

	        Arguments:
	        ----------
	            text { str } : Input text to analyze

	        Returns:
	        --------
	            { dict } : Rounded feature values keyed by feature name, plus the raw
	                       `vocabulary_size`, `num_sentences` and `num_words` counts.
	                       Empty input yields zeros for every feature.
	        """
	        # Basic tokenization
	        sentences = self._split_sentences(text)
	        words     = self._tokenize_words(text)
	        num_words = len(words)

	        # Sentence-level features (length measured in whitespace-separated tokens)
	        sentence_lengths    = [len(sentence.split()) for sentence in sentences]
	        avg_sentence_length = np.mean(sentence_lengths) if sentence_lengths else 0
	        std_sentence_length = np.std(sentence_lengths) if len(sentence_lengths) > 1 else 0

	        # Word-level features
	        word_lengths    = [len(word) for word in words]
	        avg_word_length = np.mean(word_lengths) if word_lengths else 0
	        std_word_length = np.std(word_lengths) if len(word_lengths) > 1 else 0

	        # Vocabulary richness
	        vocabulary_size  = len(set(words))
	        type_token_ratio = vocabulary_size / num_words if words else 0

	        # Punctuation analysis
	        punctuation_density = self._calculate_punctuation_density(text)
	        comma_frequency     = text.count(',') / num_words if words else 0

	        # Burstiness (variation in patterns)
	        burstiness = self._calculate_burstiness(sentence_lengths)

	        # Uniformity: 1 - coefficient of variation of sentence length, clamped to [0, 1]
	        length_uniformity = 1.0 - (std_sentence_length / avg_sentence_length) if avg_sentence_length > 0 else 0
	        length_uniformity = max(0, min(1, length_uniformity))

	        # Readability approximation (simplified)
	        readability = self._calculate_readability(text, sentences, words)

	        # Pattern detection
	        repetition_score = self._detect_repetitive_patterns(words)

	        # N-gram analysis
	        bigram_diversity  = self._calculate_ngram_diversity(words, n = 2)
	        trigram_diversity = self._calculate_ngram_diversity(words, n = 3)

	        # float(...) is applied after rounding so the stored values are plain Python
	        # floats instead of numpy scalars, while the rounding itself is unchanged
	        return {"avg_sentence_length" : float(round(avg_sentence_length, 2)),
	                "std_sentence_length" : float(round(std_sentence_length, 2)),
	                "avg_word_length"     : float(round(avg_word_length, 2)),
	                "std_word_length"     : float(round(std_word_length, 2)),
	                "vocabulary_size"     : vocabulary_size,
	                "type_token_ratio"    : float(round(type_token_ratio, 4)),
	                "punctuation_density" : float(round(punctuation_density, 4)),
	                "comma_frequency"     : float(round(comma_frequency, 4)),
	                "burstiness_score"    : float(round(burstiness, 4)),
	                "length_uniformity"   : float(round(length_uniformity, 4)),
	                "readability_score"   : float(round(readability, 2)),
	                "repetition_score"    : float(round(repetition_score, 4)),
	                "bigram_diversity"    : float(round(bigram_diversity, 4)),
	                "trigram_diversity"   : float(round(trigram_diversity, 4)),
	                "num_sentences"       : len(sentences),
	                "num_words"           : num_words,
	               }


	    def _split_sentences(self, text: str) -> List[str]:
	        """
	        Split text into sentences

	        Splits on every run of '.', '!' or '?' and drops empty fragments. This is a
	        simple heuristic: decimals and abbreviations are split as well.
	        """
	        fragments = (fragment.strip() for fragment in re.split(r'[.!?]+', text))

	        return [fragment for fragment in fragments if fragment]


	    def _tokenize_words(self, text: str) -> List[str]:
	        """
	        Tokenize text into lower-cased words (runs of `\\w` characters)
	        """
	        return re.findall(r'\b\w+\b', text.lower())


	    def _calculate_punctuation_density(self, text: str) -> float:
	        """
	        Calculate punctuation density

	        Returns the share of characters that are neither word characters nor
	        whitespace, or 0 for empty text.
	        """
	        total_chars = len(text)

	        if (total_chars == 0):
	            return 0

	        return len(re.findall(r'[^\w\s]', text)) / total_chars


	    def _calculate_burstiness(self, values: List[float]) -> float:
	        """
	        Calculate burstiness score (variation in patterns)
	        Higher burstiness typically indicates human writing

	        Returns half the coefficient of variation (std / mean), capped at 1.0;
	        0.0 when there are fewer than two values or the mean is zero.
	        """
	        if (len(values) < 2):
	            return 0.0

	        mean_val = np.mean(values)

	        if (mean_val == 0):
	            return 0.0

	        # Coefficient of variation
	        cv = np.std(values) / mean_val

	        # Normalize to 0-1 range
	        return float(min(1.0, cv / 2.0))


	    def _calculate_readability(self, text: str, sentences: List[str], words: List[str]) -> float:
	        """
	        Calculate simplified readability score
	        (Approximation of Flesch Reading Ease)

	        Arguments:
	        ----------
	            text      { str }  : Unused; kept so the signature stays unchanged

	            sentences { list } : Sentences of the text

	            words     { list } : Word tokens of the text

	        Returns:
	        --------
	            { float } : Score clamped to [0, 100]; 0.0 when there are no sentences or words
	        """
	        if not sentences or not words:
	            return 0.0

	        total_sentences = len(sentences)
	        total_words     = len(words)
	        total_syllables = sum(self._count_syllables(word) for word in words)

	        # Flesch Reading Ease approximation (both counts are non-zero past the guard above)
	        score = 206.835 - 1.015 * (total_words / total_sentences) - 84.6 * (total_syllables / total_words)

	        return max(0.0, min(100.0, score))


	    def _count_syllables(self, word: str) -> int:
	        """
	        Approximate syllable count for a word

	        Counts groups of consecutive vowels (a, e, i, o, u, y), subtracts one when
	        the word ends in 'e', and never returns less than 1 for a zero count.
	        """
	        word               = word.lower()
	        vowels             = 'aeiouy'
	        syllable_count     = 0
	        previous_was_vowel = False

	        for char in word:
	            is_vowel = char in vowels

	            # Only the first vowel of a run starts a new syllable
	            if is_vowel and not previous_was_vowel:
	                syllable_count += 1

	            previous_was_vowel = is_vowel

	        # Adjust for silent 'e'
	        if (word.endswith('e')):
	            syllable_count -= 1

	        # Ensure at least one syllable
	        if (syllable_count == 0):
	            syllable_count = 1

	        return syllable_count


	    def _detect_repetitive_patterns(self, words: List[str]) -> float:
	        """
	        Detect repetitive patterns in text
	        AI text sometimes shows more repetition

	        Slides a 10-word window over the tokens and, per window, counts the distinct
	        words occurring more than once. The total is divided by
	        (number of windows * window size).

	        Returns:
	        --------
	            { float } : Repetition score; 0.0 when fewer than 10 words are given
	        """
	        window_size = 10

	        if (len(words) < window_size):
	            return 0.0

	        # n words contain n - window_size + 1 full windows; the "+ 1" includes the
	        # final window, so a text of exactly window_size words is scored too
	        num_windows = len(words) - window_size + 1
	        repetitions = 0

	        for start in range(num_windows):
	            word_counts = Counter(words[start:start + window_size])
	            # Count words that appear more than once
	            repetitions += sum(1 for count in word_counts.values() if count > 1)

	        # Normalize
	        return repetitions / (num_windows * window_size)


	    def _calculate_ngram_diversity(self, words: List[str], n: int = 2) -> float:
	        """
	        Calculate n-gram diversity
	        Higher diversity often indicates human writing

	        Returns the ratio of unique n-grams to total n-grams, or 0.0 when the text
	        has fewer than n words.
	        """
	        if (len(words) < n):
	            return 0.0

	        # Generate n-grams (at least one exists once len(words) >= n)
	        ngrams = [tuple(words[i:i + n]) for i in range(len(words) - n + 1)]

	        return len(set(ngrams)) / len(ngrams)


	    def _calculate_ai_probability(self, features: Dict[str, Any]) -> tuple:
	        """
	        Calculate AI probability based on structural features

	        Each of five features contributes one indicator value between 0.3 and 0.7;
	        the raw score is their mean and the confidence falls as they disagree.

	        Returns:
	        --------
	            { tuple } : (raw_score, confidence), with confidence clamped to [0.1, 0.9]
	        """
	        ai_indicators = list()

	        # Low burstiness suggests AI (AI is more consistent)
	        burstiness = features['burstiness_score']

	        if (burstiness < 0.3):
	            # Strong AI indicator
	            ai_indicators.append(0.7)

	        elif (burstiness < 0.5):
	            # Moderate AI indicator
	            ai_indicators.append(0.5)

	        else:
	            # Weak AI indicator
	            ai_indicators.append(0.3)

	        # High length uniformity suggests AI
	        uniformity = features['length_uniformity']

	        if (uniformity > 0.7):
	            # Strong AI indicator
	            ai_indicators.append(0.7)

	        elif (uniformity > 0.5):
	            # Moderate AI indicator
	            ai_indicators.append(0.5)

	        else:
	            # Weak AI indicator
	            ai_indicators.append(0.3)

	        # Low n-gram diversity suggests AI
	        ai_indicators.append(0.6 if (features['bigram_diversity'] < 0.7) else 0.4)

	        # Moderate readability suggests AI (AI often produces "perfect" readability)
	        ai_indicators.append(0.6 if (60 <= features['readability_score'] <= 75) else 0.4)

	        # Low repetition suggests AI (AI avoids excessive repetition)
	        repetition = features['repetition_score']

	        if (repetition < 0.1):
	            # Moderate AI indicator
	            ai_indicators.append(0.6)

	        elif (repetition < 0.2):
	            # Neutral
	            ai_indicators.append(0.5)

	        else:
	            # Weak AI indicator
	            ai_indicators.append(0.3)

	        # The list always holds five values here, so mean/std are well defined
	        raw_score  = np.mean(ai_indicators)
	        confidence = 1.0 - (np.std(ai_indicators) / 0.5)
	        confidence = max(0.1, min(0.9, confidence))

	        return float(raw_score), float(confidence)


	    def _calculate_mixed_probability(self, features: Dict[str, Any]) -> float:
	        """
	        Calculate probability of mixed AI/Human content based on structural patterns

	        Returns:
	        --------
	            { float } : Mean of the triggered indicators capped at 0.3, or 0.0 when
	                        none is triggered
	        """
	        mixed_indicators = []

	        # High burstiness suggests mixed content (inconsistent patterns)
	        if (features['burstiness_score'] > 0.6):
	            mixed_indicators.append(0.4)

	        # Inconsistent sentence lengths might indicate mixing
	        if (features['std_sentence_length'] > features['avg_sentence_length'] * 0.8):
	            mixed_indicators.append(0.3)

	        # Extreme values in multiple features might indicate mixing
	        type_token_ratio = features['type_token_ratio']
	        readability      = features['readability_score']
	        extreme_features = 0

	        if (type_token_ratio < 0.3) or (type_token_ratio > 0.9):
	            extreme_features += 1

	        if (readability < 20) or (readability > 90):
	            extreme_features += 1

	        if (extreme_features >= 2):
	            mixed_indicators.append(0.3)

	        if not mixed_indicators:
	            return 0.0

	        return float(min(0.3, np.mean(mixed_indicators)))

	# Export: names made available by `from <this module> import *`
	__all__ = ["StructuralMetric"]