Spaces:

daasime
/

sop-audio-analyzer

Sleeping

App Files Files Community

sop-audio-analyzer / src /fraud_detection /reading_pattern.py

daasime

Add SOP Audio Analyzer app files

ebba35f 2 months ago

raw

history blame contribute delete

7.85 kB

	"""
	Reading Pattern Analyzer
	Detects if someone is reading prepared answers vs speaking naturally.

	Key indicators of reading:
	- Consistent speech rate (no natural variation)
	- Lack of filler words ("um", "uh", "like", "you know")
	- Regular pause patterns
	- Monotonic rhythm
	"""

	import numpy as np
	from dataclasses import dataclass, field
	from typing import List, Optional


	# English filler vocabulary used when measuring how "unscripted" speech sounds.
	# Entries are lowercase; multi-word entries are matched as whole phrases.
	FILLER_WORDS = [
	    # Hesitation sounds
	    'um', 'uh', 'uhm', 'umm', 'er', 'ah',
	    # Discourse markers and hedges
	    'like', 'you know', 'basically', 'actually', 'so', 'well',
	    'i mean', 'kind of', 'sort of', 'right', 'okay',
	]


	@dataclass
	class ReadingPatternResult:
	    """Outcome of a reading-versus-natural-speech analysis.

	    Attributes:
	        is_reading: Whether the speaker was flagged as reading.
	        confidence: Reading confidence score, from 0.0 to 1.0.
	        indicators: Human-readable descriptions of the findings.
	        speech_rate_cv: Coefficient of variation of the speech rate.
	        filler_word_rate: Filler words per minute.
	        pause_regularity: How regular the pauses are.
	    """

	    is_reading: bool
	    confidence: float
	    indicators: List[str] = field(default_factory=list)
	    speech_rate_cv: float = 0.0
	    filler_word_rate: float = 0.0
	    pause_regularity: float = 0.0


	class ReadingPatternAnalyzer:
	    """
	    Analyzes speech patterns to detect if someone is reading.

	    Uses a transcription with word-level timestamps to score four signals:
	    - Speech rate variation
	    - Filler word frequency
	    - Pause regularity
	    - Presence of self-corrections

	    Each signal contributes one score; the mean of the four scores is the
	    confidence, and the speaker is flagged as reading when the confidence
	    reaches ``reading_threshold``.
	    """

	    # Lowercase phrases that typically introduce a spoken self-correction.
	    _CORRECTION_MARKERS = (
	        'i mean', 'sorry', 'no wait', 'actually', 'let me',
	        'what i meant', 'no no', 'sorry i', 'wait',
	    )

	    def __init__(self,
	                 min_speech_rate_cv: float = 0.15,
	                 min_filler_rate: float = 2.0,
	                 reading_threshold: float = 0.6,
	                 max_pause_regularity: float = 0.7):
	        """
	        Args:
	            min_speech_rate_cv: Minimum coefficient of variation for natural speech
	            min_filler_rate: Minimum filler words per minute for natural speech
	            reading_threshold: Confidence threshold to flag as reading
	            max_pause_regularity: Maximum pause regularity for natural speech;
	                values above it count as a reading indicator
	        """
	        self.min_speech_rate_cv = min_speech_rate_cv
	        self.min_filler_rate = min_filler_rate
	        self.reading_threshold = reading_threshold
	        self.max_pause_regularity = max_pause_regularity

	    def analyze(self, transcription: str, word_timestamps: List[dict],
	                duration_seconds: float) -> "ReadingPatternResult":
	        """
	        Analyze transcription for reading patterns.

	        Args:
	            transcription: Full transcription text (``None`` is treated as empty)
	            word_timestamps: List of {'word': str, 'start': float, 'end': float}
	            duration_seconds: Total audio duration

	        Returns:
	            ReadingPatternResult with analysis. All numeric fields are plain
	            Python floats and ``is_reading`` is a plain ``bool``.
	        """
	        if not word_timestamps or len(word_timestamps) < 10:
	            return ReadingPatternResult(
	                is_reading=False,
	                confidence=0.0,
	                indicators=["Insufficient data for analysis"]
	            )

	        transcription = transcription or ""
	        indicators = []
	        scores = []

	        # NOTE(review): the helpers below return 0.0 when they have too little
	        # data. For speech rate and filler rate that value also satisfies the
	        # "reading" condition, so very short clips lean towards being flagged.

	        # 1. Speech rate variation: low variation suggests reading.
	        speech_rate_cv = self._analyze_speech_rate(word_timestamps)
	        if speech_rate_cv < self.min_speech_rate_cv:
	            indicators.append(f"Constant speech rate (CV={speech_rate_cv:.2f})")
	            scores.append(0.8)
	        else:
	            scores.append(0.2)

	        # 2. Filler word frequency: few fillers suggests reading.
	        filler_rate = self._analyze_filler_words(transcription, duration_seconds)
	        if filler_rate < self.min_filler_rate:
	            indicators.append(f"Few filler words ({filler_rate:.1f}/min)")
	            scores.append(0.7)
	        else:
	            scores.append(0.2)

	        # 3. Pause patterns: highly regular pauses suggest reading.
	        pause_regularity = self._analyze_pause_patterns(word_timestamps)
	        if pause_regularity > self.max_pause_regularity:
	            indicators.append(f"Regular pause pattern ({pause_regularity:.0%})")
	            scores.append(0.6)
	        else:
	            scores.append(0.2)

	        # 4. Natural speech markers: no self-corrections suggests reading.
	        if not self._has_self_corrections(transcription):
	            indicators.append("No self-corrections detected")
	            scores.append(0.5)
	        else:
	            scores.append(0.1)

	        # Convert NumPy scalars to native types so the result matches its
	        # annotations (np.bool_ is not a bool and is not JSON-serializable).
	        confidence = float(np.mean(scores))
	        is_reading = bool(confidence >= self.reading_threshold)

	        return ReadingPatternResult(
	            is_reading=is_reading,
	            confidence=round(confidence, 2),
	            indicators=indicators,
	            speech_rate_cv=round(float(speech_rate_cv), 3),
	            filler_word_rate=round(float(filler_rate), 2),
	            pause_regularity=round(float(pause_regularity), 2)
	        )

	    def _analyze_speech_rate(self, word_timestamps: List[dict]) -> float:
	        """
	        Calculate coefficient of variation of speech rate.
	        Natural speech has variable rate, reading is more constant.

	        The rate is measured as words per second inside 3-second windows
	        that advance in 1-second steps; windows containing no complete word
	        are skipped. Returns 0.0 when there are fewer than 5 words or fewer
	        than 3 usable windows.
	        """
	        if len(word_timestamps) < 5:
	            return 0.0

	        window_size = 3.0  # seconds
	        hop = 1.0  # seconds

	        # Read the timestamps once instead of on every window pass.
	        spans = [(w.get('start', 0), w.get('end', 0)) for w in word_timestamps]
	        max_time = spans[-1][1]

	        rates = []
	        for window_start in np.arange(0, max_time - window_size, hop):
	            window_end = window_start + window_size
	            # Only words lying entirely inside the window are counted.
	            n_words = sum(
	                1 for w_start, w_end in spans
	                if w_start >= window_start and w_end <= window_end
	            )
	            if n_words:
	                rates.append(n_words / window_size)

	        if len(rates) < 3:
	            return 0.0

	        # Coefficient of variation (std / mean). Every collected rate is
	        # positive, so the mean cannot be zero here.
	        return float(np.std(rates) / np.mean(rates))

	    def _analyze_filler_words(self, transcription: str,
	                              duration_seconds: float) -> float:
	        """
	        Count filler words per minute.
	        Natural speech has more fillers, reading has fewer.

	        Returns 0.0 when the audio is shorter than 0.1 minutes (6 seconds).
	        """
	        import re  # function-scope import, hoisted out of the per-filler loop

	        minutes = duration_seconds / 60.0
	        if minutes < 0.1:
	            return 0.0

	        text_lower = (transcription or "").lower()
	        # Word boundaries keep fillers from matching inside longer words
	        # (e.g. 'so' inside 'some').
	        filler_count = sum(
	            len(re.findall(r'\b' + re.escape(filler) + r'\b', text_lower))
	            for filler in FILLER_WORDS
	        )

	        return filler_count / minutes

	    def _analyze_pause_patterns(self, word_timestamps: List[dict]) -> float:
	        """
	        Analyze regularity of pauses between words.
	        Reading tends to have more regular pauses.

	        Returns a value in (0, 1], computed as 1 / (1 + CV) of the pause
	        lengths (higher = more regular), or 0.0 when there are fewer than
	        5 words or fewer than 3 pauses longer than 0.05 seconds.
	        """
	        if len(word_timestamps) < 5:
	            return 0.0

	        # Gap between the end of each word and the start of the next one;
	        # very small gaps (<= 0.05 s) are ignored.
	        gaps = []
	        for prev_word, next_word in zip(word_timestamps, word_timestamps[1:]):
	            gap = next_word.get('start', 0) - prev_word.get('end', 0)
	            if gap > 0.05:
	                gaps.append(gap)

	        if len(gaps) < 3:
	            return 0.0

	        # Every kept gap exceeds 0.05, so the mean is strictly positive.
	        cv = np.std(gaps) / np.mean(gaps)
	        return float(1.0 / (1.0 + cv))  # Higher = more regular

	    def _has_self_corrections(self, transcription: str) -> bool:
	        """
	        Check for self-corrections which indicate natural speech.
	        E.g., "I went to the... I mean, I was going to the store"

	        Correction markers are matched as whole words/phrases, so 'wait'
	        does not match inside 'waiter' and 'no no' does not match inside
	        'volcano north'. An immediately repeated word longer than two
	        characters also counts as a correction.
	        """
	        import re

	        text_lower = (transcription or "").lower()

	        marker_pattern = (
	            r'\b(?:'
	            + '|'.join(re.escape(marker) for marker in self._CORRECTION_MARKERS)
	            + r')\b'
	        )
	        if re.search(marker_pattern, text_lower):
	            return True

	        # Check for repeated words (stammering/correction)
	        words = text_lower.split()
	        return any(
	            current == previous and len(current) > 2
	            for previous, current in zip(words, words[1:])
	        )