# daasime's picture
# Add SOP Audio Analyzer app files
# ebba35f
"""
Reading Pattern Analyzer
Detects if someone is reading prepared answers vs speaking naturally.
Key indicators of reading:
- Consistent speech rate (no natural variation)
- Lack of filler words ("um", "uh", "like", "you know")
- Regular pause patterns
- Monotonic rhythm
"""
import numpy as np
from dataclasses import dataclass, field
from typing import List, Optional
# English hesitation sounds and discourse markers counted as filler words.
# Entries are lowercase; multi-word phrases are matched as a whole.
FILLER_WORDS = [
    'um', 'uh', 'uhm', 'umm', 'er', 'ah',
    'like', 'you know', 'basically', 'actually', 'so', 'well',
    'i mean', 'kind of', 'sort of', 'right', 'okay',
]
@dataclass
class ReadingPatternResult:
    """Outcome of a reading-pattern analysis.

    Attributes:
        is_reading: Whether the speech was flagged as being read aloud.
        confidence: Confidence of the verdict, from 0.0 to 1.0.
        indicators: Human-readable reasons that contributed to the verdict.
        speech_rate_cv: Coefficient of variation of the speech rate.
        filler_word_rate: Filler words per minute.
        pause_regularity: How regular the pauses are.
    """

    is_reading: bool
    confidence: float
    indicators: List[str] = field(default_factory=list)
    speech_rate_cv: float = 0.0
    filler_word_rate: float = 0.0
    pause_regularity: float = 0.0
class ReadingPatternAnalyzer:
    """
    Analyzes speech patterns to detect if someone is reading.

    Uses a transcription with word-level timestamps to analyze:
    - Speech rate variation
    - Filler word frequency
    - Pause patterns
    - Presence of self-corrections

    Each of the four checks contributes a fixed score; the mean of those
    scores is the confidence that the speaker is reading.
    """

    def __init__(self,
                 min_speech_rate_cv: float = 0.15,
                 min_filler_rate: float = 2.0,
                 reading_threshold: float = 0.6):
        """
        Args:
            min_speech_rate_cv: Minimum coefficient of variation for natural speech
            min_filler_rate: Minimum filler words per minute for natural speech
            reading_threshold: Confidence threshold to flag as reading
        """
        self.min_speech_rate_cv = min_speech_rate_cv
        self.min_filler_rate = min_filler_rate
        self.reading_threshold = reading_threshold

    def analyze(self, transcription: str, word_timestamps: List[dict],
                duration_seconds: float) -> "ReadingPatternResult":
        """
        Analyze transcription for reading patterns.

        Args:
            transcription: Full transcription text
            word_timestamps: List of {'word': str, 'start': float, 'end': float}
            duration_seconds: Total audio duration

        Returns:
            ReadingPatternResult with analysis. When fewer than 10 word
            timestamps are supplied, the result is "not reading" with
            confidence 0.0 and a single "Insufficient data" indicator.
        """
        if not word_timestamps or len(word_timestamps) < 10:
            return ReadingPatternResult(
                is_reading=False,
                confidence=0.0,
                indicators=["Insufficient data for analysis"]
            )

        indicators = []
        scores = []

        # 1. Speech rate variation: a low CV means a suspiciously steady pace.
        speech_rate_cv = self._analyze_speech_rate(word_timestamps)
        if speech_rate_cv < self.min_speech_rate_cv:
            indicators.append(f"Constant speech rate (CV={speech_rate_cv:.2f})")
            scores.append(0.8)
        else:
            scores.append(0.2)

        # 2. Filler word frequency: few fillers suggests prepared text.
        filler_rate = self._analyze_filler_words(transcription, duration_seconds)
        if filler_rate < self.min_filler_rate:
            indicators.append(f"Few filler words ({filler_rate:.1f}/min)")
            scores.append(0.7)
        else:
            scores.append(0.2)

        # 3. Pause patterns: very regular gaps between words.
        pause_regularity = self._analyze_pause_patterns(word_timestamps)
        if pause_regularity > 0.7:
            indicators.append(f"Regular pause pattern ({pause_regularity:.0%})")
            scores.append(0.6)
        else:
            scores.append(0.2)

        # 4. Natural speech markers: self-corrections point away from reading.
        if not self._has_self_corrections(transcription):
            indicators.append("No self-corrections detected")
            scores.append(0.5)
        else:
            scores.append(0.1)

        # Convert NumPy scalars to built-in types so the result matches the
        # dataclass annotations and stays serializable (np.bool_ is not bool).
        confidence = float(np.mean(scores))
        is_reading = bool(confidence >= self.reading_threshold)

        return ReadingPatternResult(
            is_reading=is_reading,
            confidence=round(confidence, 2),
            indicators=indicators,
            speech_rate_cv=round(float(speech_rate_cv), 3),
            filler_word_rate=round(float(filler_rate), 2),
            pause_regularity=round(float(pause_regularity), 2)
        )

    def _analyze_speech_rate(self, word_timestamps: List[dict]) -> float:
        """
        Calculate coefficient of variation of speech rate.

        Natural speech has variable rate, reading is more constant. The rate
        is measured as words per second in 3-second windows advanced by
        1 second; only words lying entirely inside a window are counted, and
        windows containing no words are skipped.

        Returns:
            std / mean of the per-window rates, or 0.0 when there are fewer
            than 5 words or fewer than 3 non-empty windows.
        """
        if len(word_timestamps) < 5:
            return 0.0

        window_size = 3.0  # seconds
        hop = 1.0  # seconds

        # The end of the last entry is taken as the end of speech.
        max_time = word_timestamps[-1].get('end', 0)
        if max_time < window_size:
            return 0.0

        # Inclusive count: the final full window, which ends exactly at
        # max_time when the span is a whole number of hops, must be kept.
        n_windows = int((max_time - window_size) // hop) + 1

        rates = []
        for index in range(n_windows):
            start = index * hop
            end = start + window_size
            words_in_window = sum(
                1 for w in word_timestamps
                if w.get('start', 0) >= start and w.get('end', 0) <= end
            )
            if words_in_window:
                rates.append(words_in_window / window_size)

        if len(rates) < 3:
            return 0.0

        # Coefficient of variation (std / mean)
        mean_rate = np.mean(rates)
        if mean_rate == 0:
            return 0.0
        return float(np.std(rates) / mean_rate)

    def _analyze_filler_words(self, transcription: str,
                              duration_seconds: float) -> float:
        """
        Count filler words per minute.

        Natural speech has more fillers, reading has fewer. Matching is
        case-insensitive and restricted to whole words/phrases.

        Returns:
            Filler occurrences per minute, or 0.0 when the audio is shorter
            than 0.1 minutes (6 seconds).
        """
        import re  # kept function-local, as in the original

        minutes = duration_seconds / 60.0
        if minutes < 0.1:
            return 0.0

        text_lower = transcription.lower()
        filler_count = sum(
            len(re.findall(r'\b' + re.escape(filler) + r'\b', text_lower))
            for filler in FILLER_WORDS
        )
        return filler_count / minutes

    def _analyze_pause_patterns(self, word_timestamps: List[dict]) -> float:
        """
        Analyze regularity of pauses between words.

        Reading tends to have more regular pauses. Gaps of 0.05 s or less
        between consecutive words are ignored.

        Returns:
            1 / (1 + CV) of the remaining gaps (1.0 = perfectly regular), or
            0.0 when there are fewer than 5 words or fewer than 3 gaps.
        """
        if len(word_timestamps) < 5:
            return 0.0

        # Gap between the end of each word and the start of the next one.
        all_gaps = [
            curr.get('start', 0) - prev.get('end', 0)
            for prev, curr in zip(word_timestamps, word_timestamps[1:])
        ]
        gaps = [gap for gap in all_gaps if gap > 0.05]  # Ignore very small gaps

        if len(gaps) < 3:
            return 0.0

        # Calculate regularity (inverse of coefficient of variation)
        mean_gap = np.mean(gaps)
        if mean_gap == 0:
            return 0.0
        cv = np.std(gaps) / mean_gap
        return float(1.0 / (1.0 + cv))  # Higher = more regular

    def _has_self_corrections(self, transcription: str) -> bool:
        """
        Check for self-corrections which indicate natural speech.

        E.g., "I went to the... I mean, I was going to the store"

        A correction is either one of the marker phrases appearing as whole
        words, or the same word (longer than 2 characters) repeated twice in
        a row.
        """
        import re  # function-local, same as _analyze_filler_words

        correction_markers = [
            'i mean', 'sorry', 'no wait', 'actually', 'let me',
            'what i meant', 'no no', 'sorry i', 'wait'
        ]
        text_lower = transcription.lower()

        # Word boundaries prevent hits inside other words ("waiter",
        # "piano note"), matching how filler words are detected.
        marker_pattern = (
            r'\b(?:' + '|'.join(re.escape(m) for m in correction_markers) + r')\b'
        )
        if re.search(marker_pattern, text_lower):
            return True

        # Check for repeated words (stammering/correction)
        words = text_lower.split()
        return any(
            curr == prev and len(curr) > 2
            for prev, curr in zip(words, words[1:])
        )