File size: 2,177 Bytes
8ad2ab3 aef3b1e 8ad2ab3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
"""
Compute Voice Clarity Score from audio file
"""
import librosa
import numpy as np
from typing import Dict, Any
from .vcs import calculate_voice_clarity_score, get_clarity_insight
def compute_voice_clarity_score(file_path: str, whisper_model) -> Dict[str, Any]:
    """
    Transcribe a speech recording and score its voice clarity.

    Args:
        file_path (str): Path to the audio file.
        whisper_model: Object exposing ``transcribe(path, word_timestamps=...,
            fp16=...)``. The value it returns must support ``.get("text")``
            and ``.get("segments")`` (i.e. behave like a mapping).

    Returns:
        dict: Whatever ``calculate_voice_clarity_score`` produced, with
        ``word_count`` and ``duration`` added under its ``"components"`` entry.

    Raises:
        ValueError: If the transcript text or the segment list is empty, or
            if the loaded audio has a non-positive duration.
    """
    # Step 1: run speech-to-text on the file.
    transcription = whisper_model.transcribe(
        file_path,
        word_timestamps=False,
        fp16=False,
    )
    text = transcription.get("text", "").strip()
    segments = transcription.get("segments", [])

    # Bail out before decoding audio when there is nothing to score.
    if not (text and segments):
        raise ValueError("Empty transcript or segments from Whisper.")

    # Step 2: decode the waveform at its native sample rate (sr=None).
    samples, sample_rate = librosa.load(file_path, sr=None)
    if sample_rate:
        duration = len(samples) / sample_rate
    else:
        # A falsy sample rate would make the division meaningless.
        duration = 0.0
    if duration <= 0:
        raise ValueError("Audio duration invalid or zero.")

    # Step 3: delegate the actual scoring to the project helper.
    scored = calculate_voice_clarity_score(samples, sample_rate, segments)

    # Step 4: attach reference figures next to the component scores.
    n_words = len(text.split())
    scored["components"]["word_count"] = n_words
    scored["components"]["duration"] = duration
    return scored
def analyze_voice_quality(file_path: str, whisper_model) -> Dict[str, Any]:
    """
    Run the voice clarity computation and report the overall score.

    Args:
        file_path (str): Path to the audio file.
        whisper_model: Transcription model, forwarded unchanged to
            ``compute_voice_clarity_score``.

    Returns:
        Dict[str, Any]: A dictionary with a single ``"VCS"`` key holding the
        value found under ``"VCS"`` in the clarity result.
    """
    clarity = compute_voice_clarity_score(file_path, whisper_model)
    # Only the headline score is surfaced; component details are dropped here.
    return {"VCS": clarity["VCS"]}
# Public API: the names exported by a wildcard import of this module.
__all__ = ['compute_voice_clarity_score', 'analyze_voice_quality']