|
""" |
|
Compute Voice Clarity Score from audio file |
|
""" |
|
|
|
import librosa |
|
import numpy as np |
|
from typing import Dict, Any |
|
from .vcs import calculate_voice_clarity_score, get_clarity_insight |
|
|
|
def compute_voice_clarity_score(file_path: str, whisper_model) -> Dict[str, Any]:
    """
    Compute the Voice Clarity Score and its component metrics for one audio file.

    The audio is first transcribed with the supplied Whisper-style model, then
    loaded with librosa so the clarity metrics can be computed on the raw
    waveform together with the transcription segments.

    Args:
        file_path (str): Path to the audio file.
        whisper_model: Transcription model (e.g., OpenAI Whisper or faster-whisper).

    Returns:
        dict: Voice Clarity Score plus component scores; ``components`` is
        augmented with ``word_count`` and ``duration``.

    Raises:
        ValueError: If transcription yields no text/segments, or the audio
            has a non-positive duration.
    """
    # Transcribe first; word-level timestamps are not needed and fp16 is
    # disabled for CPU compatibility.
    transcription = whisper_model.transcribe(file_path, word_timestamps=False, fp16=False)
    text = transcription.get("text", "").strip()
    seg_list = transcription.get("segments", [])

    # Guard clause: nothing to score without a transcript and its segments.
    if not text or not seg_list:
        raise ValueError("Empty transcript or segments from Whisper.")

    # Load at the file's native sampling rate (sr=None) for analysis.
    samples, sample_rate = librosa.load(file_path, sr=None)
    duration = (len(samples) / sample_rate) if sample_rate else 0.0
    if duration <= 0:
        raise ValueError("Audio duration invalid or zero.")

    # Delegate the actual clarity computation to the vcs module.
    scores = calculate_voice_clarity_score(samples, sample_rate, seg_list)

    # Attach simple transcript/audio statistics alongside the clarity components.
    scores["components"]["word_count"] = len(text.split())
    scores["components"]["duration"] = duration
    return scores
|
|
|
def analyze_voice_quality(file_path: str, whisper_model) -> Dict[str, Any]:
    """
    Comprehensive voice quality analysis including clarity.

    Builds on :func:`compute_voice_clarity_score` and, in addition to the
    overall VCS value, surfaces the per-component scores and a human-readable
    clarity insight so callers get the "complete" analysis the name promises.
    The ``"VCS"`` key is unchanged, so existing callers keep working.

    Args:
        file_path (str): Path to the audio file
        whisper_model: Transcription model

    Returns:
        Dict[str, Any]: Complete voice quality analysis with keys:
            - ``"VCS"``: overall Voice Clarity Score (unchanged key)
            - ``"components"``: component scores from the clarity computation
            - ``"insight"``: textual interpretation of the score

    Raises:
        ValueError: Propagated from compute_voice_clarity_score when the
            transcript is empty or the audio duration is invalid.
    """
    clarity_results = compute_voice_clarity_score(file_path, whisper_model)
    vcs = clarity_results["VCS"]

    combined_results = {
        "VCS": vcs,
        # Pass through the component breakdown instead of discarding it.
        "components": clarity_results.get("components", {}),
        # get_clarity_insight was imported but previously unused; presumably it
        # maps the scalar score to a description — confirm against vcs module.
        "insight": get_clarity_insight(vcs),
    }

    return combined_results
|
|
|
|
|
# Public API of this module: only these two entry points are exported.
__all__ = ['compute_voice_clarity_score', 'analyze_voice_quality']