# Fast_api/vcs/compute_vcs.py — commit aef3b1e ("API optimizations", by mulasagg)
"""
Compute Voice Clarity Score from audio file
"""
import librosa
import numpy as np
from typing import Dict, Any
from .vcs import calculate_voice_clarity_score, get_clarity_insight
def compute_voice_clarity_score(file_path: str, whisper_model) -> Dict[str, Any]:
    """
    Compute the Voice Clarity Score and its component metrics for a speech sample.

    Args:
        file_path (str): Path to the audio file.
        whisper_model: Transcription model (e.g., OpenAI Whisper or faster-whisper).

    Returns:
        dict: Voice Clarity Score and component scores, with word count and
        duration attached under ``"components"`` for reference.

    Raises:
        ValueError: If transcription produced no usable text/segments, or the
        audio has zero/invalid duration.
    """
    # Transcribe first — everything downstream needs the text and segments.
    transcription = whisper_model.transcribe(file_path, word_timestamps=False, fp16=False)
    text = transcription.get("text", "").strip()
    segment_list = transcription.get("segments", [])

    # Fail fast on unusable transcription output.
    if not (text and segment_list):
        raise ValueError("Empty transcript or segments from Whisper.")

    # Load the waveform at its native sample rate (sr=None preserves it).
    audio, sample_rate = librosa.load(file_path, sr=None)
    duration = (len(audio) / sample_rate) if sample_rate else 0.0
    if duration <= 0:
        raise ValueError("Audio duration invalid or zero.")

    # Delegate the actual scoring to the VCS module.
    clarity = calculate_voice_clarity_score(audio, sample_rate, segment_list)

    # Attach reference metadata alongside the component scores.
    clarity["components"]["word_count"] = len(text.split())
    clarity["components"]["duration"] = duration
    return clarity
def analyze_voice_quality(file_path: str, whisper_model) -> Dict[str, Any]:
    """
    Comprehensive voice quality analysis including clarity.

    Args:
        file_path (str): Path to the audio file.
        whisper_model: Transcription model.

    Returns:
        Dict[str, Any]: Complete voice quality analysis with:
            - "VCS": overall Voice Clarity Score
            - "components": component scores (incl. word_count and duration)
            - "insight": human-readable interpretation of the score
    """
    # Get Voice Clarity Score (raises ValueError on unusable audio/transcript).
    clarity_results = compute_voice_clarity_score(file_path, whisper_model)
    vcs = clarity_results["VCS"]

    # Previously only the bare score was returned, dropping the component
    # breakdown and never using the imported get_clarity_insight helper.
    # Extra keys are additive, so existing consumers of "VCS" are unaffected.
    combined_results = {
        "VCS": vcs,
        "components": clarity_results.get("components", {}),
        # NOTE(review): assumes get_clarity_insight takes the numeric score,
        # matching the top-of-file import — confirm against vcs.py.
        "insight": get_clarity_insight(vcs),
    }
    return combined_results
# Explicit public API: names exported by `from ... import *` and the
# module's documented surface.
__all__ = ['compute_voice_clarity_score', 'analyze_voice_quality']