Spaces:

jaykishan-b
/

speech-analysis

Runtime error

App Files Files Community

speech-analysis / app /utils /intonation.py

jaykishan-b

init

79b7942 6 months ago

raw

history blame

2.18 kB

	import librosa
	import numpy as np
	from pydub import AudioSegment


	# Feature extraction for Intonation
	# Upper bound of every score and the multiplier applied to each raw ratio.
	_SCORE_SCALE = 10.0
	# Tempo (beats per minute) that maps to a full rhythm score.
	_REFERENCE_TEMPO_BPM = 120.0


	def _clamp_score(value):
	    """Return *value* as a float limited to [0, _SCORE_SCALE]; NaN becomes 0.0."""
	    value = float(value)
	    # min()/max() comparisons are always False for NaN, so it would slip
	    # through the clamp and poison the averaged score.
	    if np.isnan(value):
	        return 0.0
	    return min(max(value, 0.0), _SCORE_SCALE)


	def evaluate_intonation(wav_file):
	    """Score the intonation of a recording on a 0-10 scale.

	    Three sub-scores are computed with librosa, each clamped to [0, 10],
	    and then averaged:

	    * sentence stress -- mean RMS energy of the signal, times 10;
	    * intonation patterns -- std / mean of the pitch estimates whose
	      magnitude is above the median magnitude, times 10;
	    * rhythm -- estimated tempo divided by 120 BPM, times 10.

	    Args:
	        wav_file: Audio source, passed unchanged to ``librosa.load``.

	    Returns:
	        dict: Float scores under the keys ``"sentence_stress"``,
	        ``"intonation_patterns"``, ``"rhythm"`` and ``"intonation_score"``
	        (the mean of the other three).
	    """
	    y, sr = librosa.load(wav_file)

	    # 1. Sentence stress: RMS energy is used as a proxy for perceived loudness.
	    rms_energy = librosa.feature.rms(y=y)[0]
	    avg_energy = float(np.mean(rms_energy) * _SCORE_SCALE)

	    # 2. Intonation patterns: relative spread of the pitch estimates, keeping
	    # only the bins whose magnitude exceeds the median magnitude.
	    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
	    pitch_values = pitches[magnitudes > np.median(magnitudes)]
	    pitch_variation = 0.0
	    if pitch_values.size:
	        mean_pitch = np.mean(pitch_values)
	        # Guard the division: a zero mean would yield NaN (0 / 0).
	        if mean_pitch > 0:
	            pitch_variation = float(
	                np.std(pitch_values) / mean_pitch * _SCORE_SCALE
	            )

	    # 3. Rhythm: estimated tempo relative to the 120 BPM reference.
	    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
	    # NOTE(review): recent librosa releases appear to return the tempo as a
	    # 1-element array rather than a scalar; calling float() on such an array
	    # is deprecated in NumPy >= 1.25, so take the first element explicitly.
	    tempo_bpm = float(np.ravel(tempo)[0])
	    rhythm_raw = tempo_bpm / _REFERENCE_TEMPO_BPM * _SCORE_SCALE

	    # Normalise every sub-score to [0, 10] before averaging.
	    sentence_stress_score = _clamp_score(avg_energy)
	    intonation_patterns_score = _clamp_score(pitch_variation)
	    rhythm_score = _clamp_score(rhythm_raw)

	    # Final score: (Sentence Stress + Intonation Patterns + Rhythm) / 3
	    intonation_score = (
	        sentence_stress_score + intonation_patterns_score + rhythm_score
	    ) / 3

	    return {
	        "sentence_stress": sentence_stress_score,
	        "intonation_patterns": intonation_patterns_score,
	        "rhythm": rhythm_score,
	        "intonation_score": intonation_score,
	    }