Spaces:
Running
Running
Delete scoring.py
Browse files- scoring.py +0 -95
scoring.py
DELETED
@@ -1,95 +0,0 @@
|
|
1 |
-
import numpy as np
|
2 |
-
import librosa
|
3 |
-
|
4 |
-
def calculate_expected_value(scores):
    """Return the expected value of *scores* under their empirical distribution.

    Each distinct score is weighted by its relative frequency in the input,
    so the result is the frequency-weighted sum of the distinct values.

    Args:
        scores: Sequence of numeric scores.

    Returns:
        The dot product of the distinct scores with their relative
        frequencies (a NumPy scalar).
    """
    # Distinct score values together with how often each one occurs.
    distinct_values, occurrences = np.unique(scores, return_counts=True)

    # Relative frequency of every distinct value.
    weights = occurrences / len(scores)

    return np.dot(distinct_values, weights)
|
12 |
-
|
13 |
-
|
14 |
-
def calculate_fluency_score(audio_path, total_words, word_pronunciation_scores, base_script_len):
    """Compute a fluency score from speech rate, speaking ratio and pronunciation.

    The score is a weighted blend of three components, then mapped linearly
    with ``10 + combined * 80``:

    * speech-rate score (weight 0.30) - compares the effective speaking rate
      against an ideal band of 120-140 words per minute,
    * speaking-ratio score (weight 0.20) - share of the recording that is
      not silence, relative to a 0.9 reference ratio and capped at 1,
    * pronunciation score (weight 0.50) - the expected per-word
      pronunciation score rescaled with ``(score - 1) / 2``.

    Args:
        audio_path: Path of the recording, passed to ``librosa.load``.
        total_words: Number of words counted for the speaker.
        word_pronunciation_scores: Per-word pronunciation scores
            (presumably on a 1-3 scale, given the ``(score - 1) / 2``
            rescaling - TODO confirm with the caller).
        base_script_len: Number of words in the reference script.

    Returns:
        ``10`` when less than 15% of the script was covered, when the
        expected pronunciation score is below 1.5, or when the loaded audio
        contains no samples; otherwise ``10 + combined * 80``.

    Raises:
        ZeroDivisionError: If ``base_script_len`` is zero.
    """
    floor_score = 10

    avg_pronunciation_score = calculate_expected_value(word_pronunciation_scores)
    # Fraction of the reference script that was actually spoken.
    coverage = total_words / base_script_len
    if coverage < 0.15 or avg_pronunciation_score < 1.5:
        return floor_score

    audio, sr = librosa.load(audio_path)
    if len(audio) == 0:
        # A zero-length recording would make total_duration zero and the
        # speaking-ratio division below fail; treat it like "nothing spoken".
        return floor_score

    # Intervals are (start_sample, end_sample) pairs of non-silent audio.
    non_silent_intervals = librosa.effects.split(audio, top_db=22)
    non_silent_duration = sum(end - start for start, end in non_silent_intervals) / sr
    total_duration = len(audio) / sr

    # Ideal speaking band, in words per second (120-140 words per minute).
    ideal_min_rate, ideal_max_rate = 120 / 60, 140 / 60

    # Words per second of non-silent audio, scaled down by script coverage.
    # The epsilon keeps the division finite for an all-silent recording.
    actual_speech_rate = (total_words / (non_silent_duration + 1e-7)) * coverage
    speaking_ratio = non_silent_duration / total_duration

    max_ratio = actual_speech_rate / ideal_max_rate
    if actual_speech_rate <= ideal_max_rate:
        # Inside the ideal band or slower: roughly 0.76-0.92 inside the band,
        # falling towards -0.167 as the rate approaches zero.
        min_ratio = actual_speech_rate / ideal_min_rate
        speech_rate_score = np.mean([max_ratio, min_ratio]) - 0.167
    else:
        # Too fast: starts just below 0.7 and decays towards 0.
        speech_rate_score = 0.7 / max_ratio

    # Penalise recordings whose speaking share falls below the reference
    # ratio (90% speaking time is assumed to be natural speech).
    gold_standard_ratio = 0.9
    speaking_ratio_score = min(speaking_ratio / gold_standard_ratio, 1)

    # Rescale the expected pronunciation score.
    avg_pronunciation_score = (avg_pronunciation_score - 1) / 2

    # Component weights; adjust as needed.
    weight_speech_rate = 0.30
    weight_speaking_ratio = 0.20
    weight_pronunciation = 0.50

    combined_score = (
        speech_rate_score * weight_speech_rate
        + speaking_ratio_score * weight_speaking_ratio
        + avg_pronunciation_score * weight_pronunciation
    )

    # Linear map: a combined score in [0, 1] lands in [10, 90].
    scaled_fluency_score = 10 + combined_score * 80

    return scaled_fluency_score
|
67 |
-
|
68 |
-
def calculate_pronunciation_accuracy(word_pronunciation_scores, fluency_score, base_script_len, total_words):
    """Blend pronunciation and fluency into a single accuracy score.

    Args:
        word_pronunciation_scores: Per-word pronunciation scores.
        fluency_score: Fluency score expressed as a percentage; it is
            divided by 100 before blending.
        base_script_len: Unused here (a minimum-coverage guard based on it
            is currently disabled); kept for the call signature.
        total_words: Unused here, for the same reason.

    Returns:
        ``10 + blended * 80`` where ``blended`` is 75% rescaled
        pronunciation plus 25% fluency fraction.
    """
    # Expected per-word score, rescaled with (score - 1) / 2.
    pronunciation_component = (calculate_expected_value(word_pronunciation_scores) - 1) / 2

    # Fluency arrives as a percentage; bring it down to a fraction.
    fluency_component = fluency_score / 100

    pronunciation_weight, fluency_weight = 0.75, 0.25
    blended = pronunciation_weight * pronunciation_component + fluency_weight * fluency_component

    # Linear map: a blended value in [0, 1] lands in [10, 90].
    return 10 + blended * 80
|
84 |
-
|
85 |
-
def calculate_fluency_and_pronunciation(audio_path, total_words, word_pronunciation_scores, base_script_len):
    """Compute the fluency score and the pronunciation accuracy for a recording.

    The fluency score is computed first because the pronunciation accuracy
    takes it as an input.

    Returns:
        A dict with the keys ``'fluency_score'`` and
        ``'pronunciation_accuracy'``.
    """
    fluency = calculate_fluency_score(
        audio_path,
        total_words,
        word_pronunciation_scores,
        base_script_len,
    )
    accuracy = calculate_pronunciation_accuracy(
        word_pronunciation_scores,
        fluency,
        base_script_len,
        total_words,
    )

    result = {}
    result['fluency_score'] = fluency
    result['pronunciation_accuracy'] = accuracy
    return result
|
92 |
-
|
93 |
-
|
94 |
-
if __name__ == '__main__':
|
95 |
-
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|