Upload seamless_communication/cli/expressivity/evaluate/post_process_pauserate.py with huggingface_hub
Browse files
seamless_communication/cli/expressivity/evaluate/post_process_pauserate.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates
|
2 |
+
# All rights reserved.
|
3 |
+
#
|
4 |
+
# This source code is licensed under the license found in the
|
5 |
+
# MIT_LICENSE file in the root directory of this source tree.
|
6 |
+
|
7 |
+
import pandas as pd
|
8 |
+
import csv
|
9 |
+
import scipy
|
10 |
+
from typing import Dict
|
11 |
+
|
12 |
+
|
13 |
+
def get_pause(pause_data_tsv: str) -> Dict[str, float]:
    """Aggregate the pause scores stored in a TSV file into weighted means.

    The file is read as tab-separated values and must provide a
    ``total_weight`` column plus the three score columns
    ``wmean_duration_score``, ``wmean_alignment_score`` and
    ``wmean_joint_score``.

    Args:
        pause_data_tsv: Path to the tab-separated pause/alignment table.

    Returns:
        A dict keyed by score column name. Each value is the average of that
        column over all rows, weighted by each row's share of the summed
        ``total_weight``.
    """
    score_columns = (
        "wmean_duration_score",
        "wmean_alignment_score",
        "wmean_joint_score",
    )

    table = pd.read_csv(pause_data_tsv, sep="\t", quoting=csv.QUOTE_MINIMAL)

    # Normalise the row weights so that they sum to one; the weighted mean of
    # a column is then simply the sum of (value * normalised weight).
    row_weight = table["total_weight"]
    normalized_weight = row_weight / row_weight.sum()

    return {
        column: (table[column] * normalized_weight).sum()
        for column in score_columns
    }
|
32 |
+
|
33 |
+
|
34 |
+
def get_rate(target_speech_tsv: str, source_speech_tsv: str) -> float:
    """Return the Spearman correlation between source and target speech rates.

    Both files are read as tab-separated values and must provide an ``id``
    column and a ``speech_rate_syllable`` column.

    When both files contain exactly the same set of unique ids, the target
    rows are reordered to follow the source ids so that each pair of rates
    belongs to the same id even if the two files list their rows in a
    different order. Otherwise the rows are paired by position, as they
    appear in the files.

    Args:
        target_speech_tsv: Path to the TSV with the target speech rates.
        source_speech_tsv: Path to the TSV with the source speech rates.

    Returns:
        The Spearman rank correlation coefficient of the paired rates.
    """
    # Import the submodule explicitly: a bare ``import scipy`` does not load
    # ``scipy.stats`` on every SciPy release.
    from scipy import stats

    # using "syllable" speech unit for rate computation
    speech_unit = "syllable"
    rate_column = f"speech_rate_{speech_unit}"

    target_speech_df = pd.read_csv(
        target_speech_tsv, sep="\t", quoting=csv.QUOTE_MINIMAL
    ).set_index("id")
    source_speech_df = pd.read_csv(
        source_speech_tsv, sep="\t", quoting=csv.QUOTE_MINIMAL
    ).set_index("id")

    src_rates = source_speech_df[rate_column]
    tgt_rates = target_speech_df[rate_column]

    # Pair the rates by id rather than by row position whenever the ids allow
    # an unambiguous one-to-one match; a row-order difference between the two
    # files would otherwise silently correlate unrelated rows.
    ids_match_one_to_one = (
        len(src_rates) == len(tgt_rates)
        and src_rates.index.is_unique
        and tgt_rates.index.is_unique
        and bool(src_rates.index.isin(tgt_rates.index).all())
    )
    if ids_match_one_to_one:
        tgt_rates = tgt_rates.reindex(src_rates.index)

    src_tgt_spearman = stats.spearmanr(src_rates.to_numpy(), tgt_rates.to_numpy())
    return float(src_tgt_spearman.correlation)
|