victan committed on
Commit
89c9a17
1 Parent(s): f45d0d6

Upload seamless_communication/cli/expressivity/evaluate/post_process_pauserate.py with huggingface_hub

Browse files
seamless_communication/cli/expressivity/evaluate/post_process_pauserate.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the license found in the
5
+ # MIT_LICENSE file in the root directory of this source tree.
6
+
7
+ import pandas as pd
8
+ import csv
9
+ import scipy
10
+ from typing import Dict
11
+
12
+
13
def get_pause(pause_data_tsv: str) -> Dict[str, float]:
    """Collapse the per-row pause scores of a TSV file into weighted sums.

    The file is read as tab-separated values. Each row's ``total_weight`` is
    divided by the column total, and every score column is multiplied by
    those normalized weights and summed.

    Args:
        pause_data_tsv: Path to a tab-separated file containing a
            ``total_weight`` column and the three ``wmean_*_score`` columns
            listed below.

    Returns:
        A mapping from each score column name to its weighted sum.
    """
    score_columns = (
        "wmean_duration_score",
        "wmean_alignment_score",
        "wmean_joint_score",
    )
    table = pd.read_csv(pause_data_tsv, sep="\t", quoting=csv.QUOTE_MINIMAL)

    # Normalize once so the weights sum to one across all rows.
    raw_weights = table.total_weight
    normalized_weights = raw_weights / raw_weights.sum()

    return {
        column: (table[column] * normalized_weights).sum()
        for column in score_columns
    }
32
+
33
+
34
def get_rate(target_speech_tsv: str, source_speech_tsv: str) -> float:
    """Return the Spearman correlation between source and target speech rates.

    Both files are read as tab-separated values and indexed by their ``id``
    column. The ``speech_rate_syllable`` column of each file is then compared
    with ``scipy.stats.spearmanr``.

    When both files carry the same set of unique ids, the target rows are
    reordered to follow the source id order, so the result does not depend
    on the row order of either file. Otherwise (duplicate ids, or id sets
    that differ) the rows are paired by position, as they appear in the
    files.

    Args:
        target_speech_tsv: Path to the TSV with the target speech rates.
        source_speech_tsv: Path to the TSV with the source speech rates.

    Returns:
        The ``correlation`` attribute of the ``spearmanr`` result.
    """
    # Import the submodule explicitly: a bare `import scipy` does not load
    # `scipy.stats` on every SciPy release.
    from scipy import stats

    speech_unit = "syllable"
    rate_column = f"speech_rate_{speech_unit}"

    target_speech_df = pd.read_csv(
        target_speech_tsv, sep="\t", quoting=csv.QUOTE_MINIMAL
    ).set_index("id")
    source_speech_df = pd.read_csv(
        source_speech_tsv, sep="\t", quoting=csv.QUOTE_MINIMAL
    ).set_index("id")

    # using "syllable" speech unit for rate computation
    src_rates = source_speech_df[rate_column]
    tgt_rates = target_speech_df[rate_column]

    # Only realign when it is unambiguous: same number of rows, no duplicate
    # ids on either side, and every source id present in the target.
    ids_match = (
        len(src_rates) == len(tgt_rates)
        and src_rates.index.is_unique
        and tgt_rates.index.is_unique
        and bool(src_rates.index.isin(tgt_rates.index).all())
    )
    if ids_match:
        tgt_rates = tgt_rates.reindex(src_rates.index)

    src_tgt_spearman = stats.spearmanr(src_rates.to_numpy(), tgt_rates.to_numpy())
    return src_tgt_spearman.correlation  # type: ignore[no-any-return]