File size: 1,566 Bytes
89c9a17 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
# Copyright (c) Meta Platforms, Inc. and affiliates
# All rights reserved.
#
# This source code is licensed under the license found in the
# MIT_LICENSE file in the root directory of this source tree.
import pandas as pd
import csv
import scipy
from typing import Dict
def get_pause(pause_data_tsv: str) -> Dict[str, float]:
utt_pause_align_data = pd.read_csv(
pause_data_tsv,
sep="\t",
quoting=csv.QUOTE_MINIMAL,
)
metrics = {}
pause_duration_weight = (
utt_pause_align_data.total_weight / utt_pause_align_data.total_weight.sum()
)
for score_name in [
"wmean_duration_score",
"wmean_alignment_score",
"wmean_joint_score",
]:
metrics[score_name] = (
utt_pause_align_data[f"{score_name}"] * pause_duration_weight
).sum()
return metrics
def get_rate(target_speech_tsv: str, source_speech_tsv: str) -> float:
speech_unit = "syllable"
target_speech_df = pd.read_csv(
target_speech_tsv, sep="\t", quoting=csv.QUOTE_MINIMAL
).set_index("id")
source_speech_df = pd.read_csv(
source_speech_tsv, sep="\t", quoting=csv.QUOTE_MINIMAL
).set_index("id")
# using "syllable" speech unit for rate computation
src_speech_rate = source_speech_df[f"speech_rate_{speech_unit}"].to_numpy()
tgt_speech_rate = target_speech_df[f"speech_rate_{speech_unit}"].to_numpy()
src_tgt_spearman = scipy.stats.spearmanr(src_speech_rate, tgt_speech_rate)
return src_tgt_spearman.correlation # type: ignore[no-any-return]
|