# Copyright (c) Meta Platforms, Inc. and affiliates
# All rights reserved.
#
# This source code is licensed under the license found in the
# MIT_LICENSE file in the root directory of this source tree.
import csv
from typing import Dict

import pandas as pd
import scipy
import scipy.stats
def get_pause(pause_data_tsv: str) -> Dict[str, float]:
    """Compute weight-averaged pause scores from a pause-alignment TSV.

    The file is read as tab-separated values. Each row's ``total_weight`` is
    divided by the sum of all ``total_weight`` values, and every score column
    is then multiplied by those normalized weights and summed.

    Args:
        pause_data_tsv: Path to a TSV file with a ``total_weight`` column and
            the columns ``wmean_duration_score``, ``wmean_alignment_score``
            and ``wmean_joint_score``.

    Returns:
        A dict mapping each of the three score column names to its weighted
        sum over all rows.
    """
    pause_table = pd.read_csv(pause_data_tsv, sep="\t", quoting=csv.QUOTE_MINIMAL)

    # Normalize the per-row weights so that they add up to one.
    row_weights = pause_table.total_weight
    normalized_weights = row_weights / row_weights.sum()

    score_columns = (
        "wmean_duration_score",
        "wmean_alignment_score",
        "wmean_joint_score",
    )
    return {
        column: (pause_table[column] * normalized_weights).sum()
        for column in score_columns
    }
def get_rate(target_speech_tsv: str, source_speech_tsv: str) -> float:
    """Return the Spearman correlation between source and target speech rates.

    Both TSV files must provide an ``id`` column and a
    ``speech_rate_syllable`` column. Rates are paired row by row. When the two
    files list exactly the same unique ids but in a different row order, the
    target rows are first re-ordered to follow the source ids so that each
    pair belongs to the same id. In every other case (identical order,
    differing id sets, duplicated ids) the rows are paired by position, as
    they appear in the files.

    Args:
        target_speech_tsv: Path to the tab-separated target speech-rate file.
        source_speech_tsv: Path to the tab-separated source speech-rate file.

    Returns:
        The Spearman rank correlation coefficient of the paired rates.

    Raises:
        KeyError: If either file lacks the ``id`` or the
            ``speech_rate_syllable`` column.
    """
    # using "syllable" speech unit for rate computation
    speech_unit = "syllable"
    rate_column = f"speech_rate_{speech_unit}"

    target_speech_df = pd.read_csv(
        target_speech_tsv, sep="\t", quoting=csv.QUOTE_MINIMAL
    ).set_index("id")
    source_speech_df = pd.read_csv(
        source_speech_tsv, sep="\t", quoting=csv.QUOTE_MINIMAL
    ).set_index("id")

    src_rates = source_speech_df[rate_column]
    tgt_rates = target_speech_df[rate_column]

    # Positional pairing is only wrong when both files hold the same ids in a
    # different order; restrict the re-ordering to that unambiguous case so
    # inputs that relied on positional pairing keep their previous result.
    src_ids = src_rates.index
    tgt_ids = tgt_rates.index
    same_ids_other_order = (
        len(src_ids) == len(tgt_ids)
        and not src_ids.equals(tgt_ids)
        and src_ids.is_unique
        and tgt_ids.is_unique
        and bool(src_ids.isin(tgt_ids).all())
    )
    if same_ids_other_order:
        tgt_rates = tgt_rates.loc[src_ids]

    src_tgt_spearman = scipy.stats.spearmanr(
        src_rates.to_numpy(), tgt_rates.to_numpy()
    )
    return src_tgt_spearman.correlation  # type: ignore[no-any-return]