victan
/

audio_seam

Upload seamless_communication/cli/expressivity/evaluate/post_process_pauserate.py with huggingface_hub

89c9a17 10 months ago

1.57 kB

	# Copyright (c) Meta Platforms, Inc. and affiliates
	# All rights reserved.
	#
	# This source code is licensed under the license found in the
	# MIT_LICENSE file in the root directory of this source tree.

	import pandas as pd
	import csv
	import scipy
	from typing import Dict


	def get_pause(pause_data_tsv: str) -> Dict[str, float]:
	    """Aggregate per-utterance pause scores into corpus-level weighted scores.

	    The TSV is read with pandas (tab-separated, minimal quoting). It must
	    contain a ``total_weight`` column and the three score columns
	    ``wmean_duration_score``, ``wmean_alignment_score`` and
	    ``wmean_joint_score``.

	    Args:
	        pause_data_tsv: Path to the tab-separated pause-alignment file.

	    Returns:
	        A dict mapping each of the three score column names to the sum of
	        that column multiplied row-wise by ``total_weight`` normalised to
	        sum to one.
	    """
	    pause_table = pd.read_csv(
	        pause_data_tsv,
	        sep="\t",
	        quoting=csv.QUOTE_MINIMAL,
	    )

	    # Normalise the per-row weights so they sum to one across the file.
	    raw_weight = pause_table["total_weight"]
	    normalised_weight = raw_weight / raw_weight.sum()

	    score_columns = (
	        "wmean_duration_score",
	        "wmean_alignment_score",
	        "wmean_joint_score",
	    )
	    # float(...) turns the numpy scalar into a builtin float, matching the
	    # declared Dict[str, float] return type.
	    return {
	        column: float((pause_table[column] * normalised_weight).sum())
	        for column in score_columns
	    }


	def get_rate(target_speech_tsv: str, source_speech_tsv: str) -> float:
	    """Return the Spearman correlation between source and target speech rates.

	    Both TSVs are read with pandas (tab-separated, minimal quoting) and must
	    contain an ``id`` column and a ``speech_rate_syllable`` column.

	    Rows are paired by ``id`` when both files hold exactly the same set of
	    unique ids, so a different row order between the two files does not
	    corrupt the pairing. In every other case (duplicate ids, differing id
	    sets) rows are paired by position, as before.

	    Args:
	        target_speech_tsv: Path to the target-side speech-rate TSV.
	        source_speech_tsv: Path to the source-side speech-rate TSV.

	    Returns:
	        The Spearman rank correlation coefficient as a builtin float.
	    """
	    # Import the submodule explicitly: a bare `import scipy` does not
	    # guarantee that `scipy.stats` is loaded on every SciPy version.
	    from scipy import stats

	    # using "syllable" speech unit for rate computation
	    speech_unit = "syllable"
	    rate_column = f"speech_rate_{speech_unit}"

	    target_rate = pd.read_csv(
	        target_speech_tsv, sep="\t", quoting=csv.QUOTE_MINIMAL
	    ).set_index("id")[rate_column]
	    source_rate = pd.read_csv(
	        source_speech_tsv, sep="\t", quoting=csv.QUOTE_MINIMAL
	    ).set_index("id")[rate_column]

	    # Equal length + unique ids + every source id present in the target
	    # means both files hold the same id set, so reordering the target by
	    # id is unambiguous. Otherwise keep the positional pairing.
	    same_unique_ids = (
	        len(source_rate) == len(target_rate)
	        and source_rate.index.is_unique
	        and target_rate.index.is_unique
	        and bool(source_rate.index.isin(target_rate.index).all())
	    )
	    if same_unique_ids:
	        target_rate = target_rate.reindex(source_rate.index)

	    src_tgt_spearman = stats.spearmanr(
	        source_rate.to_numpy(), target_rate.to_numpy()
	    )
	    return float(src_tgt_spearman.correlation)