Spaces:

YoMioAI
/

GPT-SoVITS-3s-cloning-free-TTS

Running

App Files Files Community

GPT-SoVITS-3s-cloning-free-TTS / utils.py

Xinonria

更新长度检测

b818867 2 months ago

raw

history blame

1.93 kB

	import io
	import os
	import pickle
	import re

	import soundfile as sf
	import numpy as np
	from pydub import AudioSegment
	from pyloudnorm import Meter

	# Import-time side effect: switch the process working directory to the
	# directory that contains this file.
	# NOTE(review): presumably done so relative paths used by the app resolve
	# next to this module -- confirm against the callers.
	os.chdir(os.path.dirname(os.path.abspath(__file__)))

	def normalize_audio_loudness(data: bytes, target_loudness: float = -23.0) -> bytes:
	    """Normalize MP3 audio to a target integrated loudness.

	    The audio is decoded, its integrated loudness is measured, a constant
	    gain is applied to reach ``target_loudness``, the result is soft-limited
	    with ``tanh`` and re-encoded as MP3.

	    Args:
	        data: Encoded MP3 audio.
	        target_loudness: Desired integrated loudness (same unit as the value
	            returned by the loudness meter).

	    Returns:
	        The loudness-normalized audio, encoded as MP3 bytes.

	    Raises:
	        Any exception raised by the decoder, the loudness meter or the
	        encoder is propagated unchanged (for example, the meter may reject
	        clips that are too short to measure).
	    """
	    segment = AudioSegment.from_file(io.BytesIO(data), format='mp3')
	    sr = segment.frame_rate
	    channels = segment.channels

	    # Convert the integer PCM samples to floats in [-1.0, 1.0). The meter's
	    # absolute gate is defined relative to full scale, so measuring raw
	    # integer-scale samples would shift that threshold.
	    samples = np.array(segment.get_array_of_samples(), dtype=np.float64)
	    samples /= float(segment.max_possible_amplitude)

	    # pydub returns multi-channel audio interleaved in one flat array.
	    # Reshape to (frames, channels) so the meter and the encoder do not treat
	    # a stereo clip as a mono clip of twice the duration.
	    if channels > 1:
	        samples = samples.reshape(-1, channels)

	    # Measure the integrated loudness.
	    meter = Meter(sr)
	    loudness = meter.integrated_loudness(samples)

	    # Compute and apply the gain. Silent (or fully gated) input measures as
	    # -inf; applying the resulting infinite gain would turn zeros into NaN,
	    # so such input is passed through without gain.
	    if np.isfinite(loudness):
	        gain_db = target_loudness - loudness
	        samples = samples * 10 ** (gain_db / 20.0)

	    # Soft-limit to prevent clipping.
	    limited = np.tanh(samples)

	    # Convert the float samples back to 16-bit PCM and encode to bytes.
	    pcm = (limited * 32767).astype(np.int16)
	    byte_io = io.BytesIO()
	    sf.write(byte_io, pcm, sr, format='mp3')
	    return byte_io.getvalue()

	def get_length(text: str) -> float:
	    """Return the effective length of ``text``.

	    The text is split into word runs, single punctuation characters and
	    whitespace runs, which are weighted as follows:

	    * a run consisting only of CJK ideographs / kana counts one per character;
	    * any other word run counts 1;
	    * punctuation and whitespace count 0.5 per character.

	    Args:
	        text: The text to measure.

	    Returns:
	        The weighted length (``0`` for an empty string).
	    """
	    # The alternation must use a bare ``|``: an escaped ``\|`` matches a
	    # literal pipe, which makes the pattern match almost nothing and the
	    # length collapse to 0.
	    token_pattern = r"\b\w+\b|[^\w\s]|\s+"
	    cjk_pattern = r"^[\u4e00-\u9fff\u3040-\u30ff\u3400-\u4dbf]+$"

	    length = 0
	    for token in re.findall(token_pattern, text):
	        if re.match(cjk_pattern, token):
	            length += len(token)
	        elif re.match(r"^\w+$", token):
	            # NOTE: a word run that mixes CJK with other word characters
	            # (e.g. "GPT" directly followed by ideographs) lands here and
	            # counts as a single word.
	            length += 1
	        else:
	            length += len(token) * 0.5
	    return length

	if __name__ == "__main__":
	    # Minimal command-line entry point: normalize one MP3 file into another.
	    # normalize_audio_loudness() requires the encoded audio bytes, so calling
	    # it with no arguments can only raise TypeError.
	    # NOTE: this module changes the working directory to its own folder at
	    # import time, so relative paths given here resolve against that folder.
	    import sys

	    if len(sys.argv) != 3:
	        sys.exit(f"usage: {sys.argv[0]} INPUT.mp3 OUTPUT.mp3")

	    with open(sys.argv[1], "rb") as src:
	        normalized = normalize_audio_loudness(src.read())
	    with open(sys.argv[2], "wb") as dst:
	        dst.write(normalized)