Voice-Clone2

Running

App Files Files Community

Voice-Clone2 / TTS /tts /utils /languages.py

Shadhil

voice-clone with single audio sample input

9b2107c 12 months ago

raw

history blame

4.38 kB

	import os
	from typing import Any, Dict, List

	import fsspec
	import numpy as np
	import torch
	from coqpit import Coqpit

	from TTS.config import check_config_and_model_args
	from TTS.tts.utils.managers import BaseIDManager


	class LanguageManager(BaseIDManager):
	"""Manage the languages for multi-lingual 🐸TTS models. Load a datafile and parse the information
	in a way that can be queried by language.

	Args:
	language_ids_file_path (str, optional): Path to the metafile that maps language names to ids used by
	TTS models. Defaults to "".
	config (Coqpit, optional): Coqpit config that contains the language information in the datasets filed.
	Defaults to None.

	Examples:
	>>> manager = LanguageManager(language_ids_file_path=language_ids_file_path)
	>>> language_id_mapper = manager.language_ids
	"""

	def __init__(
	self,
	language_ids_file_path: str = "",
	config: Coqpit = None,
	):
	super().__init__(id_file_path=language_ids_file_path)

	if config:
	self.set_language_ids_from_config(config)

	@property
	def num_languages(self) -> int:
	return len(list(self.name_to_id.keys()))

	@property
	def language_names(self) -> List:
	return list(self.name_to_id.keys())

	@staticmethod
	def parse_language_ids_from_config(c: Coqpit) -> Dict:
	"""Set language id from config.

	Args:
	c (Coqpit): Config

	Returns:
	Tuple[Dict, int]: Language ID mapping and the number of languages.
	"""
	languages = set({})
	for dataset in c.datasets:
	if "language" in dataset:
	languages.add(dataset["language"])
	else:
	raise ValueError(f"Dataset {dataset['name']} has no language specified.")
	return {name: i for i, name in enumerate(sorted(list(languages)))}

	def set_language_ids_from_config(self, c: Coqpit) -> None:
	"""Set language IDs from config samples.

	Args:
	c (Coqpit): Config.
	"""
	self.name_to_id = self.parse_language_ids_from_config(c)

	@staticmethod
	def parse_ids_from_data(items: List, parse_key: str) -> Any:
	raise NotImplementedError

	def set_ids_from_data(self, items: List, parse_key: str) -> Any:
	raise NotImplementedError

	def save_ids_to_file(self, file_path: str) -> None:
	"""Save language IDs to a json file.

	Args:
	file_path (str): Path to the output file.
	"""
	self._save_json(file_path, self.name_to_id)

	@staticmethod
	def init_from_config(config: Coqpit) -> "LanguageManager":
	"""Initialize the language manager from a Coqpit config.

	Args:
	config (Coqpit): Coqpit config.
	"""
	language_manager = None
	if check_config_and_model_args(config, "use_language_embedding", True):
	if config.get("language_ids_file", None):
	language_manager = LanguageManager(language_ids_file_path=config.language_ids_file)
	language_manager = LanguageManager(config=config)
	return language_manager


	def _set_file_path(path):
	"""Find the language_ids.json under the given path or the above it.
	Intended to band aid the different paths returned in restored and continued training."""
	path_restore = os.path.join(os.path.dirname(path), "language_ids.json")
	path_continue = os.path.join(path, "language_ids.json")
	fs = fsspec.get_mapper(path).fs
	if fs.exists(path_restore):
	return path_restore
	if fs.exists(path_continue):
	return path_continue
	return None


	def get_language_balancer_weights(items: list):
	language_names = np.array([item["language"] for item in items])
	unique_language_names = np.unique(language_names).tolist()
	language_ids = [unique_language_names.index(l) for l in language_names]
	language_count = np.array([len(np.where(language_names == l)[0]) for l in unique_language_names])
	weight_language = 1.0 / language_count
	# get weight for each sample
	dataset_samples_weight = np.array([weight_language[l] for l in language_ids])
	# normalize
	dataset_samples_weight = dataset_samples_weight / np.linalg.norm(dataset_samples_weight)
	return torch.from_numpy(dataset_samples_weight).float()