Text-to-Speech

Sleeping

Text-to-Speech / text /symbols.py

Upload 685 files

0d80816 11 months ago

1.06 kB

	# Copyright (c) 2023 Amphion.
	#
	# This source code is licensed under the MIT license found in the
	# LICENSE file in the root directory of this source tree.

	""" This code is modified from https://github.com/keithito/tacotron """

	"""
	Defines the set of symbols used in text input to the model.

	The default is a set of ASCII characters that works well for English or text that has been run through Unidecode. For other data, you can modify the character sets defined below (_letters, _punctuation, _special). See TRAINING_DATA.md for details. """

	from text import cmudict, pinyin

	def _with_at_prefix(names):
		"""Return a new list with "@" prepended to every entry of *names*."""
		return ["@" + name for name in names]


	# Character sets: each string below is a collection of one-character symbols.
	_pad = "_"
	_special = "-"
	_punctuation = "!'(),.:;? "
	_letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz"

	# Silence tokens; these already carry the "@" prefix.
	_silences = ["@sp", "@spn", "@sil"]

	# ARPAbet symbols get an "@" prefix so they stay distinct from the plain
	# characters above (some of them are spelled like uppercase letters).
	# The pinyin symbols are prefixed the same way.
	_arpabet = _with_at_prefix(cmudict.valid_symbols)
	_pinyin = _with_at_prefix(pinyin.valid_symbols)

	# Export all symbols as one flat list. Order: pad, special, punctuation,
	# letters, ARPAbet symbols, silence tokens.
	symbols = [
		_pad,
		*_special,
		*_punctuation,
		*_letters,
		*_arpabet,
		*_silences,
		# *_pinyin,  # for chinese
	]