Spaces:

NATSpeech
/

DiffSpeech

Runtime error

App Files Files Community

DiffSpeech / mfa_usr /mfa.py

RayeRen

init

d1b91e7 over 2 years ago

raw history blame

No virus

30 kB

	import atexit
	import sys
	import os
	import time
	import argparse
	from datetime import datetime
	import multiprocessing as mp

	from montreal_forced_aligner import __version__

	from montreal_forced_aligner.utils import get_available_acoustic_languages, get_available_g2p_languages, \
	get_available_dict_languages, get_available_lm_languages, get_available_ivector_languages
	from montreal_forced_aligner.command_line.align import run_align_corpus

	from mfa_usr.adapt import run_adapt_model
	from montreal_forced_aligner.command_line.train_and_align import run_train_corpus
	from montreal_forced_aligner.command_line.g2p import run_g2p
	from montreal_forced_aligner.command_line.train_g2p import run_train_g2p
	from montreal_forced_aligner.command_line.validate import run_validate_corpus
	from montreal_forced_aligner.command_line.download import run_download
	from montreal_forced_aligner.command_line.train_lm import run_train_lm
	from montreal_forced_aligner.command_line.thirdparty import run_thirdparty
	from montreal_forced_aligner.command_line.train_ivector_extractor import run_train_ivector_extractor
	from montreal_forced_aligner.command_line.classify_speakers import run_classify_speakers
	from montreal_forced_aligner.command_line.transcribe import run_transcribe_corpus
	from montreal_forced_aligner.command_line.train_dictionary import run_train_dictionary
	from montreal_forced_aligner.command_line.create_segments import run_create_segments
	from montreal_forced_aligner.exceptions import MFAError
	from montreal_forced_aligner.config import update_global_config, load_global_config, update_command_history, \
	load_command_history


	class ExitHooks(object):
	    """Record how the process terminates so it can be reported afterwards.

	    Once :meth:`hook` has run, ``sys.exit`` and ``sys.excepthook`` are routed
	    through this object. It remembers the exit code or the uncaught exception
	    and then hands control to whatever was installed before it.
	    """

	    def __init__(self):
	        self.exit_code = None  # code passed to sys.exit(), if it was called
	        self.exception = None  # uncaught exception instance, if any
	        # Sensible defaults so exit()/exc_handler() also work before hook().
	        self._orig_exit = sys.exit
	        self._orig_excepthook = None

	    def hook(self):
	        """Install the wrappers, remembering what they replace.

	        Calling this more than once is harmless: an already-installed wrapper
	        is never saved as its own "original", which would make ``exit`` recurse.
	        """
	        if sys.exit != self.exit:
	            self._orig_exit = sys.exit
	            sys.exit = self.exit
	        if sys.excepthook != self.exc_handler:
	            self._orig_excepthook = sys.excepthook
	            sys.excepthook = self.exc_handler

	    def exit(self, code=0):
	        """Remember ``code`` and then perform the real ``sys.exit``."""
	        self.exit_code = code
	        self._orig_exit(code)

	    def exc_handler(self, exc_type, exc, *args):
	        """Remember an uncaught exception and pass it on to the previous hook.

	        Delegating keeps the normal traceback output; without it an uncaught
	        exception would terminate the program silently.
	        """
	        self.exception = exc
	        if self._orig_excepthook is not None:
	            self._orig_excepthook(exc_type, exc, *args)


	# Install the exit/exception recorder as soon as this module is imported.
	hooks = ExitHooks()
	hooks.hook()

	# Start-of-run timestamps; history_save_handler() reads both for the history record.
	BEGIN = time.time()
	BEGIN_DATE = datetime.now()


	def history_save_handler():
	    """Hand a record of this invocation to ``update_command_history``.

	    The record holds the command line, the elapsed time since ``BEGIN``, the
	    start date, the MFA version, and the outcome captured by ``hooks``:

	    * ``sys.exit`` was called: its exit code, no exception text.
	    * an uncaught exception was seen: exit code 1 plus the exception text.
	    * otherwise: exit code 0, no exception text.
	    """
	    if hooks.exit_code is not None:
	        exit_code, exception_text = hooks.exit_code, ''
	    elif hooks.exception is not None:
	        caught = hooks.exception
	        # Store text, not the exception object, so the field has the same type
	        # in every branch. Fall back to repr() for exceptions without a message.
	        exit_code, exception_text = 1, str(caught) or repr(caught)
	    else:
	        exit_code, exception_text = 0, ''

	    update_command_history({
	        'command': ' '.join(sys.argv),
	        'execution_time': time.time() - BEGIN,
	        'date': BEGIN_DATE,
	        'version': __version__,
	        'exit_code': exit_code,
	        'exception': exception_text,
	    })


	# Run history_save_handler() when the interpreter shuts down.
	atexit.register(history_save_handler)


	def fix_path():
	    """Prepend MFA's ``thirdparty/bin`` directory to the executable search path.

	    The directory is ``<TEMP_DIR>/thirdparty/bin``. On Windows only ``PATH`` is
	    changed (``;`` separated). Elsewhere both ``PATH`` and ``LD_LIBRARY_PATH``
	    get the directory prepended (``:`` separated).
	    """
	    # Imported here, as in the original, rather than at module import time.
	    from montreal_forced_aligner.config import TEMP_DIR

	    bin_dir = os.path.join(TEMP_DIR, 'thirdparty', 'bin')
	    current_path = os.environ.get('PATH', '')

	    if sys.platform != 'win32':
	        current_lib_path = os.environ.get('LD_LIBRARY_PATH', '')
	        os.environ['PATH'] = bin_dir + ':' + current_path
	        os.environ['LD_LIBRARY_PATH'] = bin_dir + ':' + current_lib_path
	        return

	    os.environ['PATH'] = bin_dir + ';' + current_path


	def unfix_path():
	    """Undo :func:`fix_path` by dropping the first entry of each variable it changed.

	    On Windows only ``PATH`` is trimmed (``;`` separated). Elsewhere ``PATH``
	    and ``LD_LIBRARY_PATH`` are each trimmed using their own value
	    (``:`` separated).

	    A variable that is not set is left alone. This function runs in a
	    ``finally`` clause, so it must not raise when ``fix_path`` failed before
	    touching the environment.
	    """
	    if sys.platform == 'win32':
	        sep, variables = ';', ('PATH',)
	    else:
	        sep, variables = ':', ('PATH', 'LD_LIBRARY_PATH')

	    for name in variables:
	        value = os.environ.get(name)
	        if value is None:
	            continue
	        # Each variable is rebuilt from itself; the library path must never
	        # be derived from PATH.
	        os.environ[name] = sep.join(value.split(sep)[1:])


	# Results of the MFA "available languages" helpers, computed once at import time.
	# In this file, acoustic/lm/g2p lists are interpolated into help text by create_parser(),
	# and the acoustic and g2p lists are also passed to run_* functions in main().
	acoustic_languages = get_available_acoustic_languages()
	ivector_languages = get_available_ivector_languages()
	lm_languages = get_available_lm_languages()
	g2p_languages = get_available_g2p_languages()
	dict_languages = get_available_dict_languages()


	def create_parser():
	    """Build the command-line parser with one sub-parser per subcommand.

	    Defaults for the shared options come from ``load_global_config()``, read
	    once per call. The subcommand name is stored in ``args.subcommand`` and is
	    required.

	    Returns:
	        argparse.ArgumentParser: the fully configured parser.
	    """
	    GLOBAL_CONFIG = load_global_config()

	    def add_global_options(subparser, textgrid_output=False):
	        """Attach the options shared by most subcommands to ``subparser``.

	        ``textgrid_output`` additionally adds ``--disable_textgrid_cleanup``.
	        """
	        subparser.add_argument('-t', '--temp_directory', type=str, default=GLOBAL_CONFIG['temp_directory'],
	                               help=f"Temporary directory root to store MFA created files, default is {GLOBAL_CONFIG['temp_directory']}")
	        subparser.add_argument('--disable_mp',
	                               help=f"Disable any multiprocessing during alignment (not recommended), default is {not GLOBAL_CONFIG['use_mp']}",
	                               action='store_true',
	                               default=not GLOBAL_CONFIG['use_mp'])
	        subparser.add_argument('-j', '--num_jobs', type=int, default=GLOBAL_CONFIG['num_jobs'],
	                               help=f"Number of data splits (and cores to use if multiprocessing is enabled), defaults "
	                                    f"is {GLOBAL_CONFIG['num_jobs']}")
	        subparser.add_argument('-v', '--verbose', help=f"Output debug messages, default is {GLOBAL_CONFIG['verbose']}",
	                               action='store_true',
	                               default=GLOBAL_CONFIG['verbose'])
	        subparser.add_argument('--clean', help=f"Remove files from previous runs, default is {GLOBAL_CONFIG['clean']}",
	                               action='store_true',
	                               default=GLOBAL_CONFIG['clean'])
	        subparser.add_argument('--overwrite',
	                               help=f"Overwrite output files when they exist, default is {GLOBAL_CONFIG['overwrite']}",
	                               action='store_true',
	                               default=GLOBAL_CONFIG['overwrite'])
	        subparser.add_argument('--debug',
	                               help=f"Run extra steps for debugging issues, default is {GLOBAL_CONFIG['debug']}",
	                               action='store_true',
	                               default=GLOBAL_CONFIG['debug'])
	        if textgrid_output:
	            subparser.add_argument('--disable_textgrid_cleanup',
	                                   help=f"Disable extra clean up steps on TextGrid output, default is {not GLOBAL_CONFIG['cleanup_textgrids']}",
	                                   action='store_true',
	                                   default=not GLOBAL_CONFIG['cleanup_textgrids'])

	    parser = argparse.ArgumentParser()

	    subparsers = parser.add_subparsers(dest="subcommand")
	    subparsers.required = True

	    # `version` takes no arguments of its own.
	    subparsers.add_parser('version')

	    align_parser = subparsers.add_parser('align')
	    align_parser.add_argument('corpus_directory', help="Full path to the directory to align")
	    align_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use")
	    align_parser.add_argument('acoustic_model_path',
	                              help=f"Full path to the archive containing pre-trained model or language ({', '.join(acoustic_languages)})")
	    align_parser.add_argument('output_directory',
	                              help="Full path to output directory, will be created if it doesn't exist")
	    align_parser.add_argument('--config_path', type=str, default='',
	                              help="Path to config file to use for alignment")
	    align_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
	                              help="Number of characters of file names to use for determining speaker, "
	                                   'default is to use directory names')
	    align_parser.add_argument('-a', '--audio_directory', type=str, default='',
	                              help="Audio directory root to use for finding audio files")
	    add_global_options(align_parser, textgrid_output=True)

	    adapt_parser = subparsers.add_parser('adapt')
	    adapt_parser.add_argument('corpus_directory', help="Full path to the directory to align")
	    adapt_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use")
	    adapt_parser.add_argument('acoustic_model_path',
	                              help=f"Full path to the archive containing pre-trained model or language ({', '.join(acoustic_languages)})")
	    adapt_parser.add_argument('output_model_path',
	                              help="Full path to save adapted_model")
	    adapt_parser.add_argument('output_directory',
	                              help="Full path to output directory, will be created if it doesn't exist")
	    adapt_parser.add_argument('--config_path', type=str, default='',
	                              help="Path to config file to use for alignment")
	    adapt_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
	                              help="Number of characters of file names to use for determining speaker, "
	                                   'default is to use directory names')
	    adapt_parser.add_argument('-a', '--audio_directory', type=str, default='',
	                              help="Audio directory root to use for finding audio files")
	    add_global_options(adapt_parser, textgrid_output=True)

	    train_parser = subparsers.add_parser('train')
	    train_parser.add_argument('corpus_directory', help="Full path to the source directory to align")
	    train_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use",
	                              default='')
	    train_parser.add_argument('output_directory',
	                              help="Full path to output directory, will be created if it doesn't exist")
	    train_parser.add_argument('--config_path', type=str, default='',
	                              help="Path to config file to use for training and alignment")
	    train_parser.add_argument('-o', '--output_model_path', type=str, default='',
	                              help="Full path to save resulting acoustic and dictionary model")
	    train_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
	                              help="Number of characters of filenames to use for determining speaker, "
	                                   'default is to use directory names')
	    train_parser.add_argument('-a', '--audio_directory', type=str, default='',
	                              help="Audio directory root to use for finding audio files")
	    train_parser.add_argument('-m', '--acoustic_model_path', type=str, default='',
	                              help="Full path to save adapted_model")

	    add_global_options(train_parser, textgrid_output=True)

	    validate_parser = subparsers.add_parser('validate')
	    validate_parser.add_argument('corpus_directory', help="Full path to the source directory to align")
	    validate_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use",
	                                 default='')
	    validate_parser.add_argument('acoustic_model_path', nargs='?', default='',
	                                 help=f"Full path to the archive containing pre-trained model or language ({', '.join(acoustic_languages)})")
	    validate_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
	                                 help="Number of characters of file names to use for determining speaker, "
	                                      'default is to use directory names')
	    validate_parser.add_argument('--test_transcriptions', help="Test accuracy of transcriptions", action='store_true')
	    validate_parser.add_argument('--ignore_acoustics',
	                                 help="Skip acoustic feature generation and associated validation",
	                                 action='store_true')
	    add_global_options(validate_parser)

	    g2p_model_help_message = f'''Full path to the archive containing pre-trained model or language ({', '.join(g2p_languages)})
	    If not specified, then orthographic transcription is split into pronunciations.'''
	    g2p_parser = subparsers.add_parser('g2p')
	    g2p_parser.add_argument("g2p_model_path", help=g2p_model_help_message, nargs='?')

	    g2p_parser.add_argument("input_path",
	                            help="Corpus to base word list on or a text file of words to generate pronunciations")
	    g2p_parser.add_argument("output_path", help="Path to save output dictionary")
	    g2p_parser.add_argument('--include_bracketed', help="Included words enclosed by brackets, i.e. [...], (...), <...>",
	                            action='store_true')
	    g2p_parser.add_argument('--config_path', type=str, default='',
	                            help="Path to config file to use for G2P")
	    add_global_options(g2p_parser)

	    train_g2p_parser = subparsers.add_parser('train_g2p')
	    train_g2p_parser.add_argument("dictionary_path", help="Location of existing dictionary")

	    train_g2p_parser.add_argument("output_model_path", help="Desired location of generated model")
	    train_g2p_parser.add_argument('--config_path', type=str, default='',
	                                  help="Path to config file to use for G2P")
	    train_g2p_parser.add_argument("--validate", action='store_true',
	                                  help="Perform an analysis of accuracy training on "
	                                       "most of the data and validating on an unseen subset")
	    add_global_options(train_g2p_parser)

	    download_parser = subparsers.add_parser('download')
	    download_parser.add_argument("model_type",
	                                 help="Type of model to download, one of 'acoustic', 'g2p', or 'dictionary'")
	    download_parser.add_argument("language", help="Name of language code to download, if not specified, "
	                                                  "will list all available languages", nargs='?')

	    train_lm_parser = subparsers.add_parser('train_lm')
	    train_lm_parser.add_argument('source_path', help="Full path to the source directory to train from, alternatively "
	                                                     'an ARPA format language model to convert for MFA use')
	    train_lm_parser.add_argument('output_model_path', type=str,
	                                 help="Full path to save resulting language model")
	    train_lm_parser.add_argument('-m', '--model_path', type=str,
	                                 help="Full path to existing language model to merge probabilities")
	    train_lm_parser.add_argument('-w', '--model_weight', type=float, default=1.0,
	                                 help="Weight factor for supplemental language model, defaults to 1.0")
	    train_lm_parser.add_argument('--dictionary_path', help="Full path to the pronunciation dictionary to use",
	                                 default='')
	    train_lm_parser.add_argument('--config_path', type=str, default='',
	                                 help="Path to config file to use for training and alignment")
	    add_global_options(train_lm_parser)

	    train_dictionary_parser = subparsers.add_parser('train_dictionary')
	    train_dictionary_parser.add_argument('corpus_directory', help="Full path to the directory to align")
	    train_dictionary_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use")
	    train_dictionary_parser.add_argument('acoustic_model_path',
	                                         help=f"Full path to the archive containing pre-trained model or language ({', '.join(acoustic_languages)})")
	    train_dictionary_parser.add_argument('output_directory',
	                                         help="Full path to output directory, will be created if it doesn't exist")
	    train_dictionary_parser.add_argument('--config_path', type=str, default='',
	                                         help="Path to config file to use for alignment")
	    train_dictionary_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
	                                         help="Number of characters of file names to use for determining speaker, "
	                                              'default is to use directory names')
	    add_global_options(train_dictionary_parser)

	    train_ivector_parser = subparsers.add_parser('train_ivector')
	    train_ivector_parser.add_argument('corpus_directory', help="Full path to the source directory to "
	                                                               'train the ivector extractor')
	    train_ivector_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use")
	    train_ivector_parser.add_argument('acoustic_model_path', type=str, default='',
	                                      help="Full path to acoustic model for alignment")
	    train_ivector_parser.add_argument('output_model_path', type=str, default='',
	                                      help="Full path to save resulting ivector extractor")
	    train_ivector_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
	                                      help="Number of characters of filenames to use for determining speaker, "
	                                           'default is to use directory names')
	    train_ivector_parser.add_argument('--config_path', type=str, default='',
	                                      help="Path to config file to use for training")
	    add_global_options(train_ivector_parser)

	    classify_speakers_parser = subparsers.add_parser('classify_speakers')
	    classify_speakers_parser.add_argument('corpus_directory', help="Full path to the source directory to "
	                                                                   'run speaker classification')
	    classify_speakers_parser.add_argument('ivector_extractor_path', type=str, default='',
	                                          help="Full path to ivector extractor model")
	    classify_speakers_parser.add_argument('output_directory',
	                                          help="Full path to output directory, will be created if it doesn't exist")

	    classify_speakers_parser.add_argument('-s', '--num_speakers', type=int, default=0,
	                                          help="Number of speakers if known")
	    classify_speakers_parser.add_argument('--cluster', help="Using clustering instead of classification",
	                                          action='store_true')
	    classify_speakers_parser.add_argument('--config_path', type=str, default='',
	                                          help="Path to config file to use for ivector extraction")
	    add_global_options(classify_speakers_parser)

	    create_segments_parser = subparsers.add_parser('create_segments')
	    create_segments_parser.add_argument('corpus_directory', help="Full path to the source directory to "
	                                                                 'run VAD segmentation')
	    create_segments_parser.add_argument('output_directory',
	                                        help="Full path to output directory, will be created if it doesn't exist")
	    create_segments_parser.add_argument('--config_path', type=str, default='',
	                                        help="Path to config file to use for segmentation")
	    add_global_options(create_segments_parser)

	    transcribe_parser = subparsers.add_parser('transcribe')
	    transcribe_parser.add_argument('corpus_directory', help="Full path to the directory to transcribe")
	    transcribe_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use")
	    transcribe_parser.add_argument('acoustic_model_path',
	                                   help=f"Full path to the archive containing pre-trained model or language ({', '.join(acoustic_languages)})")
	    transcribe_parser.add_argument('language_model_path',
	                                   help=f"Full path to the archive containing pre-trained model or language ({', '.join(lm_languages)})")
	    transcribe_parser.add_argument('output_directory',
	                                   help="Full path to output directory, will be created if it doesn't exist")
	    transcribe_parser.add_argument('--config_path', type=str, default='',
	                                   help="Path to config file to use for transcription")
	    transcribe_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
	                                   help="Number of characters of file names to use for determining speaker, "
	                                        'default is to use directory names')
	    transcribe_parser.add_argument('-a', '--audio_directory', type=str, default='',
	                                   help="Audio directory root to use for finding audio files")
	    transcribe_parser.add_argument('-e', '--evaluate', help="Evaluate the transcription "
	                                                            "against golden texts", action='store_true')
	    add_global_options(transcribe_parser)

	    config_parser = subparsers.add_parser('configure',
	                                          help="The configure command is used to set global defaults for MFA so "
	                                               "you don't have to set them every time you call an MFA command.")
	    config_parser.add_argument('-t', '--temp_directory', type=str, default='',
	                               help=f"Set the default temporary directory, default is {GLOBAL_CONFIG['temp_directory']}")
	    config_parser.add_argument('-j', '--num_jobs', type=int,
	                               help=f"Set the number of processes to use by default, defaults to {GLOBAL_CONFIG['num_jobs']}")
	    config_parser.add_argument('--always_clean', help="Always remove files from previous runs by default",
	                               action='store_true')
	    config_parser.add_argument('--never_clean', help="Don't remove files from previous runs by default",
	                               action='store_true')
	    config_parser.add_argument('--always_verbose', help="Default to verbose output", action='store_true')
	    config_parser.add_argument('--never_verbose', help="Default to non-verbose output", action='store_true')
	    config_parser.add_argument('--always_debug', help="Default to running debugging steps", action='store_true')
	    config_parser.add_argument('--never_debug', help="Default to not running debugging steps", action='store_true')
	    config_parser.add_argument('--always_overwrite', help="Always overwrite output files", action='store_true')
	    config_parser.add_argument('--never_overwrite', help="Never overwrite output files (if file already exists, "
	                                                         "the output will be saved in the temp directory)",
	                               action='store_true')
	    config_parser.add_argument('--disable_mp', help="Disable all multiprocessing (not recommended as it will usually "
	                                                    "increase processing times)", action='store_true')
	    config_parser.add_argument('--enable_mp', help="Enable multiprocessing (recommended and enabled by default)",
	                               action='store_true')
	    config_parser.add_argument('--disable_textgrid_cleanup', help="Disable postprocessing of TextGrids that cleans up "
	                                                                  "silences and recombines compound words and clitics",
	                               action='store_true')
	    config_parser.add_argument('--enable_textgrid_cleanup', help="Enable postprocessing of TextGrids that cleans up "
	                                                                 "silences and recombines compound words and clitics",
	                               action='store_true')

	    history_parser = subparsers.add_parser('history')

	    # type=int: without it a user-supplied depth stays a string and the
	    # `[-depth:]` slice in main() raises TypeError.
	    history_parser.add_argument('depth', help='Number of commands to list', nargs='?', default=10, type=int)
	    history_parser.add_argument('--verbose', help="Flag for whether to output additional information",
	                                action='store_true')

	    # `annotator` and `anchor` take no arguments of their own.
	    subparsers.add_parser('annotator')
	    subparsers.add_parser('anchor')

	    thirdparty_parser = subparsers.add_parser('thirdparty')

	    thirdparty_parser.add_argument("command",
	                                   help="One of 'download', 'validate', or 'kaldi'")
	    thirdparty_parser.add_argument('local_directory',
	                                   help="Full path to the built executables to collect", nargs="?",
	                                   default='')
	    return parser


	# Module-level parser built at import time; main() builds its own instance and does not use this one.
	parser = create_parser()


	def _print_history(entries, verbose):
	    """Print command-history entries in the order given.

	    Non-verbose output is one command per line. Verbose output is a
	    tab-separated table with a single header row.
	    """
	    if not verbose:
	        for entry in entries:
	            print(entry['command'])
	        return
	    print('command\tDate\tExecution time\tVersion\tExit code\tException')
	    for entry in entries:
	        execution_time = time.strftime('%H:%M:%S', time.gmtime(entry['execution_time']))
	        d = entry['date'].isoformat()
	        print(
	            f"{entry['command']}\t{d}\t{execution_time}\t{entry['version']}\t{entry['exit_code']}\t{entry['exception']}")


	def main():
	    """Parse the command line and run the selected subcommand.

	    Steps:
	      1. Reject the ambiguous short options ``-c`` and ``-d``.
	      2. Extend the search path via ``fix_path()``; it is always restored in
	         the ``finally`` clause.
	      3. Check that the external tools the subcommand needs are available.
	      4. Dispatch to the matching ``run_*`` function.

	    An ``MFAError`` is printed and turned into exit code 1, unless the parsed
	    arguments carry a truthy ``debug`` flag, in which case it is re-raised.
	    """
	    parser = create_parser()
	    mp.freeze_support()
	    args, unknown = parser.parse_known_args()
	    for short in ['-c', '-d']:
	        if short in unknown:
	            print(f'Due to the number of options that `{short}` could refer to, it is not accepted. '
	                  'Please specify the full argument')
	            sys.exit(1)
	    try:
	        fix_path()
	        if args.subcommand in ['align', 'train', 'train_ivector']:
	            from montreal_forced_aligner.thirdparty.kaldi import validate_alignment_binaries
	            if not validate_alignment_binaries():
	                print("There was an issue validating Kaldi binaries, please ensure you've downloaded them via the "
	                      "'mfa thirdparty download' command. See 'mfa thirdparty validate' for more detailed information "
	                      "on why this check failed.")
	                sys.exit(1)
	        elif args.subcommand in ['transcribe']:
	            from montreal_forced_aligner.thirdparty.kaldi import validate_transcribe_binaries
	            if not validate_transcribe_binaries():
	                print("There was an issue validating Kaldi binaries, please ensure you've downloaded them via the "
	                      "'mfa thirdparty download' command. See 'mfa thirdparty validate' for more detailed information "
	                      "on why this check failed. If you are on MacOS, please note that the thirdparty binaries available "
	                      "via the download command do not contain the transcription ones. To get this functionality working "
	                      "for the time being, please build kaldi locally and follow the instructions for running the "
	                      "'mfa thirdparty kaldi' command.")
	                sys.exit(1)
	        elif args.subcommand in ['train_dictionary']:
	            from montreal_forced_aligner.thirdparty.kaldi import validate_train_dictionary_binaries
	            if not validate_train_dictionary_binaries():
	                print("There was an issue validating Kaldi binaries, please ensure you've downloaded them via the "
	                      "'mfa thirdparty download' command. See 'mfa thirdparty validate' for more detailed information "
	                      "on why this check failed. If you are on MacOS, please note that the thirdparty binaries available "
	                      "via the download command do not contain the train_dictionary ones. To get this functionality working "
	                      "for the time being, please build kaldi locally and follow the instructions for running the "
	                      "'mfa thirdparty kaldi' command.")
	                sys.exit(1)
	        elif args.subcommand in ['g2p', 'train_g2p']:
	            try:
	                # Imported only to check availability; the name is not used here.
	                import pynini
	            except ImportError:
	                print("There was an issue importing Pynini, please ensure that it is installed. If you are on Windows, "
	                      "please use the Windows Subsystem for Linux to use g2p functionality.")
	                sys.exit(1)
	        if args.subcommand == 'align':
	            run_align_corpus(args, unknown, acoustic_languages)
	        elif args.subcommand == 'adapt':
	            run_adapt_model(args, unknown, acoustic_languages)
	        elif args.subcommand == 'train':
	            run_train_corpus(args, unknown)
	        elif args.subcommand == 'g2p':
	            run_g2p(args, unknown, g2p_languages)
	        elif args.subcommand == 'train_g2p':
	            run_train_g2p(args, unknown)
	        elif args.subcommand == 'validate':
	            run_validate_corpus(args, unknown)
	        elif args.subcommand == 'download':
	            run_download(args)
	        elif args.subcommand == 'train_lm':
	            run_train_lm(args, unknown)
	        elif args.subcommand == 'train_dictionary':
	            run_train_dictionary(args, unknown)
	        elif args.subcommand == 'train_ivector':
	            run_train_ivector_extractor(args, unknown)
	        elif args.subcommand == 'classify_speakers':
	            run_classify_speakers(args, unknown)
	        elif args.subcommand in ['annotator', 'anchor']:
	            from montreal_forced_aligner.command_line.anchor import run_anchor
	            run_anchor(args)
	        elif args.subcommand == 'thirdparty':
	            run_thirdparty(args)
	        elif args.subcommand == 'transcribe':
	            run_transcribe_corpus(args, unknown)
	        elif args.subcommand == 'create_segments':
	            run_create_segments(args, unknown)
	        elif args.subcommand == 'configure':
	            update_global_config(args)
	            global GLOBAL_CONFIG
	            GLOBAL_CONFIG = load_global_config()
	        elif args.subcommand == 'history':
	            # argparse hands back a string when the positional has no type;
	            # coerce here so the slice below always gets an int.
	            try:
	                depth = int(args.depth)
	            except ValueError:
	                print(f'History depth must be an integer, got {args.depth!r}')
	                sys.exit(1)
	            # `[-0:]` would return the whole list, so a non-positive depth
	            # is treated as "list nothing".
	            history = load_command_history()[-depth:] if depth > 0 else []
	            # Printed exactly once; wrapping this in another loop over the
	            # history repeats the whole listing once per entry.
	            _print_history(history, args.verbose)

	        elif args.subcommand == 'version':
	            print(__version__)
	    except MFAError as e:
	        if getattr(args, 'debug', False):
	            raise
	        print(e)
	        sys.exit(1)
	    finally:
	        unfix_path()


	# Run the command-line interface only when this file is executed as a script.
	if __name__ == '__main__':
	    main()