# NOTE: Hugging Face file-viewer chrome (user avatar, "init", commit d1b91e7,
# "raw / history / blame", size badge) removed so this module parses as Python.
import atexit
import sys
import os
import time
import argparse
from datetime import datetime
import multiprocessing as mp
from montreal_forced_aligner import __version__
from montreal_forced_aligner.utils import get_available_acoustic_languages, get_available_g2p_languages, \
get_available_dict_languages, get_available_lm_languages, get_available_ivector_languages
from montreal_forced_aligner.command_line.align import run_align_corpus
from mfa_usr.adapt import run_adapt_model
from montreal_forced_aligner.command_line.train_and_align import run_train_corpus
from montreal_forced_aligner.command_line.g2p import run_g2p
from montreal_forced_aligner.command_line.train_g2p import run_train_g2p
from montreal_forced_aligner.command_line.validate import run_validate_corpus
from montreal_forced_aligner.command_line.download import run_download
from montreal_forced_aligner.command_line.train_lm import run_train_lm
from montreal_forced_aligner.command_line.thirdparty import run_thirdparty
from montreal_forced_aligner.command_line.train_ivector_extractor import run_train_ivector_extractor
from montreal_forced_aligner.command_line.classify_speakers import run_classify_speakers
from montreal_forced_aligner.command_line.transcribe import run_transcribe_corpus
from montreal_forced_aligner.command_line.train_dictionary import run_train_dictionary
from montreal_forced_aligner.command_line.create_segments import run_create_segments
from montreal_forced_aligner.exceptions import MFAError
from montreal_forced_aligner.config import update_global_config, load_global_config, update_command_history, \
load_command_history
class ExitHooks(object):
    """Record how the interpreter terminates, for later history logging.

    After :meth:`hook` is installed, an explicit ``sys.exit`` call is captured
    in ``exit_code`` and an uncaught exception instance in ``exception``; an
    atexit handler can then inspect both to classify the run's outcome.
    """

    def __init__(self):
        # Populated lazily by exit()/exc_handler(); None means "not seen yet".
        self.exit_code = None
        self.exception = None

    def hook(self):
        """Install wrappers over ``sys.exit`` and the top-level excepthook."""
        self._orig_exit = sys.exit
        sys.exit = self.exit
        sys.excepthook = self.exc_handler

    def exit(self, code=0):
        """Remember the requested status, then delegate to the real ``sys.exit``."""
        self.exit_code = code
        self._orig_exit(code)

    def exc_handler(self, exc_type, exc, *args):
        """Stash the uncaught exception instance (replaces ``sys.excepthook``)."""
        self.exception = exc
# Install the exit hooks as early as possible so every code path below is
# captured for the command history.
hooks = ExitHooks()
hooks.hook()

# Wall-clock start of this invocation; used by history_save_handler to report
# total execution time and the launch date.
BEGIN = time.time()
BEGIN_DATE = datetime.now()
def history_save_handler():
    """atexit callback: append a record of this invocation to the MFA history.

    The record holds the full command line, runtime in seconds, launch date,
    MFA version, and the exit status classified from the installed ExitHooks:
    explicit ``sys.exit`` wins, then an uncaught exception (exit code 1),
    otherwise a clean run (exit code 0).
    """
    record = {
        'command': ' '.join(sys.argv),
        'execution_time': time.time() - BEGIN,
        'date': BEGIN_DATE,
        'version': __version__,
    }
    if hooks.exit_code is not None:
        # sys.exit() was called explicitly somewhere below.
        record['exit_code'], record['exception'] = hooks.exit_code, ''
    elif hooks.exception is not None:
        # An uncaught exception reached the top level.
        record['exit_code'], record['exception'] = 1, hooks.exception
    else:
        # Normal completion.
        record['exit_code'], record['exception'] = 0, ''
    update_command_history(record)
# Ensure the history record is written no matter how the process terminates.
atexit.register(history_save_handler)
def fix_path():
    """Prepend MFA's bundled thirdparty binary directory to the search paths.

    Uses the platform's PATH separator; on non-Windows systems
    LD_LIBRARY_PATH is extended as well so bundled shared libraries resolve.
    Undone by :func:`unfix_path`.
    """
    from montreal_forced_aligner.config import TEMP_DIR
    bin_dir = os.path.join(TEMP_DIR, 'thirdparty', 'bin')
    current_path = os.environ.get('PATH', '')
    if sys.platform == 'win32':
        os.environ['PATH'] = ';'.join((bin_dir, current_path))
    else:
        os.environ['PATH'] = ':'.join((bin_dir, current_path))
        os.environ['LD_LIBRARY_PATH'] = ':'.join((bin_dir, os.environ.get('LD_LIBRARY_PATH', '')))
def unfix_path():
    """Undo :func:`fix_path`: drop the thirdparty directory prepended to PATH
    (and, on non-Windows systems, to LD_LIBRARY_PATH).

    Assumes fix_path() ran first, so the first separator-delimited entry of
    each variable is the MFA thirdparty bin directory.
    """
    sep = ';' if sys.platform == 'win32' else ':'
    os.environ['PATH'] = sep.join(os.environ.get('PATH', '').split(sep)[1:])
    if sys.platform != 'win32':
        # Bug fix: this previously re-read (the already-stripped) PATH, which
        # both dropped a second PATH entry and clobbered LD_LIBRARY_PATH with
        # PATH's remainder. Strip LD_LIBRARY_PATH's own first entry instead.
        os.environ['LD_LIBRARY_PATH'] = sep.join(os.environ.get('LD_LIBRARY_PATH', '').split(sep)[1:])
# Names of the pretrained resources available for download for each model
# type; used below to build help messages and passed to the run_* commands
# for validating language arguments.
acoustic_languages = get_available_acoustic_languages()
ivector_languages = get_available_ivector_languages()
lm_languages = get_available_lm_languages()
g2p_languages = get_available_g2p_languages()
dict_languages = get_available_dict_languages()
def create_parser():
    """Build the top-level ``mfa`` argument parser, one subparser per command.

    Defaults for the shared options are read from the persisted global
    configuration, so the parser should be rebuilt after ``mfa configure``
    changes it.

    Returns:
        argparse.ArgumentParser: fully configured parser with required
        ``subcommand`` destination.
    """
    GLOBAL_CONFIG = load_global_config()

    def add_global_options(subparser, textgrid_output=False):
        # Options shared by every corpus-processing command; defaults mirror
        # the persisted global configuration.
        subparser.add_argument('-t', '--temp_directory', type=str, default=GLOBAL_CONFIG['temp_directory'],
                               help=f"Temporary directory root to store MFA created files, default is {GLOBAL_CONFIG['temp_directory']}")
        subparser.add_argument('--disable_mp',
                               help=f"Disable any multiprocessing during alignment (not recommended), default is {not GLOBAL_CONFIG['use_mp']}",
                               action='store_true',
                               default=not GLOBAL_CONFIG['use_mp'])
        subparser.add_argument('-j', '--num_jobs', type=int, default=GLOBAL_CONFIG['num_jobs'],
                               help=f"Number of data splits (and cores to use if multiprocessing is enabled), default "
                                    f"is {GLOBAL_CONFIG['num_jobs']}")
        subparser.add_argument('-v', '--verbose', help=f"Output debug messages, default is {GLOBAL_CONFIG['verbose']}",
                               action='store_true',
                               default=GLOBAL_CONFIG['verbose'])
        subparser.add_argument('--clean', help=f"Remove files from previous runs, default is {GLOBAL_CONFIG['clean']}",
                               action='store_true',
                               default=GLOBAL_CONFIG['clean'])
        subparser.add_argument('--overwrite',
                               help=f"Overwrite output files when they exist, default is {GLOBAL_CONFIG['overwrite']}",
                               action='store_true',
                               default=GLOBAL_CONFIG['overwrite'])
        subparser.add_argument('--debug',
                               help=f"Run extra steps for debugging issues, default is {GLOBAL_CONFIG['debug']}",
                               action='store_true',
                               default=GLOBAL_CONFIG['debug'])
        if textgrid_output:
            # Only meaningful for commands that write TextGrid output.
            subparser.add_argument('--disable_textgrid_cleanup',
                                   help=f"Disable extra clean up steps on TextGrid output, default is {not GLOBAL_CONFIG['cleanup_textgrids']}",
                                   action='store_true',
                                   default=not GLOBAL_CONFIG['cleanup_textgrids'])

    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest="subcommand")
    subparsers.required = True

    # mfa version
    version_parser = subparsers.add_parser('version')

    # mfa align: align a corpus with a pretrained acoustic model
    align_parser = subparsers.add_parser('align')
    align_parser.add_argument('corpus_directory', help="Full path to the directory to align")
    align_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use")
    align_parser.add_argument('acoustic_model_path',
                              help=f"Full path to the archive containing pre-trained model or language ({', '.join(acoustic_languages)})")
    align_parser.add_argument('output_directory',
                              help="Full path to output directory, will be created if it doesn't exist")
    align_parser.add_argument('--config_path', type=str, default='',
                              help="Path to config file to use for alignment")
    align_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
                              help="Number of characters of file names to use for determining speaker, "
                                   "default is to use directory names")
    align_parser.add_argument('-a', '--audio_directory', type=str, default='',
                              help="Audio directory root to use for finding audio files")
    add_global_options(align_parser, textgrid_output=True)

    # mfa adapt: adapt a pretrained acoustic model to a new corpus
    adapt_parser = subparsers.add_parser('adapt')
    adapt_parser.add_argument('corpus_directory', help="Full path to the directory to align")
    adapt_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use")
    adapt_parser.add_argument('acoustic_model_path',
                              help=f"Full path to the archive containing pre-trained model or language ({', '.join(acoustic_languages)})")
    adapt_parser.add_argument('output_model_path',
                              help="Full path to save adapted_model")
    adapt_parser.add_argument('output_directory',
                              help="Full path to output directory, will be created if it doesn't exist")
    adapt_parser.add_argument('--config_path', type=str, default='',
                              help="Path to config file to use for alignment")
    adapt_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
                              help="Number of characters of file names to use for determining speaker, "
                                   "default is to use directory names")
    adapt_parser.add_argument('-a', '--audio_directory', type=str, default='',
                              help="Audio directory root to use for finding audio files")
    add_global_options(adapt_parser, textgrid_output=True)

    # mfa train: train an acoustic model from scratch and align with it
    train_parser = subparsers.add_parser('train')
    train_parser.add_argument('corpus_directory', help="Full path to the source directory to align")
    train_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use",
                              default='')
    train_parser.add_argument('output_directory',
                              help="Full path to output directory, will be created if it doesn't exist")
    train_parser.add_argument('--config_path', type=str, default='',
                              help="Path to config file to use for training and alignment")
    train_parser.add_argument('-o', '--output_model_path', type=str, default='',
                              help="Full path to save resulting acoustic and dictionary model")
    train_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
                              help="Number of characters of filenames to use for determining speaker, "
                                   "default is to use directory names")
    train_parser.add_argument('-a', '--audio_directory', type=str, default='',
                              help="Audio directory root to use for finding audio files")
    train_parser.add_argument('-m', '--acoustic_model_path', type=str, default='',
                              help="Full path to save adapted_model")
    add_global_options(train_parser, textgrid_output=True)

    # mfa validate: check a corpus/dictionary for problems before aligning
    validate_parser = subparsers.add_parser('validate')
    validate_parser.add_argument('corpus_directory', help="Full path to the source directory to align")
    validate_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use",
                                 default='')
    validate_parser.add_argument('acoustic_model_path', nargs='?', default='',
                                 help=f"Full path to the archive containing pre-trained model or language ({', '.join(acoustic_languages)})")
    validate_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
                                 help="Number of characters of file names to use for determining speaker, "
                                      "default is to use directory names")
    validate_parser.add_argument('--test_transcriptions', help="Test accuracy of transcriptions", action='store_true')
    validate_parser.add_argument('--ignore_acoustics',
                                 help="Skip acoustic feature generation and associated validation",
                                 action='store_true')
    add_global_options(validate_parser)

    # mfa g2p: generate pronunciations for a word list with a G2P model
    g2p_model_help_message = f'''Full path to the archive containing pre-trained model or language ({', '.join(g2p_languages)})
If not specified, then orthographic transcription is split into pronunciations.'''
    g2p_parser = subparsers.add_parser('g2p')
    g2p_parser.add_argument("g2p_model_path", help=g2p_model_help_message, nargs='?')
    g2p_parser.add_argument("input_path",
                            help="Corpus to base word list on or a text file of words to generate pronunciations")
    g2p_parser.add_argument("output_path", help="Path to save output dictionary")
    g2p_parser.add_argument('--include_bracketed', help="Included words enclosed by brackets, i.e. [...], (...), <...>",
                            action='store_true')
    g2p_parser.add_argument('--config_path', type=str, default='',
                            help="Path to config file to use for G2P")
    add_global_options(g2p_parser)

    # mfa train_g2p: train a G2P model from an existing dictionary
    train_g2p_parser = subparsers.add_parser('train_g2p')
    train_g2p_parser.add_argument("dictionary_path", help="Location of existing dictionary")
    train_g2p_parser.add_argument("output_model_path", help="Desired location of generated model")
    train_g2p_parser.add_argument('--config_path', type=str, default='',
                                  help="Path to config file to use for G2P")
    train_g2p_parser.add_argument("--validate", action='store_true',
                                  help="Perform an analysis of accuracy training on "
                                       "most of the data and validating on an unseen subset")
    add_global_options(train_g2p_parser)

    # mfa download: fetch pretrained models/dictionaries
    download_parser = subparsers.add_parser('download')
    download_parser.add_argument("model_type",
                                 help="Type of model to download, one of 'acoustic', 'g2p', or 'dictionary'")
    download_parser.add_argument("language", help="Name of language code to download, if not specified, "
                                                  "will list all available languages", nargs='?')

    # mfa train_lm: train (or convert) a language model
    train_lm_parser = subparsers.add_parser('train_lm')
    train_lm_parser.add_argument('source_path', help="Full path to the source directory to train from, alternatively "
                                                    "an ARPA format language model to convert for MFA use")
    train_lm_parser.add_argument('output_model_path', type=str,
                                 help="Full path to save resulting language model")
    train_lm_parser.add_argument('-m', '--model_path', type=str,
                                 help="Full path to existing language model to merge probabilities")
    train_lm_parser.add_argument('-w', '--model_weight', type=float, default=1.0,
                                 help="Weight factor for supplemental language model, defaults to 1.0")
    train_lm_parser.add_argument('--dictionary_path', help="Full path to the pronunciation dictionary to use",
                                 default='')
    train_lm_parser.add_argument('--config_path', type=str, default='',
                                 help="Path to config file to use for training and alignment")
    add_global_options(train_lm_parser)

    # mfa train_dictionary: add pronunciation probabilities to a dictionary
    train_dictionary_parser = subparsers.add_parser('train_dictionary')
    train_dictionary_parser.add_argument('corpus_directory', help="Full path to the directory to align")
    train_dictionary_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use")
    train_dictionary_parser.add_argument('acoustic_model_path',
                                         help=f"Full path to the archive containing pre-trained model or language ({', '.join(acoustic_languages)})")
    train_dictionary_parser.add_argument('output_directory',
                                         help="Full path to output directory, will be created if it doesn't exist")
    train_dictionary_parser.add_argument('--config_path', type=str, default='',
                                         help="Path to config file to use for alignment")
    train_dictionary_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
                                         help="Number of characters of file names to use for determining speaker, "
                                              "default is to use directory names")
    add_global_options(train_dictionary_parser)

    # mfa train_ivector: train an ivector extractor for speaker tasks
    train_ivector_parser = subparsers.add_parser('train_ivector')
    train_ivector_parser.add_argument('corpus_directory', help="Full path to the source directory to "
                                                               "train the ivector extractor")
    train_ivector_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use")
    train_ivector_parser.add_argument('acoustic_model_path', type=str, default='',
                                      help="Full path to acoustic model for alignment")
    train_ivector_parser.add_argument('output_model_path', type=str, default='',
                                      help="Full path to save resulting ivector extractor")
    train_ivector_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
                                      help="Number of characters of filenames to use for determining speaker, "
                                           "default is to use directory names")
    train_ivector_parser.add_argument('--config_path', type=str, default='',
                                      help="Path to config file to use for training")
    add_global_options(train_ivector_parser)

    # mfa classify_speakers: speaker classification/clustering via ivectors
    classify_speakers_parser = subparsers.add_parser('classify_speakers')
    classify_speakers_parser.add_argument('corpus_directory', help="Full path to the source directory to "
                                                                   "run speaker classification")
    classify_speakers_parser.add_argument('ivector_extractor_path', type=str, default='',
                                          help="Full path to ivector extractor model")
    classify_speakers_parser.add_argument('output_directory',
                                          help="Full path to output directory, will be created if it doesn't exist")
    classify_speakers_parser.add_argument('-s', '--num_speakers', type=int, default=0,
                                          help="Number of speakers if known")
    classify_speakers_parser.add_argument('--cluster', help="Using clustering instead of classification",
                                          action='store_true')
    classify_speakers_parser.add_argument('--config_path', type=str, default='',
                                          help="Path to config file to use for ivector extraction")
    add_global_options(classify_speakers_parser)

    # mfa create_segments: voice-activity-detection based segmentation
    create_segments_parser = subparsers.add_parser('create_segments')
    create_segments_parser.add_argument('corpus_directory', help="Full path to the source directory to "
                                                                 "run VAD segmentation")
    create_segments_parser.add_argument('output_directory',
                                        help="Full path to output directory, will be created if it doesn't exist")
    create_segments_parser.add_argument('--config_path', type=str, default='',
                                        help="Path to config file to use for segmentation")
    add_global_options(create_segments_parser)

    # mfa transcribe: transcribe a corpus with acoustic + language models
    transcribe_parser = subparsers.add_parser('transcribe')
    transcribe_parser.add_argument('corpus_directory', help="Full path to the directory to transcribe")
    transcribe_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use")
    transcribe_parser.add_argument('acoustic_model_path',
                                   help=f"Full path to the archive containing pre-trained model or language ({', '.join(acoustic_languages)})")
    transcribe_parser.add_argument('language_model_path',
                                   help=f"Full path to the archive containing pre-trained model or language ({', '.join(lm_languages)})")
    transcribe_parser.add_argument('output_directory',
                                   help="Full path to output directory, will be created if it doesn't exist")
    transcribe_parser.add_argument('--config_path', type=str, default='',
                                   help="Path to config file to use for transcription")
    transcribe_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
                                   help="Number of characters of file names to use for determining speaker, "
                                        "default is to use directory names")
    transcribe_parser.add_argument('-a', '--audio_directory', type=str, default='',
                                   help="Audio directory root to use for finding audio files")
    transcribe_parser.add_argument('-e', '--evaluate', help="Evaluate the transcription "
                                                            "against golden texts", action='store_true')
    add_global_options(transcribe_parser)

    # mfa configure: persist global defaults for later invocations
    config_parser = subparsers.add_parser('configure',
                                          help="The configure command is used to set global defaults for MFA so "
                                               "you don't have to set them every time you call an MFA command.")
    config_parser.add_argument('-t', '--temp_directory', type=str, default='',
                               help=f"Set the default temporary directory, default is {GLOBAL_CONFIG['temp_directory']}")
    config_parser.add_argument('-j', '--num_jobs', type=int,
                               help=f"Set the number of processes to use by default, defaults to {GLOBAL_CONFIG['num_jobs']}")
    config_parser.add_argument('--always_clean', help="Always remove files from previous runs by default",
                               action='store_true')
    config_parser.add_argument('--never_clean', help="Don't remove files from previous runs by default",
                               action='store_true')
    config_parser.add_argument('--always_verbose', help="Default to verbose output", action='store_true')
    config_parser.add_argument('--never_verbose', help="Default to non-verbose output", action='store_true')
    config_parser.add_argument('--always_debug', help="Default to running debugging steps", action='store_true')
    config_parser.add_argument('--never_debug', help="Default to not running debugging steps", action='store_true')
    config_parser.add_argument('--always_overwrite', help="Always overwrite output files", action='store_true')
    config_parser.add_argument('--never_overwrite', help="Never overwrite output files (if file already exists, "
                                                         "the output will be saved in the temp directory)",
                               action='store_true')
    config_parser.add_argument('--disable_mp', help="Disable all multiprocessing (not recommended as it will usually "
                                                    "increase processing times)", action='store_true')
    config_parser.add_argument('--enable_mp', help="Enable multiprocessing (recommended and enabled by default)",
                               action='store_true')
    config_parser.add_argument('--disable_textgrid_cleanup', help="Disable postprocessing of TextGrids that cleans up "
                                                                  "silences and recombines compound words and clitics",
                               action='store_true')
    config_parser.add_argument('--enable_textgrid_cleanup', help="Enable postprocessing of TextGrids that cleans up "
                                                                 "silences and recombines compound words and clitics",
                               action='store_true')

    # mfa history: show previous invocations
    history_parser = subparsers.add_parser('history')
    # Bug fix: type=int so a CLI-supplied depth becomes numeric; previously a
    # positional value stayed a str (nargs='?') and broke the history slice.
    history_parser.add_argument('depth', help='Number of commands to list', type=int, nargs='?', default=10)
    history_parser.add_argument('--verbose', help="Flag for whether to output additional information",
                                action='store_true')

    # mfa annotator / mfa anchor: launch the annotation GUI
    annotator_parser = subparsers.add_parser('annotator')
    anchor_parser = subparsers.add_parser('anchor')

    # mfa thirdparty: manage bundled Kaldi/OpenFst binaries
    thirdparty_parser = subparsers.add_parser('thirdparty')
    thirdparty_parser.add_argument("command",
                                   help="One of 'download', 'validate', or 'kaldi'")
    thirdparty_parser.add_argument('local_directory',
                                   help="Full path to the built executables to collect", nargs="?",
                                   default='')
    return parser
# Module-level parser instance (e.g. for doc generators / shell completion);
# main() builds its own fresh parser so configure changes are picked up.
parser = create_parser()
def main():
    """CLI entry point: parse arguments, validate third-party prerequisites,
    and dispatch to the requested subcommand.

    Exits with status 1 on ambiguous short options, failed binary validation,
    missing optional dependencies, or an MFAError (unless --debug re-raises).
    """
    parser = create_parser()
    mp.freeze_support()
    args, unknown = parser.parse_known_args()
    # '-c' and '-d' are prefixes of several long options; reject them rather
    # than let argparse guess.
    for short in ['-c', '-d']:
        if short in unknown:
            print(f'Due to the number of options that `{short}` could refer to, it is not accepted. '
                  'Please specify the full argument')
            sys.exit(1)
    try:
        fix_path()
        # Up-front validation of thirdparty binaries / optional imports so the
        # user gets an actionable message instead of a mid-run failure.
        if args.subcommand in ['align', 'train', 'train_ivector']:
            from montreal_forced_aligner.thirdparty.kaldi import validate_alignment_binaries
            if not validate_alignment_binaries():
                print("There was an issue validating Kaldi binaries, please ensure you've downloaded them via the "
                      "'mfa thirdparty download' command. See 'mfa thirdparty validate' for more detailed information "
                      "on why this check failed.")
                sys.exit(1)
        elif args.subcommand in ['transcribe']:
            from montreal_forced_aligner.thirdparty.kaldi import validate_transcribe_binaries
            if not validate_transcribe_binaries():
                print("There was an issue validating Kaldi binaries, please ensure you've downloaded them via the "
                      "'mfa thirdparty download' command. See 'mfa thirdparty validate' for more detailed information "
                      "on why this check failed. If you are on MacOS, please note that the thirdparty binaries available "
                      "via the download command do not contain the transcription ones. To get this functionality working "
                      "for the time being, please build kaldi locally and follow the instructions for running the "
                      "'mfa thirdparty kaldi' command.")
                sys.exit(1)
        elif args.subcommand in ['train_dictionary']:
            from montreal_forced_aligner.thirdparty.kaldi import validate_train_dictionary_binaries
            if not validate_train_dictionary_binaries():
                print("There was an issue validating Kaldi binaries, please ensure you've downloaded them via the "
                      "'mfa thirdparty download' command. See 'mfa thirdparty validate' for more detailed information "
                      "on why this check failed. If you are on MacOS, please note that the thirdparty binaries available "
                      "via the download command do not contain the train_dictionary ones. To get this functionality working "
                      "for the time being, please build kaldi locally and follow the instructions for running the "
                      "'mfa thirdparty kaldi' command.")
                sys.exit(1)
        elif args.subcommand in ['g2p', 'train_g2p']:
            try:
                import pynini
            except ImportError:
                print("There was an issue importing Pynini, please ensure that it is installed. If you are on Windows, "
                      "please use the Windows Subsystem for Linux to use g2p functionality.")
                sys.exit(1)

        # Dispatch to the subcommand implementation.
        if args.subcommand == 'align':
            run_align_corpus(args, unknown, acoustic_languages)
        elif args.subcommand == 'adapt':
            run_adapt_model(args, unknown, acoustic_languages)
        elif args.subcommand == 'train':
            run_train_corpus(args, unknown)
        elif args.subcommand == 'g2p':
            run_g2p(args, unknown, g2p_languages)
        elif args.subcommand == 'train_g2p':
            run_train_g2p(args, unknown)
        elif args.subcommand == 'validate':
            run_validate_corpus(args, unknown)
        elif args.subcommand == 'download':
            run_download(args)
        elif args.subcommand == 'train_lm':
            run_train_lm(args, unknown)
        elif args.subcommand == 'train_dictionary':
            run_train_dictionary(args, unknown)
        elif args.subcommand == 'train_ivector':
            run_train_ivector_extractor(args, unknown)
        elif args.subcommand == 'classify_speakers':
            run_classify_speakers(args, unknown)
        elif args.subcommand in ['annotator', 'anchor']:
            from montreal_forced_aligner.command_line.anchor import run_anchor
            run_anchor(args)
        elif args.subcommand == 'thirdparty':
            run_thirdparty(args)
        elif args.subcommand == 'transcribe':
            run_transcribe_corpus(args, unknown)
        elif args.subcommand == 'create_segments':
            run_create_segments(args, unknown)
        elif args.subcommand == 'configure':
            update_global_config(args)
            # Refresh the module-level snapshot so later code in this process
            # sees the newly persisted defaults.
            global GLOBAL_CONFIG
            GLOBAL_CONFIG = load_global_config()
        elif args.subcommand == 'history':
            # int() guards against a str depth from the CLI (nargs='?').
            depth = int(args.depth)
            history = load_command_history()[-depth:]
            # Bug fix: a spurious outer `for h in history:` loop previously
            # reprinted the header and the entire listing once per entry.
            if args.verbose:
                print('command\tDate\tExecution time\tVersion\tExit code\tException')
                for h in history:
                    execution_time = time.strftime('%H:%M:%S', time.gmtime(h['execution_time']))
                    d = h['date'].isoformat()
                    print(
                        f"{h['command']}\t{d}\t{execution_time}\t{h['version']}\t{h['exit_code']}\t{h['exception']}")
            else:
                for h in history:
                    print(h['command'])
        elif args.subcommand == 'version':
            print(__version__)
    except MFAError as e:
        # With --debug let the traceback propagate; otherwise fail cleanly.
        if getattr(args, 'debug', False):
            raise
        print(e)
        sys.exit(1)
    finally:
        # Always restore the environment modified by fix_path().
        unfix_path()
# Script entry guard: only run the CLI when executed directly.
if __name__ == '__main__':
    main()