Spaces:
Runtime error
Runtime error
File size: 30,043 Bytes
d1b91e7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 |
import atexit
import sys
import os
import time
import argparse
from datetime import datetime
import multiprocessing as mp
from montreal_forced_aligner import __version__
from montreal_forced_aligner.utils import get_available_acoustic_languages, get_available_g2p_languages, \
get_available_dict_languages, get_available_lm_languages, get_available_ivector_languages
from montreal_forced_aligner.command_line.align import run_align_corpus
from mfa_usr.adapt import run_adapt_model
from montreal_forced_aligner.command_line.train_and_align import run_train_corpus
from montreal_forced_aligner.command_line.g2p import run_g2p
from montreal_forced_aligner.command_line.train_g2p import run_train_g2p
from montreal_forced_aligner.command_line.validate import run_validate_corpus
from montreal_forced_aligner.command_line.download import run_download
from montreal_forced_aligner.command_line.train_lm import run_train_lm
from montreal_forced_aligner.command_line.thirdparty import run_thirdparty
from montreal_forced_aligner.command_line.train_ivector_extractor import run_train_ivector_extractor
from montreal_forced_aligner.command_line.classify_speakers import run_classify_speakers
from montreal_forced_aligner.command_line.transcribe import run_transcribe_corpus
from montreal_forced_aligner.command_line.train_dictionary import run_train_dictionary
from montreal_forced_aligner.command_line.create_segments import run_create_segments
from montreal_forced_aligner.exceptions import MFAError
from montreal_forced_aligner.config import update_global_config, load_global_config, update_command_history, \
load_command_history
class ExitHooks:
    """Record how the interpreter terminates.

    Once ``hook()`` is installed, ``exit_code`` holds the argument of any
    explicit ``sys.exit`` call and ``exception`` holds any uncaught
    exception, so an ``atexit`` handler can log the outcome of the run.
    """

    def __init__(self):
        # Nothing observed yet; both stay None on a clean fall-through exit.
        self.exit_code = None
        self.exception = None

    def hook(self):
        """Wrap ``sys.exit`` and replace ``sys.excepthook`` with recorders."""
        self._orig_exit = sys.exit
        sys.exit = self.exit
        sys.excepthook = self.exc_handler

    def exit(self, code=0):
        """Remember *code*, then defer to the original ``sys.exit``."""
        self.exit_code = code
        self._orig_exit(code)

    def exc_handler(self, exc_type, exc, *args):
        """Remember the uncaught exception instance."""
        self.exception = exc
# Install the exit/exception hooks immediately at import time so every way
# the process can terminate is captured for the command-history record.
hooks = ExitHooks()
hooks.hook()
# Wall-clock start of this invocation; used by history_save_handler to
# compute execution time and timestamp the history entry.
BEGIN = time.time()
BEGIN_DATE = datetime.now()
def history_save_handler():
    """Persist a record of this invocation to the MFA command history.

    Captures the full command line, wall-clock duration, start date, MFA
    version, and how the process ended (exit code and/or exception), as
    observed by the module-level ``hooks``.  Registered below to run at
    interpreter exit.
    """
    entry = {
        'command': ' '.join(sys.argv),
        'execution_time': time.time() - BEGIN,
        'date': BEGIN_DATE,
        'version': __version__
    }
    if hooks.exit_code is not None:
        # sys.exit() was called explicitly; use the recorded code.
        entry['exit_code'] = hooks.exit_code
        entry['exception'] = ''
    elif hooks.exception is not None:
        # An uncaught exception ended the run; conventionally exit code 1.
        entry['exit_code'] = 1
        entry['exception'] = hooks.exception
    else:
        # Clean fall-through termination.
        entry['exception'] = ''
        entry['exit_code'] = 0
    update_command_history(entry)


atexit.register(history_save_handler)
def fix_path():
    """Prepend MFA's bundled thirdparty binary directory to ``PATH`` (and,
    on non-Windows platforms, to ``LD_LIBRARY_PATH``) so the downloaded
    Kaldi executables resolve before any system-wide installation.
    """
    from montreal_forced_aligner.config import TEMP_DIR
    bin_dir = os.path.join(TEMP_DIR, 'thirdparty', 'bin')
    sep = ';' if sys.platform == 'win32' else ':'
    os.environ['PATH'] = sep.join((bin_dir, os.environ.get('PATH', '')))
    if sys.platform != 'win32':
        # Shared libraries shipped alongside the binaries must be found too.
        os.environ['LD_LIBRARY_PATH'] = sep.join((bin_dir, os.environ.get('LD_LIBRARY_PATH', '')))
def unfix_path():
    """Undo :func:`fix_path` by dropping the first (prepended) entry from
    ``PATH``, and from ``LD_LIBRARY_PATH`` on non-Windows platforms.

    Assumes ``fix_path`` ran earlier in this process, so the first entry of
    each variable is the thirdparty binary directory to remove.
    """
    if sys.platform == 'win32':
        sep = ';'
        os.environ['PATH'] = sep.join(os.environ['PATH'].split(sep)[1:])
    else:
        sep = ':'
        os.environ['PATH'] = sep.join(os.environ['PATH'].split(sep)[1:])
        # Bug fix: this previously rebuilt LD_LIBRARY_PATH from PATH's
        # entries, corrupting it.  Strip the prepended directory from
        # LD_LIBRARY_PATH itself; .get guards against it being unset.
        os.environ['LD_LIBRARY_PATH'] = sep.join(
            os.environ.get('LD_LIBRARY_PATH', '').split(sep)[1:])
# Query the pretrained-model registry once at import time.  These language
# lists are interpolated into subcommand help strings below and passed to the
# run_* dispatchers in main() for validating user-supplied language names.
acoustic_languages = get_available_acoustic_languages()
ivector_languages = get_available_ivector_languages()
lm_languages = get_available_lm_languages()
g2p_languages = get_available_g2p_languages()
dict_languages = get_available_dict_languages()
def create_parser():
    """Construct the top-level ``mfa`` argument parser.

    One subparser is registered per subcommand (align, adapt, train,
    validate, g2p, train_g2p, download, train_lm, train_dictionary,
    train_ivector, classify_speakers, create_segments, transcribe,
    configure, history, annotator/anchor, thirdparty, version).  Defaults
    for the shared options come from the persisted global configuration
    (see ``mfa configure``), so help text reflects the user's saved
    settings.

    :return: a fully configured :class:`argparse.ArgumentParser`
    """
    GLOBAL_CONFIG = load_global_config()

    def add_global_options(subparser, textgrid_output=False):
        """Attach the options shared by every subcommand.

        :param subparser: the subparser to extend
        :param textgrid_output: when True, also add the TextGrid clean-up
            toggle (only meaningful for commands that write TextGrids)
        """
        subparser.add_argument('-t', '--temp_directory', type=str, default=GLOBAL_CONFIG['temp_directory'],
                               help=f"Temporary directory root to store MFA created files, default is {GLOBAL_CONFIG['temp_directory']}")
        subparser.add_argument('--disable_mp',
                               help=f"Disable any multiprocessing during alignment (not recommended), default is {not GLOBAL_CONFIG['use_mp']}",
                               action='store_true',
                               default=not GLOBAL_CONFIG['use_mp'])
        subparser.add_argument('-j', '--num_jobs', type=int, default=GLOBAL_CONFIG['num_jobs'],
                               help=f"Number of data splits (and cores to use if multiprocessing is enabled), defaults "
                                    f"is {GLOBAL_CONFIG['num_jobs']}")
        subparser.add_argument('-v', '--verbose', help=f"Output debug messages, default is {GLOBAL_CONFIG['verbose']}",
                               action='store_true',
                               default=GLOBAL_CONFIG['verbose'])
        subparser.add_argument('--clean', help=f"Remove files from previous runs, default is {GLOBAL_CONFIG['clean']}",
                               action='store_true',
                               default=GLOBAL_CONFIG['clean'])
        subparser.add_argument('--overwrite',
                               help=f"Overwrite output files when they exist, default is {GLOBAL_CONFIG['overwrite']}",
                               action='store_true',
                               default=GLOBAL_CONFIG['overwrite'])
        subparser.add_argument('--debug',
                               help=f"Run extra steps for debugging issues, default is {GLOBAL_CONFIG['debug']}",
                               action='store_true',
                               default=GLOBAL_CONFIG['debug'])
        if textgrid_output:
            subparser.add_argument('--disable_textgrid_cleanup',
                                   help=f"Disable extra clean up steps on TextGrid output, default is {not GLOBAL_CONFIG['cleanup_textgrids']}",
                                   action='store_true',
                                   default=not GLOBAL_CONFIG['cleanup_textgrids'])

    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest="subcommand")
    subparsers.required = True

    # version: prints __version__ and exits (no arguments)
    subparsers.add_parser('version')

    # align: align a corpus with a pretrained acoustic model
    align_parser = subparsers.add_parser('align')
    align_parser.add_argument('corpus_directory', help="Full path to the directory to align")
    align_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use")
    align_parser.add_argument('acoustic_model_path',
                              help=f"Full path to the archive containing pre-trained model or language ({', '.join(acoustic_languages)})")
    align_parser.add_argument('output_directory',
                              help="Full path to output directory, will be created if it doesn't exist")
    align_parser.add_argument('--config_path', type=str, default='',
                              help="Path to config file to use for alignment")
    align_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
                              help="Number of characters of file names to use for determining speaker, "
                                   'default is to use directory names')
    align_parser.add_argument('-a', '--audio_directory', type=str, default='',
                              help="Audio directory root to use for finding audio files")
    add_global_options(align_parser, textgrid_output=True)

    # adapt: adapt a pretrained acoustic model to a new corpus
    adapt_parser = subparsers.add_parser('adapt')
    adapt_parser.add_argument('corpus_directory', help="Full path to the directory to align")
    adapt_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use")
    adapt_parser.add_argument('acoustic_model_path',
                              help=f"Full path to the archive containing pre-trained model or language ({', '.join(acoustic_languages)})")
    adapt_parser.add_argument('output_model_path',
                              help="Full path to save adapted_model")
    adapt_parser.add_argument('output_directory',
                              help="Full path to output directory, will be created if it doesn't exist")
    adapt_parser.add_argument('--config_path', type=str, default='',
                              help="Path to config file to use for alignment")
    adapt_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
                              help="Number of characters of file names to use for determining speaker, "
                                   'default is to use directory names')
    adapt_parser.add_argument('-a', '--audio_directory', type=str, default='',
                              help="Audio directory root to use for finding audio files")
    add_global_options(adapt_parser, textgrid_output=True)

    # train: train a new acoustic model and align with it
    train_parser = subparsers.add_parser('train')
    train_parser.add_argument('corpus_directory', help="Full path to the source directory to align")
    train_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use",
                              default='')
    train_parser.add_argument('output_directory',
                              help="Full path to output directory, will be created if it doesn't exist")
    train_parser.add_argument('--config_path', type=str, default='',
                              help="Path to config file to use for training and alignment")
    train_parser.add_argument('-o', '--output_model_path', type=str, default='',
                              help="Full path to save resulting acoustic and dictionary model")
    train_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
                              help="Number of characters of filenames to use for determining speaker, "
                                   'default is to use directory names')
    train_parser.add_argument('-a', '--audio_directory', type=str, default='',
                              help="Audio directory root to use for finding audio files")
    train_parser.add_argument('-m', '--acoustic_model_path', type=str, default='',
                              help="Full path to save adapted_model")
    add_global_options(train_parser, textgrid_output=True)

    # validate: check a corpus/dictionary (and optionally model) for problems
    validate_parser = subparsers.add_parser('validate')
    validate_parser.add_argument('corpus_directory', help="Full path to the source directory to align")
    validate_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use",
                                 default='')
    validate_parser.add_argument('acoustic_model_path', nargs='?', default='',
                                 help=f"Full path to the archive containing pre-trained model or language ({', '.join(acoustic_languages)})")
    validate_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
                                 help="Number of characters of file names to use for determining speaker, "
                                      'default is to use directory names')
    validate_parser.add_argument('--test_transcriptions', help="Test accuracy of transcriptions", action='store_true')
    validate_parser.add_argument('--ignore_acoustics',
                                 help="Skip acoustic feature generation and associated validation",
                                 action='store_true')
    add_global_options(validate_parser)

    # g2p: generate pronunciations for a word list or corpus
    g2p_model_help_message = f'''Full path to the archive containing pre-trained model or language ({', '.join(g2p_languages)})
If not specified, then orthographic transcription is split into pronunciations.'''
    g2p_parser = subparsers.add_parser('g2p')
    g2p_parser.add_argument("g2p_model_path", help=g2p_model_help_message, nargs='?')
    g2p_parser.add_argument("input_path",
                            help="Corpus to base word list on or a text file of words to generate pronunciations")
    g2p_parser.add_argument("output_path", help="Path to save output dictionary")
    g2p_parser.add_argument('--include_bracketed', help="Included words enclosed by brackets, i.e. [...], (...), <...>",
                            action='store_true')
    g2p_parser.add_argument('--config_path', type=str, default='',
                            help="Path to config file to use for G2P")
    add_global_options(g2p_parser)

    # train_g2p: train a G2P model from an existing dictionary
    train_g2p_parser = subparsers.add_parser('train_g2p')
    train_g2p_parser.add_argument("dictionary_path", help="Location of existing dictionary")
    train_g2p_parser.add_argument("output_model_path", help="Desired location of generated model")
    train_g2p_parser.add_argument('--config_path', type=str, default='',
                                  help="Path to config file to use for G2P")
    train_g2p_parser.add_argument("--validate", action='store_true',
                                  help="Perform an analysis of accuracy training on "
                                       "most of the data and validating on an unseen subset")
    add_global_options(train_g2p_parser)

    # download: fetch pretrained models
    download_parser = subparsers.add_parser('download')
    download_parser.add_argument("model_type",
                                 help="Type of model to download, one of 'acoustic', 'g2p', or 'dictionary'")
    download_parser.add_argument("language", help="Name of language code to download, if not specified, "
                                                  "will list all available languages", nargs='?')

    # train_lm: train or convert a language model
    train_lm_parser = subparsers.add_parser('train_lm')
    train_lm_parser.add_argument('source_path', help="Full path to the source directory to train from, alternatively "
                                                     'an ARPA format language model to convert for MFA use')
    train_lm_parser.add_argument('output_model_path', type=str,
                                 help="Full path to save resulting language model")
    train_lm_parser.add_argument('-m', '--model_path', type=str,
                                 help="Full path to existing language model to merge probabilities")
    train_lm_parser.add_argument('-w', '--model_weight', type=float, default=1.0,
                                 help="Weight factor for supplemental language model, defaults to 1.0")
    train_lm_parser.add_argument('--dictionary_path', help="Full path to the pronunciation dictionary to use",
                                 default='')
    train_lm_parser.add_argument('--config_path', type=str, default='',
                                 help="Path to config file to use for training and alignment")
    add_global_options(train_lm_parser)

    # train_dictionary: estimate pronunciation probabilities from alignment
    train_dictionary_parser = subparsers.add_parser('train_dictionary')
    train_dictionary_parser.add_argument('corpus_directory', help="Full path to the directory to align")
    train_dictionary_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use")
    train_dictionary_parser.add_argument('acoustic_model_path',
                                         help=f"Full path to the archive containing pre-trained model or language ({', '.join(acoustic_languages)})")
    train_dictionary_parser.add_argument('output_directory',
                                         help="Full path to output directory, will be created if it doesn't exist")
    train_dictionary_parser.add_argument('--config_path', type=str, default='',
                                         help="Path to config file to use for alignment")
    train_dictionary_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
                                         help="Number of characters of file names to use for determining speaker, "
                                              'default is to use directory names')
    add_global_options(train_dictionary_parser)

    # train_ivector: train an ivector extractor for speaker classification
    train_ivector_parser = subparsers.add_parser('train_ivector')
    train_ivector_parser.add_argument('corpus_directory', help="Full path to the source directory to "
                                                              'train the ivector extractor')
    train_ivector_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use")
    train_ivector_parser.add_argument('acoustic_model_path', type=str, default='',
                                      help="Full path to acoustic model for alignment")
    train_ivector_parser.add_argument('output_model_path', type=str, default='',
                                      help="Full path to save resulting ivector extractor")
    train_ivector_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
                                      help="Number of characters of filenames to use for determining speaker, "
                                           'default is to use directory names')
    train_ivector_parser.add_argument('--config_path', type=str, default='',
                                      help="Path to config file to use for training")
    add_global_options(train_ivector_parser)

    # classify_speakers: assign speakers using a trained ivector extractor
    classify_speakers_parser = subparsers.add_parser('classify_speakers')
    classify_speakers_parser.add_argument('corpus_directory', help="Full path to the source directory to "
                                                                   'run speaker classification')
    classify_speakers_parser.add_argument('ivector_extractor_path', type=str, default='',
                                          help="Full path to ivector extractor model")
    classify_speakers_parser.add_argument('output_directory',
                                          help="Full path to output directory, will be created if it doesn't exist")
    classify_speakers_parser.add_argument('-s', '--num_speakers', type=int, default=0,
                                          help="Number of speakers if known")
    classify_speakers_parser.add_argument('--cluster', help="Using clustering instead of classification",
                                          action='store_true')
    classify_speakers_parser.add_argument('--config_path', type=str, default='',
                                          help="Path to config file to use for ivector extraction")
    add_global_options(classify_speakers_parser)

    # create_segments: voice-activity-based segmentation
    create_segments_parser = subparsers.add_parser('create_segments')
    create_segments_parser.add_argument('corpus_directory', help="Full path to the source directory to "
                                                                 'run VAD segmentation')
    create_segments_parser.add_argument('output_directory',
                                        help="Full path to output directory, will be created if it doesn't exist")
    create_segments_parser.add_argument('--config_path', type=str, default='',
                                        help="Path to config file to use for segmentation")
    add_global_options(create_segments_parser)

    # transcribe: decode a corpus with acoustic + language models
    transcribe_parser = subparsers.add_parser('transcribe')
    transcribe_parser.add_argument('corpus_directory', help="Full path to the directory to transcribe")
    transcribe_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use")
    transcribe_parser.add_argument('acoustic_model_path',
                                   help=f"Full path to the archive containing pre-trained model or language ({', '.join(acoustic_languages)})")
    transcribe_parser.add_argument('language_model_path',
                                   help=f"Full path to the archive containing pre-trained model or language ({', '.join(lm_languages)})")
    transcribe_parser.add_argument('output_directory',
                                   help="Full path to output directory, will be created if it doesn't exist")
    transcribe_parser.add_argument('--config_path', type=str, default='',
                                   help="Path to config file to use for transcription")
    transcribe_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
                                   help="Number of characters of file names to use for determining speaker, "
                                        'default is to use directory names')
    transcribe_parser.add_argument('-a', '--audio_directory', type=str, default='',
                                   help="Audio directory root to use for finding audio files")
    transcribe_parser.add_argument('-e', '--evaluate', help="Evaluate the transcription "
                                                            "against golden texts", action='store_true')
    add_global_options(transcribe_parser)

    # configure: persist global defaults
    config_parser = subparsers.add_parser('configure',
                                          help="The configure command is used to set global defaults for MFA so "
                                               "you don't have to set them every time you call an MFA command.")
    config_parser.add_argument('-t', '--temp_directory', type=str, default='',
                               help=f"Set the default temporary directory, default is {GLOBAL_CONFIG['temp_directory']}")
    config_parser.add_argument('-j', '--num_jobs', type=int,
                               help=f"Set the number of processes to use by default, defaults to {GLOBAL_CONFIG['num_jobs']}")
    config_parser.add_argument('--always_clean', help="Always remove files from previous runs by default",
                               action='store_true')
    config_parser.add_argument('--never_clean', help="Don't remove files from previous runs by default",
                               action='store_true')
    config_parser.add_argument('--always_verbose', help="Default to verbose output", action='store_true')
    config_parser.add_argument('--never_verbose', help="Default to non-verbose output", action='store_true')
    config_parser.add_argument('--always_debug', help="Default to running debugging steps", action='store_true')
    config_parser.add_argument('--never_debug', help="Default to not running debugging steps", action='store_true')
    config_parser.add_argument('--always_overwrite', help="Always overwrite output files", action='store_true')
    config_parser.add_argument('--never_overwrite', help="Never overwrite output files (if file already exists, "
                                                         "the output will be saved in the temp directory)",
                               action='store_true')
    config_parser.add_argument('--disable_mp', help="Disable all multiprocessing (not recommended as it will usually "
                                                    "increase processing times)", action='store_true')
    config_parser.add_argument('--enable_mp', help="Enable multiprocessing (recommended and enabled by default)",
                               action='store_true')
    config_parser.add_argument('--disable_textgrid_cleanup', help="Disable postprocessing of TextGrids that cleans up "
                                                                  "silences and recombines compound words and clitics",
                               action='store_true')
    config_parser.add_argument('--enable_textgrid_cleanup', help="Enable postprocessing of TextGrids that cleans up "
                                                                 "silences and recombines compound words and clitics",
                               action='store_true')

    # history: show recent mfa invocations
    history_parser = subparsers.add_parser('history')
    # Bug fix: type=int added so a depth given on the command line arrives as
    # an int (argparse would otherwise pass a str, breaking the slice in
    # main()'s history handling).
    history_parser.add_argument('depth', help='Number of commands to list', nargs='?', type=int, default=10)
    history_parser.add_argument('--verbose', help="Flag for whether to output additional information",
                                action='store_true')

    # annotator / anchor: GUI entry points (no extra arguments)
    subparsers.add_parser('annotator')
    subparsers.add_parser('anchor')

    # thirdparty: manage Kaldi and other bundled binaries
    thirdparty_parser = subparsers.add_parser('thirdparty')
    thirdparty_parser.add_argument("command",
                                   help="One of 'download', 'validate', or 'kaldi'")
    thirdparty_parser.add_argument('local_directory',
                                   help="Full path to the built executables to collect", nargs="?",
                                   default='')
    return parser


# Module-level parser instance (e.g. for documentation/sphinx-argparse use);
# main() builds its own fresh parser.
parser = create_parser()
def main():
    """Entry point for the ``mfa`` command.

    Parses arguments, validates third-party (Kaldi/Pynini) availability for
    the subcommands that need it, dispatches to the appropriate ``run_*``
    function, and always restores the environment paths on the way out.
    """
    parser = create_parser()
    mp.freeze_support()
    args, unknown = parser.parse_known_args()
    # Several subcommands define different meanings for these single-letter
    # flags, so reject them outright rather than guessing.
    for short in ['-c', '-d']:
        if short in unknown:
            print(f'Due to the number of options that `{short}` could refer to, it is not accepted. '
                  'Please specify the full argument')
            sys.exit(1)
    try:
        fix_path()
        # Verify the required Kaldi binaries are present before doing any
        # real work, with a subcommand-specific message on failure.
        if args.subcommand in ['align', 'train', 'train_ivector']:
            from montreal_forced_aligner.thirdparty.kaldi import validate_alignment_binaries
            if not validate_alignment_binaries():
                print("There was an issue validating Kaldi binaries, please ensure you've downloaded them via the "
                      "'mfa thirdparty download' command. See 'mfa thirdparty validate' for more detailed information "
                      "on why this check failed.")
                sys.exit(1)
        elif args.subcommand in ['transcribe']:
            from montreal_forced_aligner.thirdparty.kaldi import validate_transcribe_binaries
            if not validate_transcribe_binaries():
                print("There was an issue validating Kaldi binaries, please ensure you've downloaded them via the "
                      "'mfa thirdparty download' command. See 'mfa thirdparty validate' for more detailed information "
                      "on why this check failed. If you are on MacOS, please note that the thirdparty binaries available "
                      "via the download command do not contain the transcription ones. To get this functionality working "
                      "for the time being, please build kaldi locally and follow the instructions for running the "
                      "'mfa thirdparty kaldi' command.")
                sys.exit(1)
        elif args.subcommand in ['train_dictionary']:
            from montreal_forced_aligner.thirdparty.kaldi import validate_train_dictionary_binaries
            if not validate_train_dictionary_binaries():
                print("There was an issue validating Kaldi binaries, please ensure you've downloaded them via the "
                      "'mfa thirdparty download' command. See 'mfa thirdparty validate' for more detailed information "
                      "on why this check failed. If you are on MacOS, please note that the thirdparty binaries available "
                      "via the download command do not contain the train_dictionary ones. To get this functionality working "
                      "for the time being, please build kaldi locally and follow the instructions for running the "
                      "'mfa thirdparty kaldi' command.")
                sys.exit(1)
        elif args.subcommand in ['g2p', 'train_g2p']:
            try:
                import pynini
            except ImportError:
                print("There was an issue importing Pynini, please ensure that it is installed. If you are on Windows, "
                      "please use the Windows Subsystem for Linux to use g2p functionality.")
                sys.exit(1)
        # Dispatch to the selected subcommand.
        if args.subcommand == 'align':
            run_align_corpus(args, unknown, acoustic_languages)
        elif args.subcommand == 'adapt':
            run_adapt_model(args, unknown, acoustic_languages)
        elif args.subcommand == 'train':
            run_train_corpus(args, unknown)
        elif args.subcommand == 'g2p':
            run_g2p(args, unknown, g2p_languages)
        elif args.subcommand == 'train_g2p':
            run_train_g2p(args, unknown)
        elif args.subcommand == 'validate':
            run_validate_corpus(args, unknown)
        elif args.subcommand == 'download':
            run_download(args)
        elif args.subcommand == 'train_lm':
            run_train_lm(args, unknown)
        elif args.subcommand == 'train_dictionary':
            run_train_dictionary(args, unknown)
        elif args.subcommand == 'train_ivector':
            run_train_ivector_extractor(args, unknown)
        elif args.subcommand == 'classify_speakers':
            run_classify_speakers(args, unknown)
        elif args.subcommand in ['annotator', 'anchor']:
            from montreal_forced_aligner.command_line.anchor import run_anchor
            run_anchor(args)
        elif args.subcommand == 'thirdparty':
            run_thirdparty(args)
        elif args.subcommand == 'transcribe':
            run_transcribe_corpus(args, unknown)
        elif args.subcommand == 'create_segments':
            run_create_segments(args, unknown)
        elif args.subcommand == 'configure':
            update_global_config(args)
            global GLOBAL_CONFIG
            GLOBAL_CONFIG = load_global_config()
        elif args.subcommand == 'history':
            # Coerce defensively: a depth supplied on the command line may
            # arrive as a str from argparse, which would break the slice.
            depth = int(args.depth)
            history = load_command_history()[-depth:]
            if args.verbose:
                # Bug fix: the header and table were previously nested inside
                # an extra loop over the history, printing the entire table
                # once per entry.  Print the header once, then one row each.
                print('command\tDate\tExecution time\tVersion\tExit code\tException')
                for h in history:
                    execution_time = time.strftime('%H:%M:%S', time.gmtime(h['execution_time']))
                    d = h['date'].isoformat()
                    print(
                        f"{h['command']}\t{d}\t{execution_time}\t{h['version']}\t{h['exit_code']}\t{h['exception']}")
            else:
                for h in history:
                    print(h['command'])
        elif args.subcommand == 'version':
            print(__version__)
    except MFAError as e:
        # In debug mode surface the full traceback; otherwise report and
        # exit non-zero.
        if getattr(args, 'debug', False):
            raise
        print(e)
        sys.exit(1)
    finally:
        # Always restore PATH/LD_LIBRARY_PATH, even on error.
        unfix_path()


if __name__ == '__main__':
    main()
|