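"""Command line entry point for the Montreal Forced Aligner.

Builds the ``mfa`` argument parser and dispatches subcommands, e.g. (paths are illustrative)::

    mfa align /path/to/corpus /path/to/lexicon.dict english /path/to/output
"""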
import atexit
import sys
import os
import time
import argparse
from datetime import datetime
import multiprocessing as mp

from montreal_forced_aligner import __version__

from montreal_forced_aligner.utils import get_available_acoustic_languages, get_available_g2p_languages, \
    get_available_dict_languages, get_available_lm_languages, get_available_ivector_languages
from montreal_forced_aligner.command_line.align import run_align_corpus

from mfa_usr.adapt import run_adapt_model
from montreal_forced_aligner.command_line.train_and_align import run_train_corpus
from montreal_forced_aligner.command_line.g2p import run_g2p
from montreal_forced_aligner.command_line.train_g2p import run_train_g2p
from montreal_forced_aligner.command_line.validate import run_validate_corpus
from montreal_forced_aligner.command_line.download import run_download
from montreal_forced_aligner.command_line.train_lm import run_train_lm
from montreal_forced_aligner.command_line.thirdparty import run_thirdparty
from montreal_forced_aligner.command_line.train_ivector_extractor import run_train_ivector_extractor
from montreal_forced_aligner.command_line.classify_speakers import run_classify_speakers
from montreal_forced_aligner.command_line.transcribe import run_transcribe_corpus
from montreal_forced_aligner.command_line.train_dictionary import run_train_dictionary
from montreal_forced_aligner.command_line.create_segments import run_create_segments
from montreal_forced_aligner.exceptions import MFAError
from montreal_forced_aligner.config import update_global_config, load_global_config, update_command_history, \
    load_command_history


class ExitHooks(object):
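    """Capture the process exit code or uncaught exception so the atexit history handler can record how the command ended."""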
    def __init__(self):
        self.exit_code = None
        self.exception = None

    def hook(self):
        self._orig_exit = sys.exit
        sys.exit = self.exit
        sys.excepthook = self.exc_handler

    def exit(self, code=0):
        self.exit_code = code
        self._orig_exit(code)

    def exc_handler(self, exc_type, exc, *args):
        self.exception = exc


hooks = ExitHooks()
hooks.hook()

BEGIN = time.time()
BEGIN_DATE = datetime.now()


def history_save_handler():
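    """Record the command line, runtime, date, MFA version, and exit status of this run in the command history."""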
    history_data = {
        'command': ' '.join(sys.argv),
        'execution_time': time.time() - BEGIN,
        'date': BEGIN_DATE,
        'version': __version__
    }

    if hooks.exit_code is not None:
        history_data['exit_code'] = hooks.exit_code
        history_data['exception'] = ''
    elif hooks.exception is not None:
        history_data['exit_code'] = 1
        history_data['exception'] = hooks.exception
    else:
        history_data['exception'] = ''
        history_data['exit_code'] = 0
    update_command_history(history_data)


atexit.register(history_save_handler)


def fix_path():
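    """Prepend MFA's downloaded thirdparty binary directory to PATH (and LD_LIBRARY_PATH on non-Windows platforms)."""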
    from montreal_forced_aligner.config import TEMP_DIR
    thirdparty_dir = os.path.join(TEMP_DIR, 'thirdparty', 'bin')
    old_path = os.environ.get('PATH', '')
    if sys.platform == 'win32':
        os.environ['PATH'] = thirdparty_dir + ';' + old_path
    else:
        os.environ['PATH'] = thirdparty_dir + ':' + old_path
        os.environ['LD_LIBRARY_PATH'] = thirdparty_dir + ':' + os.environ.get('LD_LIBRARY_PATH', '')


def unfix_path():
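    """Undo fix_path() by stripping the thirdparty directory that was prepended to PATH (and LD_LIBRARY_PATH)."""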
    if sys.platform == 'win32':
        sep = ';'
        os.environ['PATH'] = sep.join(os.environ['PATH'].split(sep)[1:])
    else:
        sep = ':'
        os.environ['PATH'] = sep.join(os.environ['PATH'].split(sep)[1:])
        os.environ['LD_LIBRARY_PATH'] = sep.join(os.environ.get('LD_LIBRARY_PATH', '').split(sep)[1:])


acoustic_languages = get_available_acoustic_languages()
ivector_languages = get_available_ivector_languages()
lm_languages = get_available_lm_languages()
g2p_languages = get_available_g2p_languages()
dict_languages = get_available_dict_languages()


def create_parser():
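    """Construct the top-level ``mfa`` argument parser with one subparser per subcommand."""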
    GLOBAL_CONFIG = load_global_config()

    def add_global_options(subparser, textgrid_output=False):
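        """Add the options shared across subcommands (temp directory, multiprocessing, job count, verbosity, cleanup, overwrite)."""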
        subparser.add_argument('-t', '--temp_directory', type=str, default=GLOBAL_CONFIG['temp_directory'],
                               help=f"Temporary directory root to store MFA created files, default is {GLOBAL_CONFIG['temp_directory']}")
        subparser.add_argument('--disable_mp',
                               help=f"Disable any multiprocessing during alignment (not recommended), default is {not GLOBAL_CONFIG['use_mp']}",
                               action='store_true',
                               default=not GLOBAL_CONFIG['use_mp'])
        subparser.add_argument('-j', '--num_jobs', type=int, default=GLOBAL_CONFIG['num_jobs'],
                               help=f"Number of data splits (and cores to use if multiprocessing is enabled), defaults "
                                    f"is {GLOBAL_CONFIG['num_jobs']}")
        subparser.add_argument('-v', '--verbose', help=f"Output debug messages, default is {GLOBAL_CONFIG['verbose']}",
                               action='store_true',
                               default=GLOBAL_CONFIG['verbose'])
        subparser.add_argument('--clean', help=f"Remove files from previous runs, default is {GLOBAL_CONFIG['clean']}",
                               action='store_true',
                               default=GLOBAL_CONFIG['clean'])
        subparser.add_argument('--overwrite',
                               help=f"Overwrite output files when they exist, default is {GLOBAL_CONFIG['overwrite']}",
                               action='store_true',
                               default=GLOBAL_CONFIG['overwrite'])
        subparser.add_argument('--debug',
                               help=f"Run extra steps for debugging issues, default is {GLOBAL_CONFIG['debug']}",
                               action='store_true',
                               default=GLOBAL_CONFIG['debug'])
        if textgrid_output:
            subparser.add_argument('--disable_textgrid_cleanup',
                                   help=f"Disable extra clean up steps on TextGrid output, default is {not GLOBAL_CONFIG['cleanup_textgrids']}",
                                   action='store_true',
                                   default=not GLOBAL_CONFIG['cleanup_textgrids'])

    parser = argparse.ArgumentParser()

    subparsers = parser.add_subparsers(dest="subcommand")
    subparsers.required = True

    version_parser = subparsers.add_parser('version')

    align_parser = subparsers.add_parser('align')
    align_parser.add_argument('corpus_directory', help="Full path to the directory to align")
    align_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use")
    align_parser.add_argument('acoustic_model_path',
                              help=f"Full path to the archive containing pre-trained model or language ({', '.join(acoustic_languages)})")
    align_parser.add_argument('output_directory',
                              help="Full path to output directory, will be created if it doesn't exist")
    align_parser.add_argument('--config_path', type=str, default='',
                              help="Path to config file to use for alignment")
    align_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
                              help="Number of characters of file names to use for determining speaker, "
                                   'default is to use directory names')
    align_parser.add_argument('-a', '--audio_directory', type=str, default='',
                              help="Audio directory root to use for finding audio files")
    add_global_options(align_parser, textgrid_output=True)

    adapt_parser = subparsers.add_parser('adapt')
    adapt_parser.add_argument('corpus_directory', help="Full path to the directory to align")
    adapt_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use")
    adapt_parser.add_argument('acoustic_model_path',
                              help=f"Full path to the archive containing pre-trained model or language ({', '.join(acoustic_languages)})")
    adapt_parser.add_argument('output_model_path',
                              help="Full path to save adapted_model")
    adapt_parser.add_argument('output_directory',
                              help="Full path to output directory, will be created if it doesn't exist")
    adapt_parser.add_argument('--config_path', type=str, default='',
                              help="Path to config file to use for alignment")
    adapt_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
                              help="Number of characters of file names to use for determining speaker, "
                                   'default is to use directory names')
    adapt_parser.add_argument('-a', '--audio_directory', type=str, default='',
                              help="Audio directory root to use for finding audio files")
    add_global_options(adapt_parser, textgrid_output=True)

    train_parser = subparsers.add_parser('train')
    train_parser.add_argument('corpus_directory', help="Full path to the source directory to align")
    train_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use",
                              default='')
    train_parser.add_argument('output_directory',
                              help="Full path to output directory, will be created if it doesn't exist")
    train_parser.add_argument('--config_path', type=str, default='',
                              help="Path to config file to use for training and alignment")
    train_parser.add_argument('-o', '--output_model_path', type=str, default='',
                              help="Full path to save resulting acoustic and dictionary model")
    train_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
                              help="Number of characters of filenames to use for determining speaker, "
                                   'default is to use directory names')
    train_parser.add_argument('-a', '--audio_directory', type=str, default='',
                              help="Audio directory root to use for finding audio files")
    train_parser.add_argument('-m', '--acoustic_model_path', type=str, default='',
                              help="Full path to save adapted_model")

    add_global_options(train_parser, textgrid_output=True)

    validate_parser = subparsers.add_parser('validate')
    validate_parser.add_argument('corpus_directory', help="Full path to the source directory to align")
    validate_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use",
                                 default='')
    validate_parser.add_argument('acoustic_model_path', nargs='?', default='',
                                 help=f"Full path to the archive containing pre-trained model or language ({', '.join(acoustic_languages)})")
    validate_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
                                 help="Number of characters of file names to use for determining speaker, "
                                      'default is to use directory names')
    validate_parser.add_argument('--test_transcriptions', help="Test accuracy of transcriptions", action='store_true')
    validate_parser.add_argument('--ignore_acoustics',
                                 help="Skip acoustic feature generation and associated validation",
                                 action='store_true')
    add_global_options(validate_parser)

    g2p_model_help_message = f'''Full path to the archive containing pre-trained model or language ({', '.join(g2p_languages)})
    If not specified, then orthographic transcription is split into pronunciations.'''
    g2p_parser = subparsers.add_parser('g2p')
    g2p_parser.add_argument("g2p_model_path", help=g2p_model_help_message, nargs='?')

    g2p_parser.add_argument("input_path",
                            help="Corpus to base word list on or a text file of words to generate pronunciations")
    g2p_parser.add_argument("output_path", help="Path to save output dictionary")
    g2p_parser.add_argument('--include_bracketed', help="Include words enclosed by brackets, i.e. [...], (...), <...>",
                            action='store_true')
    g2p_parser.add_argument('--config_path', type=str, default='',
                            help="Path to config file to use for G2P")
    add_global_options(g2p_parser)

    train_g2p_parser = subparsers.add_parser('train_g2p')
    train_g2p_parser.add_argument("dictionary_path", help="Location of existing dictionary")

    train_g2p_parser.add_argument("output_model_path", help="Desired location of generated model")
    train_g2p_parser.add_argument('--config_path', type=str, default='',
                                  help="Path to config file to use for G2P")
    train_g2p_parser.add_argument("--validate", action='store_true',
                                  help="Perform an analysis of accuracy training on "
                                       "most of the data and validating on an unseen subset")
    add_global_options(train_g2p_parser)

    download_parser = subparsers.add_parser('download')
    download_parser.add_argument("model_type",
                                 help="Type of model to download, one of 'acoustic', 'g2p', or 'dictionary'")
    download_parser.add_argument("language", help="Name of language code to download, if not specified, "
                                                  "will list all available languages", nargs='?')

    train_lm_parser = subparsers.add_parser('train_lm')
    train_lm_parser.add_argument('source_path', help="Full path to the source directory to train from, alternatively "
                                                     'an ARPA format language model to convert for MFA use')
    train_lm_parser.add_argument('output_model_path', type=str,
                                 help="Full path to save resulting language model")
    train_lm_parser.add_argument('-m', '--model_path', type=str,
                                 help="Full path to existing language model to merge probabilities")
    train_lm_parser.add_argument('-w', '--model_weight', type=float, default=1.0,
                                 help="Weight factor for supplemental language model, defaults to 1.0")
    train_lm_parser.add_argument('--dictionary_path', help="Full path to the pronunciation dictionary to use",
                                 default='')
    train_lm_parser.add_argument('--config_path', type=str, default='',
                                 help="Path to config file to use for training and alignment")
    add_global_options(train_lm_parser)

    train_dictionary_parser = subparsers.add_parser('train_dictionary')
    train_dictionary_parser.add_argument('corpus_directory', help="Full path to the directory to align")
    train_dictionary_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use")
    train_dictionary_parser.add_argument('acoustic_model_path',
                                         help=f"Full path to the archive containing pre-trained model or language ({', '.join(acoustic_languages)})")
    train_dictionary_parser.add_argument('output_directory',
                                         help="Full path to output directory, will be created if it doesn't exist")
    train_dictionary_parser.add_argument('--config_path', type=str, default='',
                                         help="Path to config file to use for alignment")
    train_dictionary_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
                                         help="Number of characters of file names to use for determining speaker, "
                                              'default is to use directory names')
    add_global_options(train_dictionary_parser)

    train_ivector_parser = subparsers.add_parser('train_ivector')
    train_ivector_parser.add_argument('corpus_directory', help="Full path to the source directory to "
                                                               'train the ivector extractor')
    train_ivector_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use")
    train_ivector_parser.add_argument('acoustic_model_path', type=str, default='',
                                      help="Full path to acoustic model for alignment")
    train_ivector_parser.add_argument('output_model_path', type=str, default='',
                                      help="Full path to save resulting ivector extractor")
    train_ivector_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
                                      help="Number of characters of filenames to use for determining speaker, "
                                           'default is to use directory names')
    train_ivector_parser.add_argument('--config_path', type=str, default='',
                                      help="Path to config file to use for training")
    add_global_options(train_ivector_parser)

    classify_speakers_parser = subparsers.add_parser('classify_speakers')
    classify_speakers_parser.add_argument('corpus_directory', help="Full path to the source directory to "
                                                                   'run speaker classification')
    classify_speakers_parser.add_argument('ivector_extractor_path', type=str, default='',
                                          help="Full path to ivector extractor model")
    classify_speakers_parser.add_argument('output_directory',
                                          help="Full path to output directory, will be created if it doesn't exist")

    classify_speakers_parser.add_argument('-s', '--num_speakers', type=int, default=0,
                                          help="Number of speakers if known")
    classify_speakers_parser.add_argument('--cluster', help="Use clustering instead of classification",
                                          action='store_true')
    classify_speakers_parser.add_argument('--config_path', type=str, default='',
                                          help="Path to config file to use for ivector extraction")
    add_global_options(classify_speakers_parser)

    create_segments_parser = subparsers.add_parser('create_segments')
    create_segments_parser.add_argument('corpus_directory', help="Full path to the source directory to "
                                                                 'run VAD segmentation')
    create_segments_parser.add_argument('output_directory',
                                        help="Full path to output directory, will be created if it doesn't exist")
    create_segments_parser.add_argument('--config_path', type=str, default='',
                                        help="Path to config file to use for segmentation")
    add_global_options(create_segments_parser)

    transcribe_parser = subparsers.add_parser('transcribe')
    transcribe_parser.add_argument('corpus_directory', help="Full path to the directory to transcribe")
    transcribe_parser.add_argument('dictionary_path', help="Full path to the pronunciation dictionary to use")
    transcribe_parser.add_argument('acoustic_model_path',
                                   help=f"Full path to the archive containing pre-trained model or language ({', '.join(acoustic_languages)})")
    transcribe_parser.add_argument('language_model_path',
                                   help=f"Full path to the archive containing pre-trained model or language ({', '.join(lm_languages)})")
    transcribe_parser.add_argument('output_directory',
                                   help="Full path to output directory, will be created if it doesn't exist")
    transcribe_parser.add_argument('--config_path', type=str, default='',
                                   help="Path to config file to use for transcription")
    transcribe_parser.add_argument('-s', '--speaker_characters', type=str, default='0',
                                   help="Number of characters of file names to use for determining speaker, "
                                        'default is to use directory names')
    transcribe_parser.add_argument('-a', '--audio_directory', type=str, default='',
                                   help="Audio directory root to use for finding audio files")
    transcribe_parser.add_argument('-e', '--evaluate', help="Evaluate the transcription "
                                                            "against gold-standard transcripts", action='store_true')
    add_global_options(transcribe_parser)

    config_parser = subparsers.add_parser('configure',
                                          help="The configure command is used to set global defaults for MFA so "
                                               "you don't have to set them every time you call an MFA command.")
    config_parser.add_argument('-t', '--temp_directory', type=str, default='',
                               help=f"Set the default temporary directory, default is {GLOBAL_CONFIG['temp_directory']}")
    config_parser.add_argument('-j', '--num_jobs', type=int,
                               help=f"Set the number of processes to use by default, defaults to {GLOBAL_CONFIG['num_jobs']}")
    config_parser.add_argument('--always_clean', help="Always remove files from previous runs by default",
                               action='store_true')
    config_parser.add_argument('--never_clean', help="Don't remove files from previous runs by default",
                               action='store_true')
    config_parser.add_argument('--always_verbose', help="Default to verbose output", action='store_true')
    config_parser.add_argument('--never_verbose', help="Default to non-verbose output", action='store_true')
    config_parser.add_argument('--always_debug', help="Default to running debugging steps", action='store_true')
    config_parser.add_argument('--never_debug', help="Default to not running debugging steps", action='store_true')
    config_parser.add_argument('--always_overwrite', help="Always overwrite output files", action='store_true')
    config_parser.add_argument('--never_overwrite', help="Never overwrite output files (if file already exists, "
                                                         "the output will be saved in the temp directory)",
                               action='store_true')
    config_parser.add_argument('--disable_mp', help="Disable all multiprocessing (not recommended as it will usually "
                                                    "increase processing times)", action='store_true')
    config_parser.add_argument('--enable_mp', help="Enable multiprocessing (recommended and enabled by default)",
                               action='store_true')
    config_parser.add_argument('--disable_textgrid_cleanup', help="Disable postprocessing of TextGrids that cleans up "
                                                                  "silences and recombines compound words and clitics",
                               action='store_true')
    config_parser.add_argument('--enable_textgrid_cleanup', help="Enable postprocessing of TextGrids that cleans up "
                                                                 "silences and recombines compound words and clitics",
                               action='store_true')

    history_parser = subparsers.add_parser('history')

    history_parser.add_argument('depth', help='Number of commands to list', type=int, nargs='?', default=10)
    history_parser.add_argument('--verbose', help="Flag for whether to output additional information",
                                action='store_true')

    annotator_parser = subparsers.add_parser('annotator')
    anchor_parser = subparsers.add_parser('anchor')

    thirdparty_parser = subparsers.add_parser('thirdparty')

    thirdparty_parser.add_argument("command",
                                   help="One of 'download', 'validate', or 'kaldi'")
    thirdparty_parser.add_argument('local_directory',
                                   help="Full path to the built executables to collect", nargs="?",
                                   default='')
    return parser


parser = create_parser()


def main():
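    """Parse command line arguments, check for required thirdparty binaries, and dispatch to the requested subcommand."""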
    parser = create_parser()
    mp.freeze_support()
    args, unknown = parser.parse_known_args()
    for short in ['-c', '-d']:
        if short in unknown:
            print(f'Due to the number of options that `{short}` could refer to, it is not accepted. '
                  'Please specify the full argument')
            sys.exit(1)
    try:
        fix_path()
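        # Verify the required Kaldi binaries are available before doing any heavy processing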
        if args.subcommand in ['align', 'train', 'train_ivector']:
            from montreal_forced_aligner.thirdparty.kaldi import validate_alignment_binaries
            if not validate_alignment_binaries():
                print("There was an issue validating Kaldi binaries, please ensure you've downloaded them via the "
                      "'mfa thirdparty download' command.  See 'mfa thirdparty validate' for more detailed information "
                      "on why this check failed.")
                sys.exit(1)
        elif args.subcommand in ['transcribe']:
            from montreal_forced_aligner.thirdparty.kaldi import validate_transcribe_binaries
            if not validate_transcribe_binaries():
                print("There was an issue validating Kaldi binaries, please ensure you've downloaded them via the "
                      "'mfa thirdparty download' command.  See 'mfa thirdparty validate' for more detailed information "
                      "on why this check failed.  If you are on MacOS, please note that the thirdparty binaries available "
                      "via the download command do not contain the transcription ones.  To get this functionality working "
                      "for the time being, please build kaldi locally and follow the instructions for running the "
                      "'mfa thirdparty kaldi' command.")
                sys.exit(1)
        elif args.subcommand in ['train_dictionary']:
            from montreal_forced_aligner.thirdparty.kaldi import validate_train_dictionary_binaries
            if not validate_train_dictionary_binaries():
                print("There was an issue validating Kaldi binaries, please ensure you've downloaded them via the "
                      "'mfa thirdparty download' command.  See 'mfa thirdparty validate' for more detailed information "
                      "on why this check failed.  If you are on MacOS, please note that the thirdparty binaries available "
                      "via the download command do not contain the train_dictionary ones.  To get this functionality working "
                      "for the time being, please build kaldi locally and follow the instructions for running the "
                      "'mfa thirdparty kaldi' command.")
                sys.exit(1)
        elif args.subcommand in ['g2p', 'train_g2p']:
            try:
                import pynini
            except ImportError:
                print("There was an issue importing Pynini, please ensure that it is installed. If you are on Windows, "
                      "please use the Windows Subsystem for Linux to use g2p functionality.")
                sys.exit(1)
        if args.subcommand == 'align':
            run_align_corpus(args, unknown, acoustic_languages)
        elif args.subcommand == 'adapt':
            run_adapt_model(args, unknown, acoustic_languages)
        elif args.subcommand == 'train':
            run_train_corpus(args, unknown)
        elif args.subcommand == 'g2p':
            run_g2p(args, unknown, g2p_languages)
        elif args.subcommand == 'train_g2p':
            run_train_g2p(args, unknown)
        elif args.subcommand == 'validate':
            run_validate_corpus(args, unknown)
        elif args.subcommand == 'download':
            run_download(args)
        elif args.subcommand == 'train_lm':
            run_train_lm(args, unknown)
        elif args.subcommand == 'train_dictionary':
            run_train_dictionary(args, unknown)
        elif args.subcommand == 'train_ivector':
            run_train_ivector_extractor(args, unknown)
        elif args.subcommand == 'classify_speakers':
            run_classify_speakers(args, unknown)
        elif args.subcommand in ['annotator', 'anchor']:
            from montreal_forced_aligner.command_line.anchor import run_anchor
            run_anchor(args)
        elif args.subcommand == 'thirdparty':
            run_thirdparty(args)
        elif args.subcommand == 'transcribe':
            run_transcribe_corpus(args, unknown)
        elif args.subcommand == 'create_segments':
            run_create_segments(args, unknown)
        elif args.subcommand == 'configure':
            update_global_config(args)
            global GLOBAL_CONFIG
            GLOBAL_CONFIG = load_global_config()
        elif args.subcommand == 'history':
            depth = args.depth
            history = load_command_history()[-depth:]
            if args.verbose:
                print('command\tDate\tExecution time\tVersion\tExit code\tException')
                for h in history:
                    execution_time = time.strftime('%H:%M:%S', time.gmtime(h['execution_time']))
                    d = h['date'].isoformat()
                    print(
                        f"{h['command']}\t{d}\t{execution_time}\t{h['version']}\t{h['exit_code']}\t{h['exception']}")
            else:
                for h in history:
                    print(h['command'])

        elif args.subcommand == 'version':
            print(__version__)
    except MFAError as e:
        if getattr(args, 'debug', False):
            raise
        print(e)
        sys.exit(1)
    finally:
        unfix_path()


if __name__ == '__main__':
    main()