{ "train": { "log_interval": 100, "eval_interval": 1000, "seed": 42, "epochs": 1000, "learning_rate": 0.0002, "betas": [ 0.8, 0.99 ], "eps": 1e-09, "batch_size": 14, "bf16_run": false, "lr_decay": 0.99995, "segment_size": 16384, "init_lr_ratio": 1, "warmup_epochs": 0, "c_mel": 45, "c_kl": 1.0, "c_commit": 100, "skip_optimizer": true, "freeze_EN_bert": false, "freeze_YUE_bert": false, "freeze_emo": false }, "data": { "training_files": "/notebooks/bert-vits2/dataset/commonvoice17/train.list", "validation_files": "/notebooks/bert-vits2/dataset/commonvoice17/val.list", "max_wav_value": 32768.0, "sampling_rate": 44100, "filter_length": 2048, "hop_length": 512, "win_length": 2048, "n_mel_channels": 128, "mel_fmin": 0.0, "mel_fmax": null, "add_blank": true, "n_speakers": 474, "cleaned_text": true, "spk2id": { "EN_SPK0": 0, "EN_SPK1": 1, "EN_SPK2": 2, "EN_SPK3": 3, "EN_SPK4": 4, "EN_SPK5": 5, "EN_SPK6": 6, "EN_SPK7": 7, "EN_SPK8": 8, "EN_SPK9": 9, "EN_SPK10": 10, "EN_SPK11": 11, "EN_SPK12": 12, "EN_SPK13": 13, "EN_SPK14": 14, "EN_SPK15": 15, "EN_SPK16": 16, "EN_SPK17": 17, "EN_SPK18": 18, "EN_SPK19": 19, "EN_SPK20": 20, "EN_SPK21": 21, "EN_SPK22": 22, "EN_SPK23": 23, "EN_SPK24": 24, "EN_SPK25": 25, "EN_SPK26": 26, "EN_SPK27": 27, "EN_SPK28": 28, "EN_SPK29": 29, "EN_SPK30": 30, "EN_SPK31": 31, "EN_SPK32": 32, "EN_SPK33": 33, "EN_SPK34": 34, "EN_SPK35": 35, "EN_SPK36": 36, "EN_SPK37": 37, "EN_SPK38": 38, "EN_SPK39": 39, "EN_SPK40": 40, "EN_SPK41": 41, "EN_SPK42": 42, "EN_SPK43": 43, "EN_SPK44": 44, "EN_SPK45": 45, "EN_SPK46": 46, "EN_SPK47": 47, "EN_SPK48": 48, "EN_SPK49": 49, "EN_SPK50": 50, "EN_SPK51": 51, "EN_SPK52": 52, "EN_SPK53": 53, "EN_SPK54": 54, "EN_SPK55": 55, "EN_SPK56": 56, "EN_SPK57": 57, "EN_SPK58": 58, "EN_SPK59": 59, "EN_SPK60": 60, "EN_SPK61": 61, "EN_SPK62": 62, "EN_SPK63": 63, "EN_SPK64": 64, "EN_SPK65": 65, "EN_SPK66": 66, "EN_SPK67": 67, "EN_SPK68": 68, "EN_SPK69": 69, "EN_SPK70": 70, "EN_SPK71": 71, "EN_SPK72": 72, "EN_SPK73": 73, "EN_SPK74": 74, "EN_SPK75": 75, "EN_SPK76": 76, "EN_SPK77": 77, "EN_SPK78": 78, "EN_SPK79": 79, "EN_SPK80": 80, "EN_SPK81": 81, "EN_SPK82": 82, "EN_SPK83": 83, "EN_SPK84": 84, "EN_SPK85": 85, "EN_SPK86": 86, "EN_SPK87": 87, "EN_SPK88": 88, "EN_SPK89": 89, "EN_SPK90": 90, "EN_SPK91": 91, "EN_SPK92": 92, "EN_SPK93": 93, "EN_SPK94": 94, "EN_SPK95": 95, "EN_SPK96": 96, "EN_SPK97": 97, "EN_SPK98": 98, "EN_SPK99": 99, "YUE_SPK598": 100, "YUE_SPK601": 101, "YUE_SPK599": 102, "YUE_SPK587": 103, "YUE_SPK570": 104, "YUE_SPK522": 105, "YUE_SPK577": 106, "YUE_SPK600": 107, "YUE_SPK687": 108, "YUE_SPK596": 109, "YUE_SPK572": 110, "YUE_SPK678": 111, "YUE_SPK597": 112, "YUE_SPK584": 113, "YUE_SPK408": 114, "YUE_SPK594": 115, "YUE_SPK592": 116, "YUE_SPK580": 117, "YUE_SPK686": 118, "YUE_SPK585": 119, "YUE_SPK591": 120, "YUE_SPK573": 121, "YUE_SPK579": 122, "YUE_SPK590": 123, "YUE_SPK525": 124, "YUE_SPK674": 125, "YUE_SPK595": 126, "YUE_SPK589": 127, "YUE_SPK608": 128, "YUE_SPK544": 129, "YUE_SPK341": 130, "YUE_SPK555": 131, "YUE_SPK586": 132, "YUE_SPK469": 133, "YUE_SPK534": 134, "YUE_SPK578": 135, "YUE_SPK582": 136, "YUE_SPK581": 137, "YUE_SPK531": 138, "YUE_SPK583": 139, "YUE_SPK501": 140, "YUE_SPK482": 141, "YUE_SPK481": 142, "YUE_SPK660": 143, "YUE_SPK574": 144, "YUE_SPK520": 145, "YUE_SPK516": 146, "YUE_SPK381": 147, "YUE_SPK379": 148, "YUE_SPK427": 149, "YUE_SPK500": 150, "YUE_SPK493": 151, "YUE_SPK343": 152, "YUE_SPK588": 153, "YUE_SPK533": 154, "YUE_SPK662": 155, "YUE_SPK485": 156, "YUE_SPK479": 157, "YUE_SPK673": 158, "YUE_SPK416": 159, "YUE_SPK689": 160, "YUE_SPK668": 161, "YUE_SPK666": 162, "YUE_SPK459": 163, "YUE_SPK506": 164, "YUE_SPK657": 165, "YUE_SPK651": 166, "YUE_SPK685": 167, "YUE_SPK683": 168, "YUE_SPK575": 169, "YUE_SPK560": 170, "YUE_SPK437": 171, "YUE_SPK538": 172, "YUE_SPK692": 173, "YUE_SPK645": 174, "YUE_SPK688": 175, "YUE_SPK458": 176, "YUE_SPK426": 177, "YUE_SPK677": 178, "YUE_SPK622": 179, "YUE_SPK492": 180, "YUE_SPK696": 181, "YUE_SPK644": 182, "YUE_SPK637": 183, "YUE_SPK340": 184, "YUE_SPK680": 185, "YUE_SPK559": 186, "YUE_SPK698": 187, "YUE_SPK693": 188, "YUE_SPK562": 189, "YUE_SPK619": 190, "YUE_SPK404": 191, "YUE_SPK632": 192, "YUE_SPK364": 193, "YUE_SPK412": 194, "YUE_SPK443": 195, "YUE_SPK675": 196, "YUE_SPK697": 197, "YUE_SPK672": 198, "YUE_SPK510": 199, "YUE_SPK431": 200, "YUE_SPK613": 201, "YUE_SPK681": 202, "YUE_SPK655": 203, "YUE_SPK604": 204, "YUE_SPK549": 205, "YUE_SPK556": 206, "YUE_SPK407": 207, "YUE_SPK670": 208, "YUE_SPK529": 209, "YUE_SPK349": 210, "YUE_SPK474": 211, "YUE_SPK532": 212, "YUE_SPK557": 213, "YUE_SPK508": 214, "YUE_SPK467": 215, "YUE_SPK491": 216, "YUE_SPK505": 217, "YUE_SPK457": 218, "YUE_SPK439": 219, "YUE_SPK605": 220, "YUE_SPK679": 221, "YUE_SPK653": 222, "YUE_SPK646": 223, "YUE_SPK576": 224, "YUE_SPK647": 225, "YUE_SPK684": 226, "YUE_SPK633": 227, "YUE_SPK415": 228, "YUE_SPK550": 229, "YUE_SPK374": 230, "YUE_SPK563": 231, "YUE_SPK566": 232, "YUE_SPK353": 233, "YUE_SPK444": 234, "YUE_SPK571": 235, "YUE_SPK669": 236, "YUE_SPK610": 237, "YUE_SPK676": 238, "YUE_SPK648": 239, "YUE_SPK618": 240, "YUE_SPK495": 241, "YUE_SPK503": 242, "YUE_SPK694": 243, "YUE_SPK464": 244, "YUE_SPK667": 245, "YUE_SPK628": 246, "YUE_SPK636": 247, "YUE_SPK483": 248, "YUE_SPK387": 249, "YUE_SPK546": 250, "YUE_SPK552": 251, "YUE_SPK638": 252, "YUE_SPK535": 253, "YUE_SPK695": 254, "YUE_SPK568": 255, "YUE_SPK456": 256, "YUE_SPK612": 257, "YUE_SPK671": 258, "YUE_SPK640": 259, "YUE_SPK402": 260, "YUE_SPK473": 261, "YUE_SPK453": 262, "YUE_SPK629": 263, "YUE_SPK564": 264, "YUE_SPK342": 265, "YUE_SPK664": 266, "YUE_SPK478": 267, "YUE_SPK509": 268, "YUE_SPK471": 269, "YUE_SPK630": 270, "YUE_SPK699": 271, "YUE_SPK359": 272, "YUE_SPK346": 273, "YUE_SPK682": 274, "YUE_SPK643": 275, "YUE_SPK428": 276, "YUE_SPK620": 277, "YUE_SPK515": 278, "YUE_SPK547": 279, "YUE_SPK425": 280, "YUE_SPK658": 281, "YUE_SPK558": 282, "YUE_SPK650": 283, "YUE_SPK385": 284, "YUE_SPK652": 285, "YUE_SPK447": 286, "YUE_SPK567": 287, "YUE_SPK368": 288, "YUE_SPK642": 289, "YUE_SPK616": 290, "YUE_SPK690": 291, "YUE_SPK498": 292, "YUE_SPK507": 293, "YUE_SPK540": 294, "YUE_SPK527": 295, "YUE_SPK565": 296, "YUE_SPK399": 297, "YUE_SPK691": 298, "YUE_SPK649": 299, "YUE_SPK430": 300, "YUE_SPK472": 301, "YUE_SPK537": 302, "YUE_SPK337": 303, "YUE_SPK494": 304, "YUE_SPK409": 305, "YUE_SPK488": 306, "YUE_SPK624": 307, "YUE_SPK514": 308, "YUE_SPK398": 309, "YUE_SPK394": 310, "YUE_SPK499": 311, "YUE_SPK450": 312, "YUE_SPK665": 313, "YUE_SPK502": 314, "YUE_SPK480": 315, "YUE_SPK490": 316, "YUE_SPK484": 317, "YUE_SPK634": 318, "YUE_SPK476": 319, "YUE_SPK623": 320, "YUE_SPK400": 321, "YUE_SPK383": 322, "YUE_SPK440": 323, "YUE_SPK627": 324, "YUE_SPK530": 325, "YUE_SPK438": 326, "YUE_SPK486": 327, "YUE_SPK331": 328, "YUE_SPK539": 329, "YUE_SPK661": 330, "YUE_SPK609": 331, "YUE_SPK524": 332, "YUE_SPK455": 333, "YUE_SPK614": 334, "YUE_SPK372": 335, "YUE_SPK393": 336, "YUE_SPK470": 337, "YUE_SPK569": 338, "YUE_SPK442": 339, "YUE_SPK410": 340, "YUE_SPK330": 341, "YUE_SPK446": 342, "YUE_SPK656": 343, "YUE_SPK554": 344, "YUE_SPK543": 345, "YUE_SPK553": 346, "YUE_SPK631": 347, "YUE_SPK463": 348, "YUE_SPK536": 349, "YUE_SPK418": 350, "YUE_SPK441": 351, "YUE_SPK413": 352, "YUE_SPK659": 353, "YUE_SPK445": 354, "YUE_SPK654": 355, "YUE_SPK465": 356, "YUE_SPK551": 357, "YUE_SPK382": 358, "YUE_SPK663": 359, "YUE_SPK362": 360, "YUE_SPK366": 361, "YUE_SPK345": 362, "YUE_SPK422": 363, "YUE_SPK496": 364, "YUE_SPK528": 365, "YUE_SPK335": 366, "YUE_SPK417": 367, "YUE_SPK477": 368, "YUE_SPK411": 369, "YUE_SPK626": 370, "YUE_SPK475": 371, "YUE_SPK421": 372, "YUE_SPK545": 373, "YUE_SPK489": 374, "YUE_SPK504": 375, "YUE_SPK424": 376, "YUE_SPK548": 377, "YUE_SPK518": 378, "YUE_SPK377": 379, "YUE_SPK401": 380, "YUE_SPK434": 381, "YUE_SPK523": 382, "YUE_SPK466": 383, "YUE_SPK338": 384, "YUE_SPK542": 385, "YUE_SPK611": 386, "YUE_SPK641": 387, "YUE_SPK419": 388, "YUE_SPK497": 389, "YUE_SPK432": 390, "YUE_SPK461": 391, "YUE_SPK347": 392, "YUE_SPK420": 393, "YUE_SPK403": 394, "YUE_SPK406": 395, "YUE_SPK606": 396, "YUE_SPK339": 397, "YUE_SPK367": 398, "YUE_SPK452": 399, "YUE_SPK513": 400, "YUE_SPK454": 401, "YUE_SPK607": 402, "YUE_SPK625": 403, "YUE_SPK448": 404, "YUE_SPK369": 405, "YUE_SPK391": 406, "YUE_SPK621": 407, "YUE_SPK423": 408, "YUE_SPK511": 409, "YUE_SPK375": 410, "YUE_SPK526": 411, "YUE_SPK512": 412, "YUE_SPK354": 413, "YUE_SPK433": 414, "YUE_SPK396": 415, "YUE_SPK395": 416, "YUE_SPK405": 417, "YUE_SPK541": 418, "YUE_SPK561": 419, "YUE_SPK435": 420, "YUE_SPK517": 421, "YUE_SPK639": 422, "YUE_SPK635": 423, "YUE_SPK468": 424, "YUE_SPK333": 425, "YUE_SPK436": 426, "YUE_SPK429": 427, "YUE_SPK615": 428, "YUE_SPK350": 429, "YUE_SPK363": 430, "YUE_SPK414": 431, "YUE_SPK451": 432, "YUE_SPK519": 433, "YUE_SPK392": 434, "YUE_SPK361": 435, "YUE_SPK487": 436, "YUE_SPK462": 437, "YUE_SPK386": 438, "YUE_SPK388": 439, "YUE_SPK344": 440, "YUE_SPK332": 441, "YUE_SPK460": 442, "YUE_SPK449": 443, "YUE_SPK397": 444, "AUG_GOOGLE_TTS": 445, "AUG_YUE_SPK598": 446, "AUG_YUE_SPK582": 447, "AUG_YUE_SPK581": 448, "AUG_YUE_SPK589": 449, "AUG_YUE_SPK579": 450, "AUG_YUE_SPK574": 451, "AUG_YUE_SPK594": 452, "AUG_YUE_SPK591": 453, "AUG_YUE_SPK585": 454, "AUG_YUE_SPK577": 455, "AUG_YUE_SPK592": 456, "AUG_YUE_SPK595": 457, "AUG_YUE_SPK587": 458, "AUG_YUE_SPK601": 459, "AUG_YUE_SPK586": 460, "AUG_YUE_SPK575": 461, "AUG_YUE_SPK583": 462, "AUG_YUE_SPK584": 463, "AUG_YUE_SPK576": 464, "AUG_YUE_SPK596": 465, "AUG_YUE_SPK580": 466, "AUG_YUE_SPK588": 467, "AUG_YUE_SPK597": 468, "AUG_YUE_SPK599": 469, "AUG_YUE_SPK590": 470, "AUG_YUE_SPK600": 471, "AUG_YUE_SPK573": 472, "AUG_YUE_SPK578": 473 } }, "model": { "use_spk_conditioned_encoder": true, "use_noise_scaled_mas": true, "use_mel_posterior_encoder": false, "use_duration_discriminator": true, "inter_channels": 192, "hidden_channels": 192, "filter_channels": 768, "n_heads": 2, "n_layers": 6, "kernel_size": 3, "p_dropout": 0.1, "resblock": "1", "resblock_kernel_sizes": [ 3, 7, 11 ], "resblock_dilation_sizes": [ [ 1, 3, 5 ], [ 1, 3, 5 ], [ 1, 3, 5 ] ], "upsample_rates": [ 8, 8, 2, 2, 2 ], "upsample_initial_channel": 512, "upsample_kernel_sizes": [ 16, 16, 8, 2, 2 ], "n_layers_q": 3, "use_spectral_norm": false, "gin_channels": 512, "slm": { "model": "./slm/wavlm-base-plus", "sr": 16000, "hidden": 768, "nlayers": 13, "initial_channel": 64 } }, "version": "2.3" }